Skip to content

Commit

Permalink
Merge #773: refactor expression parsing and checksum checking
Browse files Browse the repository at this point in the history
eb4f8c4 expression: move checksum validation into expression parsing (Andrew Poelstra)
bb2e658 expression: add explicit well-formedness check to parser (Andrew Poelstra)
9b03043 expression: add some unit tests (Andrew Poelstra)
ad85f92 expression: implement PartialEq/Eq for trees (Andrew Poelstra)
b0508d8 checksum: clean up errors and error paths (Andrew Poelstra)

Pull request description:

  As a step toward rewriting the expression parser to be non-recursive, add a pre-parsing well-formedness check, which verifies that an expression is well-formed, uses only the correct character set, and that the checksum (if present) is valid.

  Along the way, replace the error types returned from the `expression` module with a new more-precise one which can identify the location of parse errors (and identify what the error was in a more correct way). This improves our error reporting and drops many instances of the stringly-typed `BadDescriptor` error.

  There is currently a bunch of special logic for Taproot descriptors which have the extra characters `{` and `}`. To the extent possible, this PR doesn't touch that logic. It will be addressed in a later PR.

  The benchmarks show a slight slowdown since we essentially added new validation logic without removing the old logic. Later PRs will improve things.

ACKs for top commit:
  sanket1729:
    ACK eb4f8c4

Tree-SHA512: cb972e9683aca51f8d18ab61521af8606f47bd1d0bc37dd1ed085101dbc4dd69b382eb05e8e21d2856ac68ebe7d2ca7879ca8a0692dacbea0b22b7b10c9ef987
  • Loading branch information
apoelstra committed Nov 13, 2024
2 parents ae8f450 + eb4f8c4 commit 460400d
Show file tree
Hide file tree
Showing 12 changed files with 465 additions and 120 deletions.
7 changes: 2 additions & 5 deletions src/descriptor/bare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ use core::fmt;
use bitcoin::script::{self, PushBytes};
use bitcoin::{Address, Network, ScriptBuf, Weight};

use super::checksum::verify_checksum;
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
use crate::expression::{self, FromTree};
use crate::miniscript::context::{ScriptContext, ScriptContextError};
Expand Down Expand Up @@ -186,8 +185,7 @@ impl<Pk: FromStrKey> FromTree for Bare<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Bare<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Self::from_tree(&top)
}
}
Expand Down Expand Up @@ -387,8 +385,7 @@ impl<Pk: FromStrKey> FromTree for Pkh<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Pkh<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Self::from_tree(&top)
}
}
Expand Down
146 changes: 110 additions & 36 deletions src/descriptor/checksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,46 +14,117 @@ use core::iter::FromIterator;
use bech32::primitives::checksum::PackedFe32;
use bech32::{Checksum, Fe32};

pub use crate::expression::VALID_CHARS;
use crate::prelude::*;
use crate::Error;

const CHECKSUM_LENGTH: usize = 8;
const CODE_LENGTH: usize = 32767;

/// Compute the checksum of a descriptor.
/// Map of valid characters in descriptor strings.
///
/// Note that this function does not check if the descriptor string is
/// syntactically correct or not. This only computes the checksum.
pub fn desc_checksum(desc: &str) -> Result<String, Error> {
let mut eng = Engine::new();
eng.input(desc)?;
Ok(eng.checksum())
/// The map starts at 32 (space) and runs up to 126 (tilde).
#[rustfmt::skip]
const CHAR_MAP: [u8; 95] = [
94, 59, 92, 91, 28, 29, 50, 15, 10, 11, 17, 51, 14, 52, 53, 16,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 27, 54, 55, 56, 57, 58,
26, 82, 83, 84, 85, 86, 87, 88, 89, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 12, 93, 13, 60, 61,
90, 18, 19, 20, 21, 22, 23, 24, 25, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 30, 62, 31, 63,
];

/// Error validating descriptor checksum.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Error {
/// Character outside of descriptor charset.
InvalidCharacter {
/// The character in question.
ch: char,
/// Its position in the string.
pos: usize,
},
/// Checksum had the incorrect length.
InvalidChecksumLength {
/// The length of the checksum in the string.
actual: usize,
/// The length of a valid descriptor checksum.
expected: usize,
},
/// Checksum was invalid.
InvalidChecksum {
/// The checksum in the string.
actual: [char; CHECKSUM_LENGTH],
/// The checksum that should have been there, assuming the string is valid.
expected: [char; CHECKSUM_LENGTH],
},
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::InvalidCharacter { ch, pos } => {
write!(f, "invalid character '{}' (position {})", ch, pos)
}
Error::InvalidChecksumLength { actual, expected } => {
write!(f, "invalid checksum (length {}, expected {})", actual, expected)
}
Error::InvalidChecksum { actual, expected } => {
f.write_str("invalid checksum ")?;
for ch in actual {
ch.fmt(f)?;
}
f.write_str("; expected ")?;
for ch in expected {
ch.fmt(f)?;
}
Ok(())
}
}
}
}

#[cfg(feature = "std")]
impl std::error::Error for Error {
fn cause(&self) -> Option<&dyn std::error::Error> { None }
}

/// Helper function for `FromStr` for various descriptor types.
///
/// Checks and verifies the checksum if it is present and returns the descriptor
/// string without the checksum.
pub(super) fn verify_checksum(s: &str) -> Result<&str, Error> {
for ch in s.as_bytes() {
if *ch < 20 || *ch > 127 {
return Err(Error::Unprintable(*ch));
pub fn verify_checksum(s: &str) -> Result<&str, Error> {
let mut last_hash_pos = s.len();
for (pos, ch) in s.char_indices() {
if !(32..127).contains(&u32::from(ch)) {
return Err(Error::InvalidCharacter { ch, pos });
} else if ch == '#' {
last_hash_pos = pos;
}
}
// After this point we know we have ASCII and can stop using character methods.

if last_hash_pos < s.len() {
let checksum_str = &s[last_hash_pos + 1..];
if checksum_str.len() != CHECKSUM_LENGTH {
return Err(Error::InvalidChecksumLength {
actual: checksum_str.len(),
expected: CHECKSUM_LENGTH,
});
}

let mut eng = Engine::new();
eng.input_unchecked(s[..last_hash_pos].as_bytes());

let mut parts = s.splitn(2, '#');
let desc_str = parts.next().unwrap();
if let Some(checksum_str) = parts.next() {
let expected_sum = desc_checksum(desc_str)?;
if checksum_str != expected_sum {
return Err(Error::BadDescriptor(format!(
"Invalid checksum '{}', expected '{}'",
checksum_str, expected_sum
)));
let expected = eng.checksum_chars();
let mut actual = ['_'; CHECKSUM_LENGTH];
for (act, ch) in actual.iter_mut().zip(checksum_str.chars()) {
*act = ch;
}

if expected != actual {
return Err(Error::InvalidChecksum { actual, expected });
}
}
Ok(desc_str)
Ok(&s[..last_hash_pos])
}

/// An engine to compute a checksum from a string.
Expand All @@ -78,16 +149,18 @@ impl Engine {
/// If this function returns an error, the `Engine` will be left in an indeterminate
/// state! It is safe to continue feeding it data but the result will not be meaningful.
pub fn input(&mut self, s: &str) -> Result<(), Error> {
for ch in s.chars() {
let pos = VALID_CHARS
.get(ch as usize)
.ok_or_else(|| {
Error::BadDescriptor(format!("Invalid character in checksum: '{}'", ch))
})?
.ok_or_else(|| {
Error::BadDescriptor(format!("Invalid character in checksum: '{}'", ch))
})? as u64;
for (pos, ch) in s.char_indices() {
if !(32..127).contains(&u32::from(ch)) {
return Err(Error::InvalidCharacter { ch, pos });
}
}
self.input_unchecked(s.as_bytes());
Ok(())
}

fn input_unchecked(&mut self, s: &[u8]) {
for ch in s {
let pos = u64::from(CHAR_MAP[usize::from(*ch) - 32]);
let fe = Fe32::try_from(pos & 31).expect("pos is valid because of the mask");
self.inner.input_fe(fe);

Expand All @@ -100,7 +173,6 @@ impl Engine {
self.clscount = 0;
}
}
Ok(())
}

/// Obtains the checksum characters of all the data thus-far fed to the
Expand Down Expand Up @@ -192,7 +264,9 @@ mod test {

macro_rules! check_expected {
($desc: expr, $checksum: expr) => {
assert_eq!(desc_checksum($desc).unwrap(), $checksum);
let mut eng = Engine::new();
eng.input_unchecked($desc.as_bytes());
assert_eq!(eng.checksum(), $checksum);
};
}

Expand Down Expand Up @@ -229,8 +303,8 @@ mod test {
let invalid_desc = format!("wpkh(tprv8ZgxMBicQKsPdpkqS7Eair4YxjcuuvDPNYmKX3sCniCf16tHEVrjjiSXEkFRnUH77yXc6ZcwHHcL{}fjdi5qUvw3VDfgYiH5mNsj5izuiu2N/1/2/*)", sparkle_heart);

assert_eq!(
desc_checksum(&invalid_desc).err().unwrap().to_string(),
format!("Invalid descriptor: Invalid character in checksum: '{}'", sparkle_heart)
verify_checksum(&invalid_desc).err().unwrap().to_string(),
format!("invalid character '{}' (position 85)", sparkle_heart)
);
}

Expand Down
17 changes: 7 additions & 10 deletions src/descriptor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ use bitcoin::{
};
use sync::Arc;

use self::checksum::verify_checksum;
use crate::miniscript::decode::Terminal;
use crate::miniscript::{satisfy, Legacy, Miniscript, Segwitv0};
use crate::plan::{AssetProvider, Plan};
Expand Down Expand Up @@ -988,8 +987,7 @@ impl<Pk: FromStrKey> FromStr for Descriptor<Pk> {
let desc = if s.starts_with("tr(") {
Ok(Descriptor::Tr(Tr::from_str(s)?))
} else {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
expression::FromTree::from_tree(&top)
}?;

Expand Down Expand Up @@ -1053,8 +1051,7 @@ mod tests {
use bitcoin::sighash::EcdsaSighashType;
use bitcoin::{bip32, PublicKey, Sequence};

use super::checksum::desc_checksum;
use super::*;
use super::{checksum, *};
use crate::hex_script;
#[cfg(feature = "compiler")]
use crate::policy;
Expand All @@ -1066,10 +1063,10 @@ mod tests {
let desc = Descriptor::<String>::from_str(s).unwrap();
let output = desc.to_string();
let normalize_aliases = s.replace("c:pk_k(", "pk(").replace("c:pk_h(", "pkh(");
assert_eq!(
format!("{}#{}", &normalize_aliases, desc_checksum(&normalize_aliases).unwrap()),
output
);

let mut checksum_eng = checksum::Engine::new();
checksum_eng.input(&normalize_aliases).unwrap();
assert_eq!(format!("{}#{}", &normalize_aliases, checksum_eng.checksum()), output);
}

#[test]
Expand Down Expand Up @@ -1841,7 +1838,7 @@ mod tests {
($secp: ident,$($desc: expr),*) => {
$(
match Descriptor::parse_descriptor($secp, $desc) {
Err(Error::BadDescriptor(_)) => {},
Err(Error::ParseTree(crate::ParseTreeError::Checksum(_))) => {},
Err(e) => panic!("Expected bad checksum for {}, got '{}'", $desc, e),
_ => panic!("Invalid checksum treated as valid: {}", $desc),
};
Expand Down
7 changes: 2 additions & 5 deletions src/descriptor/segwitv0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use core::fmt;

use bitcoin::{Address, Network, ScriptBuf, Weight};

use super::checksum::verify_checksum;
use super::SortedMultiVec;
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
use crate::expression::{self, FromTree};
Expand Down Expand Up @@ -288,8 +287,7 @@ impl<Pk: MiniscriptKey> fmt::Display for Wsh<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Wsh<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Wsh::<Pk>::from_tree(&top)
}
}
Expand Down Expand Up @@ -505,8 +503,7 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Wpkh<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Wpkh<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Self::from_tree(&top)
}
}
Expand Down
4 changes: 1 addition & 3 deletions src/descriptor/sh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use core::fmt;
use bitcoin::script::PushBytes;
use bitcoin::{script, Address, Network, ScriptBuf, Weight};

use super::checksum::verify_checksum;
use super::{SortedMultiVec, Wpkh, Wsh};
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
use crate::expression::{self, FromTree};
Expand Down Expand Up @@ -109,8 +108,7 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Sh<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Sh<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Self::from_tree(&top)
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/descriptor/tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,9 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Tr<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Tr<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let desc_str = verify_checksum(s)
.map_err(From::from)
.map_err(Error::ParseTree)?;
let top = parse_tr_tree(desc_str)?;
Self::from_tree(&top)
}
Expand Down Expand Up @@ -587,8 +589,6 @@ impl<Pk: MiniscriptKey> fmt::Display for Tr<Pk> {

// Helper function to parse string into miniscript tree form
fn parse_tr_tree(s: &str) -> Result<expression::Tree, Error> {
expression::check_valid_chars(s)?;

if s.len() > 3 && &s[..3] == "tr(" && s.as_bytes()[s.len() - 1] == b')' {
let rest = &s[3..s.len() - 1];
if !rest.contains(',') {
Expand Down
Loading

0 comments on commit 460400d

Please sign in to comment.