From 4137a5953c2abedb1834ac6ab0103e022fd1e256 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 2 Sep 2024 16:04:52 +0000 Subject: [PATCH 1/5] expression: encapsulate threshold parsing Our current expression API has a `to_null_threshold` method, which works reasonably well but requires the caller translate the returned threshold by accessing individual children. We will later want to change the Tree represntation to make individual child access inefficient. To do this, encapsulate the threshold construction into a new verify_threshold. This previously was not done because our messy error structures made it very difficult to manage. But our error structures are less messy so it's possible now, though a bit of a hack. (We map everything to the global Error structure. Later we will do pretty-much the same thing but with ParseError in place of Error, which will be more elegant.) --- src/descriptor/sortedmulti.rs | 5 +-- src/expression/mod.rs | 65 ++++++++++++++++++++--------------- src/lib.rs | 5 +++ src/miniscript/astelem.rs | 20 ++--------- src/policy/concrete.rs | 6 ++-- src/policy/semantic.rs | 6 ++-- src/primitives/threshold.rs | 4 +-- 7 files changed, 52 insertions(+), 59 deletions(-) diff --git a/src/descriptor/sortedmulti.rs b/src/descriptor/sortedmulti.rs index fe0be4244..85664635e 100644 --- a/src/descriptor/sortedmulti.rs +++ b/src/descriptor/sortedmulti.rs @@ -69,10 +69,7 @@ impl SortedMultiVec { let ret = Self { inner: tree - .to_null_threshold() - .map_err(Error::ParseThreshold)? - .translate_by_index(|i| tree.args[i + 1].verify_terminal("public key")) - .map_err(Error::Parse)?, + .verify_threshold(|sub| sub.verify_terminal("public_key").map_err(Error::Parse))?, phantom: PhantomData, }; ret.constructor_check() diff --git a/src/expression/mod.rs b/src/expression/mod.rs index 8377da767..cdd688b31 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -239,6 +239,43 @@ impl<'a> Tree<'a> { Ok((&self.args[0], &self.args[1])) } + /// Parses an expression tree as a threshold (a term with at least one child, + /// the first of which is a positive integer k). + /// + /// This sanity-checks that the threshold is well-formed (begins with a valid + /// threshold value, etc.) but does not parse the children of the threshold. + /// Instead it returns a threshold holding the empty type `()`, which is + /// constructed without any allocations, and expects the caller to convert + /// this to the "real" threshold type by calling [`Threshold::translate`]. + /// + /// (An alternate API which does the conversion inline turned out to be + /// too messy; it needs to take a closure, have multiple generic parameters, + /// and be able to return multiple error types.) + pub fn verify_threshold< + const MAX: usize, + F: FnMut(&Self) -> Result, + T, + E: From, + >( + &self, + mut map_child: F, + ) -> Result, E> { + // First, special case "no arguments" so we can index the first argument without panics. + if self.args.is_empty() { + return Err(ParseThresholdError::NoChildren.into()); + } + + if !self.args[0].args.is_empty() { + return Err(ParseThresholdError::KNotTerminal.into()); + } + + let k = parse_num(self.args[0].name).map_err(ParseThresholdError::ParseK)? as usize; + Threshold::new(k, vec![(); self.args.len() - 1]) + .map_err(ParseThresholdError::Threshold) + .map_err(From::from) + .and_then(|thresh| thresh.translate_by_index(|i| map_child(&self.args[1 + i]))) + } + /// Check that a tree has no curly-brace children in it. pub fn verify_no_curly_braces(&self) -> Result<(), ParseTreeError> { for tree in self.pre_order_iter() { @@ -403,34 +440,6 @@ impl<'a> Tree<'a> { args.reverse(); Ok(Tree { name: &s[..node_name_end], children_pos, parens, args }) } - - /// Parses an expression tree as a threshold (a term with at least one child, - /// the first of which is a positive integer k). - /// - /// This sanity-checks that the threshold is well-formed (begins with a valid - /// threshold value, etc.) but does not parse the children of the threshold. - /// Instead it returns a threshold holding the empty type `()`, which is - /// constructed without any allocations, and expects the caller to convert - /// this to the "real" threshold type by calling [`Threshold::translate`]. - /// - /// (An alternate API which does the conversion inline turned out to be - /// too messy; it needs to take a closure, have multiple generic parameters, - /// and be able to return multiple error types.) - pub fn to_null_threshold( - &self, - ) -> Result, ParseThresholdError> { - // First, special case "no arguments" so we can index the first argument without panics. - if self.args.is_empty() { - return Err(ParseThresholdError::NoChildren); - } - - if !self.args[0].args.is_empty() { - return Err(ParseThresholdError::KNotTerminal); - } - - let k = parse_num(self.args[0].name).map_err(ParseThresholdError::ParseK)? as usize; - Threshold::new(k, vec![(); self.args.len() - 1]).map_err(ParseThresholdError::Threshold) - } } /// Parse a string as a u32, for timelocks or thresholds diff --git a/src/lib.rs b/src/lib.rs index 8115afe13..3b0c0d3bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -480,6 +480,11 @@ pub enum Error { Parse(ParseError), } +#[doc(hidden)] // will be removed when we remove Error +impl From for Error { + fn from(e: ParseThresholdError) -> Self { Self::ParseThreshold(e) } +} + // https://github.com/sipa/miniscript/pull/5 for discussion on this number const MAX_RECURSION_DEPTH: u32 = 402; diff --git a/src/miniscript/astelem.rs b/src/miniscript/astelem.rs index e5c20a5f6..d28617dd8 100644 --- a/src/miniscript/astelem.rs +++ b/src/miniscript/astelem.rs @@ -122,27 +122,13 @@ impl crate::expression::FromTree for Termina "or_c" => binary(top, "or_c", Terminal::OrC), "or_i" => binary(top, "or_i", Terminal::OrI), "thresh" => top - .to_null_threshold() - .map_err(Error::ParseThreshold)? - .translate_by_index(|i| Miniscript::from_tree(&top.args[1 + i]).map(Arc::new)) + .verify_threshold(|sub| Miniscript::from_tree(sub).map(Arc::new)) .map(Terminal::Thresh), "multi" => top - .to_null_threshold() - .map_err(Error::ParseThreshold)? - .translate_by_index(|i| { - top.args[1 + i] - .verify_terminal("public key") - .map_err(Error::Parse) - }) + .verify_threshold(|sub| sub.verify_terminal("public_key").map_err(Error::Parse)) .map(Terminal::Multi), "multi_a" => top - .to_null_threshold() - .map_err(Error::ParseThreshold)? - .translate_by_index(|i| { - top.args[1 + i] - .verify_terminal("public key") - .map_err(Error::Parse) - }) + .verify_threshold(|sub| sub.verify_terminal("public_key").map_err(Error::Parse)) .map(Terminal::MultiA), x => Err(Error::Parse(crate::ParseError::Tree(crate::ParseTreeError::UnknownName { name: x.to_owned(), diff --git a/src/policy/concrete.rs b/src/policy/concrete.rs index 3d202f2fc..b5eec2215 100644 --- a/src/policy/concrete.rs +++ b/src/policy/concrete.rs @@ -926,10 +926,8 @@ impl Policy { Ok(Policy::Or(subs)) } "thresh" => top - .to_null_threshold() - .map_err(Error::ParseThreshold)? - .translate_by_index(|i| Policy::from_tree(&top.args[1 + i]).map(Arc::new)) - .map(Policy::Thresh), + .verify_threshold(|sub| Self::from_tree(sub).map(Arc::new)) + .map(Self::Thresh), x => Err(Error::Parse(crate::ParseError::Tree(crate::ParseTreeError::UnknownName { name: x.to_owned(), }))), diff --git a/src/policy/semantic.rs b/src/policy/semantic.rs index 865770bff..e23851292 100644 --- a/src/policy/semantic.rs +++ b/src/policy/semantic.rs @@ -343,7 +343,7 @@ impl expression::FromTree for Policy { Ok(Policy::Thresh(Threshold::new(1, subs).map_err(Error::Threshold)?)) } "thresh" => { - let thresh = top.to_null_threshold().map_err(Error::ParseThreshold)?; + let thresh = top.verify_threshold(|sub| Self::from_tree(sub).map(Arc::new))?; // thresh(1) and thresh(n) are disallowed in semantic policies if thresh.is_or() { @@ -353,9 +353,7 @@ impl expression::FromTree for Policy { return Err(Error::ParseThreshold(crate::ParseThresholdError::IllegalAnd)); } - thresh - .translate_by_index(|i| Policy::from_tree(&top.args[1 + i]).map(Arc::new)) - .map(Policy::Thresh) + Ok(Policy::Thresh(thresh)) } x => Err(Error::Parse(crate::ParseError::Tree(crate::ParseTreeError::UnknownName { name: x.to_owned(), diff --git a/src/primitives/threshold.rs b/src/primitives/threshold.rs index 7c40f7ccd..ee0c1bda5 100644 --- a/src/primitives/threshold.rs +++ b/src/primitives/threshold.rs @@ -151,8 +151,8 @@ impl Threshold { /// Like [`Self::translate_ref`] but passes indices to the closure rather than internal data. /// /// This is useful in situations where the data to be translated exists outside of the - /// threshold itself, and the threshold data is irrelevant. In particular it is commonly - /// paired with [`crate::expression::Tree::to_null_threshold`]. + /// threshold itself, and the threshold data is irrelevant. In particular it is used + /// within the `verify_threshold` method for expression trees. /// /// If the data to be translated comes from a post-order iterator, you may instead want /// [`Self::map_from_post_order_iter`]. From 616b6b6cf2b55cf4fe9230ac07c57ab4c249b9cb Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 2 Sep 2024 19:18:06 +0000 Subject: [PATCH 2/5] expression: hide all Tree fields and encapsulate API This removes the ability to randomly access children of tree nodes. You can get them in order using the `children` iterator, and get them recursively using the `TreeLike` iterator methods. (In the next commits we will specialize these a bit, providing a `pre_order_iter` and `rtl_post_order_iter` which let you efficiently skip over subtrees. We will need these to parse Taproot expression trees and they don't fit into the `TreeLike` trait, at least not efficiently.) --- src/descriptor/mod.rs | 2 +- src/descriptor/segwitv0.rs | 2 +- src/descriptor/sh.rs | 2 +- src/descriptor/tr.rs | 12 ++++----- src/expression/mod.rs | 51 +++++++++++++++++++++++++++++--------- src/miniscript/astelem.rs | 14 +++++++---- src/policy/concrete.rs | 8 +++--- src/policy/semantic.rs | 8 +++--- 8 files changed, 63 insertions(+), 36 deletions(-) diff --git a/src/descriptor/mod.rs b/src/descriptor/mod.rs index ce7654958..f9e673c2b 100644 --- a/src/descriptor/mod.rs +++ b/src/descriptor/mod.rs @@ -966,7 +966,7 @@ impl Descriptor { impl crate::expression::FromTree for Descriptor { /// Parse an expression tree into a descriptor. fn from_tree(top: &expression::Tree) -> Result, Error> { - Ok(match (top.name, top.args.len() as u32) { + Ok(match (top.name(), top.n_children()) { ("pkh", 1) => Descriptor::Pkh(Pkh::from_tree(top)?), ("wpkh", 1) => Descriptor::Wpkh(Wpkh::from_tree(top)?), ("sh", 1) => Descriptor::Sh(Sh::from_tree(top)?), diff --git a/src/descriptor/segwitv0.rs b/src/descriptor/segwitv0.rs index 6679f062a..b09a9f887 100644 --- a/src/descriptor/segwitv0.rs +++ b/src/descriptor/segwitv0.rs @@ -253,7 +253,7 @@ impl crate::expression::FromTree for Wsh { .map_err(From::from) .map_err(Error::Parse)?; - if top.name == "sortedmulti" { + if top.name() == "sortedmulti" { return Ok(Wsh { inner: WshInner::SortedMulti(SortedMultiVec::from_tree(top)?) }); } let sub = Miniscript::from_tree(top)?; diff --git a/src/descriptor/sh.rs b/src/descriptor/sh.rs index f04ddae1c..f00e3f093 100644 --- a/src/descriptor/sh.rs +++ b/src/descriptor/sh.rs @@ -87,7 +87,7 @@ impl crate::expression::FromTree for Sh { .map_err(From::from) .map_err(Error::Parse)?; - let inner = match top.name { + let inner = match top.name() { "wsh" => ShInner::Wsh(Wsh::from_tree(top)?), "wpkh" => ShInner::Wpkh(Wpkh::from_tree(top)?), "sortedmulti" => ShInner::SortedMulti(SortedMultiVec::from_tree(top)?), diff --git a/src/descriptor/tr.rs b/src/descriptor/tr.rs index 372c7911e..ba68a73fa 100644 --- a/src/descriptor/tr.rs +++ b/src/descriptor/tr.rs @@ -532,12 +532,12 @@ impl crate::expression::FromTree for Tr { } else { // From here on we are into the taptree. if item.n_children_yielded == 0 { - match item.node.parens { + match item.node.parens() { Parens::Curly => { - if !item.node.name.is_empty() { + if !item.node.name().is_empty() { return Err(Error::Parse(ParseError::Tree( ParseTreeError::IncorrectName { - actual: item.node.name.to_owned(), + actual: item.node.name().to_owned(), expected: "", }, ))); @@ -545,7 +545,7 @@ impl crate::expression::FromTree for Tr { if round_paren_depth > 0 { return Err(Error::Parse(ParseError::Tree( ParseTreeError::IllegalCurlyBrace { - pos: item.node.children_pos, + pos: item.node.children_pos(), }, ))); } @@ -555,7 +555,7 @@ impl crate::expression::FromTree for Tr { } } if item.is_complete { - if item.node.parens == Parens::Curly { + if item.node.parens() == Parens::Curly { if item.n_children_yielded == 2 { let rchild = tree_stack.pop().unwrap(); let lchild = tree_stack.pop().unwrap(); @@ -571,7 +571,7 @@ impl crate::expression::FromTree for Tr { ))); } } else { - if item.node.parens == Parens::Round { + if item.node.parens() == Parens::Round { round_paren_depth -= 1; } if round_paren_depth == 0 { diff --git a/src/expression/mod.rs b/src/expression/mod.rs index cdd688b31..390082cd1 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -22,14 +22,14 @@ pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVW /// A token of the form `x(...)` or `x` pub struct Tree<'a> { /// The name `x` - pub name: &'a str, + name: &'a str, /// Position one past the last character of the node's name. If it has /// children, the position of the '(' or '{'. - pub children_pos: usize, + children_pos: usize, /// The type of parentheses surrounding the node's children. - pub parens: Parens, + parens: Parens, /// The comma-separated contents of the `(...)`, if any - pub args: Vec>, + args: Vec>, } impl PartialEq for Tree<'_> { @@ -79,6 +79,32 @@ pub trait FromTree: Sized { } impl<'a> Tree<'a> { + /// The name of this tree node. + pub fn name(&self) -> &str { self.name } + + /// The 0-indexed byte-position of the name in the original expression tree. + pub fn name_pos(&self) -> usize { self.children_pos - self.name.len() - 1 } + + /// The 0-indexed byte-position of the '(' or '{' character which starts the + /// expression's children. + /// + /// If the expression has no children, returns one past the end of the name. + pub fn children_pos(&self) -> usize { self.children_pos - self.name.len() - 1 } + + /// The number of children this node has. + pub fn n_children(&self) -> usize { self.args.len() } + + /// The type of parenthesis surrounding this node's children. + /// + /// If the node has no children, this will be `Parens::None`. + pub fn parens(&self) -> Parens { self.parens } + + /// An iterator over the direct children of this node. + /// + /// If you want to iterate recursively, use the [`TreeLike`] API which + /// provides methods `pre_order_iter` and `post_order_iter`. + pub fn children(&self) -> impl ExactSizeIterator { self.args.iter() } + /// Split the name by a separating character. /// /// If the separator is present, returns the prefix before the separator and @@ -260,20 +286,21 @@ impl<'a> Tree<'a> { &self, mut map_child: F, ) -> Result, E> { + let mut child_iter = self.children(); + let kchild = match child_iter.next() { + Some(k) => k, + None => return Err(ParseThresholdError::NoChildren.into()), + }; // First, special case "no arguments" so we can index the first argument without panics. - if self.args.is_empty() { - return Err(ParseThresholdError::NoChildren.into()); - } - - if !self.args[0].args.is_empty() { + if kchild.n_children() > 0 { return Err(ParseThresholdError::KNotTerminal.into()); } - let k = parse_num(self.args[0].name).map_err(ParseThresholdError::ParseK)? as usize; - Threshold::new(k, vec![(); self.args.len() - 1]) + let k = parse_num(kchild.name()).map_err(ParseThresholdError::ParseK)? as usize; + Threshold::new(k, vec![(); self.n_children() - 1]) .map_err(ParseThresholdError::Threshold) .map_err(From::from) - .and_then(|thresh| thresh.translate_by_index(|i| map_child(&self.args[1 + i]))) + .and_then(|thresh| thresh.translate_by_index(|_| map_child(child_iter.next().unwrap()))) } /// Check that a tree has no curly-brace children in it. diff --git a/src/miniscript/astelem.rs b/src/miniscript/astelem.rs index d28617dd8..779ca7122 100644 --- a/src/miniscript/astelem.rs +++ b/src/miniscript/astelem.rs @@ -112,10 +112,14 @@ impl crate::expression::FromTree for Termina top.verify_n_children("andor", 3..=3) .map_err(From::from) .map_err(Error::Parse)?; - let x = Arc::>::from_tree(&top.args[0])?; - let y = Arc::>::from_tree(&top.args[1])?; - let z = Arc::>::from_tree(&top.args[2])?; - Ok(Terminal::AndOr(x, y, z)) + let mut child_iter = top + .children() + .map(|x| Arc::>::from_tree(x)); + Ok(Terminal::AndOr( + child_iter.next().unwrap()?, + child_iter.next().unwrap()?, + child_iter.next().unwrap()?, + )) } "or_b" => binary(top, "or_b", Terminal::OrB), "or_d" => binary(top, "or_d", Terminal::OrD), @@ -137,7 +141,7 @@ impl crate::expression::FromTree for Termina if frag_wrap == Some("") { return Err(Error::Parse(crate::ParseError::Tree( - crate::ParseTreeError::UnknownName { name: top.name.to_owned() }, + crate::ParseTreeError::UnknownName { name: top.name().to_owned() }, ))); } let ms = super::wrap_into_miniscript(unwrapped, frag_wrap.unwrap_or(""))?; diff --git a/src/policy/concrete.rs b/src/policy/concrete.rs index b5eec2215..f1401c583 100644 --- a/src/policy/concrete.rs +++ b/src/policy/concrete.rs @@ -856,7 +856,7 @@ impl Policy { .map_err(From::from) .map_err(Error::Parse)? } else { - (None, top.name) + (None, top.name()) }; let frag_prob = match frag_prob { @@ -906,8 +906,7 @@ impl Policy { .map_err(From::from) .map_err(Error::Parse)?; let subs = top - .args - .iter() + .children() .map(|arg| Self::from_tree(arg).map(Arc::new)) .collect::>()?; Ok(Policy::And(subs)) @@ -917,8 +916,7 @@ impl Policy { .map_err(From::from) .map_err(Error::Parse)?; let subs = top - .args - .iter() + .children() .map(|arg| { Self::from_tree_prob(arg, true).map(|(prob, sub)| (prob, Arc::new(sub))) }) diff --git a/src/policy/semantic.rs b/src/policy/semantic.rs index e23851292..6f16b3d67 100644 --- a/src/policy/semantic.rs +++ b/src/policy/semantic.rs @@ -285,7 +285,7 @@ serde_string_impl_pk!(Policy, "a miniscript semantic policy"); impl expression::FromTree for Policy { fn from_tree(top: &expression::Tree) -> Result, Error> { - match top.name { + match top.name() { "UNSATISFIABLE" => { top.verify_n_children("UNSATISFIABLE", 0..=0) .map_err(From::from) @@ -325,8 +325,7 @@ impl expression::FromTree for Policy { .map_err(From::from) .map_err(Error::Parse)?; let subs = top - .args - .iter() + .children() .map(|arg| Self::from_tree(arg).map(Arc::new)) .collect::, Error>>()?; Ok(Policy::Thresh(Threshold::new(subs.len(), subs).map_err(Error::Threshold)?)) @@ -336,8 +335,7 @@ impl expression::FromTree for Policy { .map_err(From::from) .map_err(Error::Parse)?; let subs = top - .args - .iter() + .children() .map(|arg| Self::from_tree(arg).map(Arc::new)) .collect::, Error>>()?; Ok(Policy::Thresh(Threshold::new(1, subs).map_err(Error::Threshold)?)) From d05a034376dfd081c766a5965e72c3ba8056d28e Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 2 Sep 2024 19:53:03 +0000 Subject: [PATCH 3/5] expression: rewrite Tree module to no longer use a recursive data type This significantly speeds up and simplifies tree parsing, at the cost of having a more complicated API (but we mostly addressed the API question in the previous commits). This completely eliminates recursion for the Tree data type, including in the Drop impl. Big diff but there are only two "real" changes -- expression/mod.rs is substantially rewritten of course since we replace the core datatype, and Tr::from_tree is substantially rewritten since doing so was the point of this change. The rest of the changes are mechanically changing the signature of expression::FromTree::from_tree everywhere. --- src/descriptor/bare.rs | 12 +- src/descriptor/mod.rs | 4 +- src/descriptor/segwitv0.rs | 8 +- src/descriptor/sh.rs | 4 +- src/descriptor/sortedmulti.rs | 2 +- src/descriptor/tr.rs | 142 ++++----- src/expression/mod.rs | 585 +++++++++++++++++++++++++--------- src/miniscript/astelem.rs | 30 +- src/miniscript/mod.rs | 12 +- src/policy/concrete.rs | 8 +- src/policy/semantic.rs | 4 +- 11 files changed, 536 insertions(+), 275 deletions(-) diff --git a/src/descriptor/bare.rs b/src/descriptor/bare.rs index 3cb0da61e..b41ee2850 100644 --- a/src/descriptor/bare.rs +++ b/src/descriptor/bare.rs @@ -175,8 +175,8 @@ impl Liftable for Bare { } impl FromTree for Bare { - fn from_tree(top: &expression::Tree) -> Result { - let sub = Miniscript::::from_tree(top)?; + fn from_tree(root: expression::TreeIterItem) -> Result { + let sub = Miniscript::::from_tree(root)?; BareCtx::top_level_checks(&sub)?; Bare::new(sub) } @@ -186,7 +186,7 @@ impl core::str::FromStr for Bare { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } @@ -369,8 +369,8 @@ impl Liftable for Pkh { } impl FromTree for Pkh { - fn from_tree(top: &expression::Tree) -> Result { - let pk = top + fn from_tree(root: expression::TreeIterItem) -> Result { + let pk = root .verify_terminal_parent("pkh", "public key") .map_err(Error::Parse)?; Pkh::new(pk).map_err(Error::ContextError) @@ -381,7 +381,7 @@ impl core::str::FromStr for Pkh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } diff --git a/src/descriptor/mod.rs b/src/descriptor/mod.rs index f9e673c2b..1b6bc788c 100644 --- a/src/descriptor/mod.rs +++ b/src/descriptor/mod.rs @@ -965,7 +965,7 @@ impl Descriptor { impl crate::expression::FromTree for Descriptor { /// Parse an expression tree into a descriptor. - fn from_tree(top: &expression::Tree) -> Result, Error> { + fn from_tree(top: expression::TreeIterItem) -> Result, Error> { Ok(match (top.name(), top.n_children()) { ("pkh", 1) => Descriptor::Pkh(Pkh::from_tree(top)?), ("wpkh", 1) => Descriptor::Wpkh(Wpkh::from_tree(top)?), @@ -981,7 +981,7 @@ impl FromStr for Descriptor { type Err = Error; fn from_str(s: &str) -> Result, Error> { let top = expression::Tree::from_str(s)?; - let ret = Self::from_tree(&top)?; + let ret = Self::from_tree(top.root())?; if let Descriptor::Tr(ref inner) = ret { // FIXME preserve weird/broken behavior from 12.x. // See https://github.com/rust-bitcoin/rust-miniscript/issues/734 diff --git a/src/descriptor/segwitv0.rs b/src/descriptor/segwitv0.rs index b09a9f887..8ad173b76 100644 --- a/src/descriptor/segwitv0.rs +++ b/src/descriptor/segwitv0.rs @@ -247,7 +247,7 @@ impl Liftable for Wsh { } impl crate::expression::FromTree for Wsh { - fn from_tree(top: &expression::Tree) -> Result { + fn from_tree(top: expression::TreeIterItem) -> Result { let top = top .verify_toplevel("wsh", 1..=1) .map_err(From::from) @@ -284,7 +284,7 @@ impl core::str::FromStr for Wsh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Wsh::::from_tree(&top) + Wsh::::from_tree(top.root()) } } @@ -483,7 +483,7 @@ impl Liftable for Wpkh { } impl crate::expression::FromTree for Wpkh { - fn from_tree(top: &expression::Tree) -> Result { + fn from_tree(top: expression::TreeIterItem) -> Result { let pk = top .verify_terminal_parent("wpkh", "public key") .map_err(Error::Parse)?; @@ -495,7 +495,7 @@ impl core::str::FromStr for Wpkh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } diff --git a/src/descriptor/sh.rs b/src/descriptor/sh.rs index f00e3f093..8b0195e8f 100644 --- a/src/descriptor/sh.rs +++ b/src/descriptor/sh.rs @@ -81,7 +81,7 @@ impl fmt::Display for Sh { } impl crate::expression::FromTree for Sh { - fn from_tree(top: &expression::Tree) -> Result { + fn from_tree(top: expression::TreeIterItem) -> Result { let top = top .verify_toplevel("sh", 1..=1) .map_err(From::from) @@ -105,7 +105,7 @@ impl core::str::FromStr for Sh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } diff --git a/src/descriptor/sortedmulti.rs b/src/descriptor/sortedmulti.rs index 85664635e..ff3f62227 100644 --- a/src/descriptor/sortedmulti.rs +++ b/src/descriptor/sortedmulti.rs @@ -59,7 +59,7 @@ impl SortedMultiVec { } /// Parse an expression tree into a SortedMultiVec - pub fn from_tree(tree: &expression::Tree) -> Result + pub fn from_tree(tree: expression::TreeIterItem) -> Result where Pk: FromStrKey, { diff --git a/src/descriptor/tr.rs b/src/descriptor/tr.rs index ba68a73fa..ab385c940 100644 --- a/src/descriptor/tr.rs +++ b/src/descriptor/tr.rs @@ -14,7 +14,6 @@ use sync::Arc; use super::checksum; use crate::descriptor::DefiniteDescriptorKey; use crate::expression::{self, FromTree}; -use crate::iter::TreeLike as _; use crate::miniscript::satisfy::{Placeholder, Satisfaction, SchnorrSigType, Witness}; use crate::miniscript::Miniscript; use crate::plan::AssetProvider; @@ -495,99 +494,84 @@ impl core::str::FromStr for Tr { fn from_str(s: &str) -> Result { let expr_tree = expression::Tree::from_str(s)?; - Self::from_tree(&expr_tree) + Self::from_tree(expr_tree.root()) } } impl crate::expression::FromTree for Tr { - fn from_tree(expr_tree: &expression::Tree) -> Result { + fn from_tree(root: expression::TreeIterItem) -> Result { use crate::expression::{Parens, ParseTreeError}; - expr_tree - .verify_toplevel("tr", 1..=2) + struct TreeStack<'s, Pk: MiniscriptKey> { + inner: Vec<(expression::TreeIterItem<'s>, TapTree)>, + } + + impl<'s, Pk: MiniscriptKey> TreeStack<'s, Pk> { + fn new() -> Self { Self { inner: Vec::with_capacity(128) } } + + fn push(&mut self, parent: expression::TreeIterItem<'s>, tree: TapTree) { + let mut next_push = (parent, tree); + while let Some(top) = self.inner.pop() { + if next_push.0.index() == top.0.index() { + next_push.0 = top.0.parent().unwrap(); + next_push.1 = TapTree::combine(top.1, next_push.1); + } else { + self.inner.push(top); + break; + } + } + self.inner.push(next_push); + } + + fn pop_final(&mut self) -> Option> { + assert_eq!(self.inner.len(), 1); + self.inner.pop().map(|x| x.1) + } + } + + root.verify_toplevel("tr", 1..=2) .map_err(From::from) .map_err(Error::Parse)?; - let mut round_paren_depth = 0; + let mut root_children = root.children(); + let internal_key: Pk = root_children + .next() + .unwrap() // `verify_toplevel` above checked that first child existed + .verify_terminal("internal key") + .map_err(Error::Parse)?; - let mut internal_key = None; - let mut tree_stack = vec![]; + let tap_tree = match root_children.next() { + None => return Tr::new(internal_key, None), + Some(tree) => tree, + }; - for item in expr_tree.verbose_pre_order_iter() { - // Top-level "tr" node. - if item.index == 0 { - if item.is_complete { - debug_assert!( - internal_key.is_some(), - "checked above that top-level 'tr' has children" - ); + let mut tree_stack = TreeStack::new(); + let mut tap_tree_iter = tap_tree.pre_order_iter(); + // while let construction needed because we modify the iterator inside the loop + // (by calling skip_descendants to skip over the contents of the tapscripts). + while let Some(node) = tap_tree_iter.next() { + if node.parens() == Parens::Curly { + if !node.name().is_empty() { + return Err(Error::Parse(ParseError::Tree(ParseTreeError::IncorrectName { + actual: node.name().to_owned(), + expected: "", + }))); } - } else if item.index == 1 { - // First child of tr, which must be the internal key - internal_key = item - .node - .verify_terminal("internal key") - .map_err(Error::Parse) - .map(Some)?; + node.verify_n_children("taptree branch", 2..=2) + .map_err(From::from) + .map_err(Error::Parse)?; } else { - // From here on we are into the taptree. - if item.n_children_yielded == 0 { - match item.node.parens() { - Parens::Curly => { - if !item.node.name().is_empty() { - return Err(Error::Parse(ParseError::Tree( - ParseTreeError::IncorrectName { - actual: item.node.name().to_owned(), - expected: "", - }, - ))); - } - if round_paren_depth > 0 { - return Err(Error::Parse(ParseError::Tree( - ParseTreeError::IllegalCurlyBrace { - pos: item.node.children_pos(), - }, - ))); - } - } - Parens::Round => round_paren_depth += 1, - _ => {} - } - } - if item.is_complete { - if item.node.parens() == Parens::Curly { - if item.n_children_yielded == 2 { - let rchild = tree_stack.pop().unwrap(); - let lchild = tree_stack.pop().unwrap(); - tree_stack.push(TapTree::combine(lchild, rchild)); - } else { - return Err(Error::Parse(ParseError::Tree( - ParseTreeError::IncorrectNumberOfChildren { - description: "Taptree node", - n_children: item.n_children_yielded, - minimum: Some(2), - maximum: Some(2), - }, - ))); - } - } else { - if item.node.parens() == Parens::Round { - round_paren_depth -= 1; - } - if round_paren_depth == 0 { - let script = Miniscript::from_tree(item.node)?; - // FIXME hack for https://github.com/rust-bitcoin/rust-miniscript/issues/734 - if script.ty.corr.base != crate::miniscript::types::Base::B { - return Err(Error::NonTopLevel(format!("{:?}", script))); - }; - tree_stack.push(TapTree::Leaf(Arc::new(script))); - } - } - } + let script = Miniscript::from_tree(node)?; + // FIXME hack for https://github.com/rust-bitcoin/rust-miniscript/issues/734 + if script.ty.corr.base != crate::miniscript::types::Base::B { + return Err(Error::NonTopLevel(format!("{:?}", script))); + }; + + tree_stack.push(node.parent().unwrap(), TapTree::Leaf(Arc::new(script))); + tap_tree_iter.skip_descendants(); } } - assert!(tree_stack.len() <= 1); - Tr::new(internal_key.unwrap(), tree_stack.pop()) + Tr::new(internal_key, tree_stack.pop_final()) } } diff --git a/src/expression/mod.rs b/src/expression/mod.rs index 390082cd1..7f7057095 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -1,6 +1,28 @@ // SPDX-License-Identifier: CC0-1.0 -//! # Function-like Expression Language +//! Expression Trees +//! +//! This module represents expression trees, which are trees whose nodes have +//! names and arbitrary numbers of children. As strings, they are defined by +//! the following rules: +//! +//! * Any sequence of valid descriptor characters, including the empty string, is a "name". +//! * A name is an expression (called a "leaf"). +//! * Given n expression trees `s_1`, ..., `s_n` and a name `X`, `X(s_1,...,s_n)` is an expression. +//! * Given n expression trees `s_1`, ..., `s_n` and a name `X`, `X{s_1,...,s_n}` is an expression. +//! +//! Note that while `leaf` and `leaf()` are both expressions, only the former is +//! actually a leaf. The latter has one child which is a leaf with an empty name. +//! If these are intended to be equivalent, the caller must add logic to do this +//! when converting the expression tree into its final type. +//! +//! All recursive structures in this library can be serialized and parsed as trees, +//! though of course each data structure further limits the grammar (e.g. to enforce +//! that names be valid Miniscript fragment names, public keys, hashes or timelocks). +//! +//! Users of this library probably do not need to use this module at all, unless they +//! are implementing their own Miniscript-like structures or extensions to Miniscript. +//! It is intended to be used as a utility to implement string parsing. //! mod error; @@ -11,53 +33,131 @@ use core::str::FromStr; pub use self::error::{ParseNumError, ParseThresholdError, ParseTreeError}; use crate::blanket_traits::StaticDebugAndDisplay; use crate::descriptor::checksum::verify_checksum; -use crate::iter::{self, TreeLike}; use crate::prelude::*; use crate::{AbsLockTime, Error, ParseError, RelLockTime, Threshold, MAX_RECURSION_DEPTH}; /// Allowed characters are descriptor strings. pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ "; -#[derive(Debug)] -/// A token of the form `x(...)` or `x` -pub struct Tree<'a> { - /// The name `x` - name: &'a str, - /// Position one past the last character of the node's name. If it has - /// children, the position of the '(' or '{'. - children_pos: usize, - /// The type of parentheses surrounding the node's children. +/// Internal data structure representing a node of an expression tree. +/// +/// Users of the public API will always interact with this using the +/// wrapper type [`TreeIterItem`] which also contains a reference to +/// the whole tree. +#[derive(Debug, PartialEq, Eq)] +struct TreeNode<'s> { + name: &'s str, + name_pos: usize, parens: Parens, - /// The comma-separated contents of the `(...)`, if any - args: Vec>, + n_children: usize, + index: usize, + parent_idx: Option, + last_child_idx: Option, + right_sibling_idx: Option, } -impl PartialEq for Tree<'_> { - fn eq(&self, other: &Self) -> bool { - let mut stack = vec![(self, other)]; - while let Some((me, you)) = stack.pop() { - if me.name != you.name || me.args.len() != you.args.len() { - return false; - } - stack.extend(me.args.iter().zip(you.args.iter())); +impl TreeNode<'_> { + fn null(index: usize) -> Self { + TreeNode { + name: "", + name_pos: 0, + parens: Parens::None, + n_children: 0, + index, + parent_idx: None, + last_child_idx: None, + right_sibling_idx: None, } - true } } -impl Eq for Tree<'_> {} - -impl<'a, 't> TreeLike for &'t Tree<'a> { - type NaryChildren = &'t [Tree<'a>]; - fn nary_len(tc: &Self::NaryChildren) -> usize { tc.len() } - fn nary_index(tc: Self::NaryChildren, idx: usize) -> Self { &tc[idx] } +/// An iterator over the nodes of a tree, in pre-order. +/// +/// This has several differences from the pre-order iterator provided by [`crate::iter::TreeLike`]: +/// +/// * this is double-ended, so a right-to-left post-order iterator can be obtained by `.rev()`. +/// * the yielded items represent sub-trees which themselves can be iterated from +/// * the iterator can be told to skip all descendants of the current node, using +/// [`PreOrderIter::skip_descendants`]. +pub struct PreOrderIter<'s> { + nodes: &'s [TreeNode<'s>], + inner: core::ops::RangeInclusive, +} - fn as_node(&self) -> iter::Tree { - if self.args.is_empty() { - iter::Tree::Nullary - } else { - iter::Tree::Nary(&self.args) +impl PreOrderIter<'_> { + /// Skip all the descendants of the most recently-yielded item. + /// + /// Here "most recently-yielded item" means the most recently-yielded item when + /// running the iterator forward. If you run the iterator backward, e.g. by iterating + /// on `iter.by_ref().rev()`, those items are not considered, and the resulting + /// behavior of this function may be surprising. + /// + /// If this method is called before any nodes have been yielded, the entire iterator + /// will be skipped. + pub fn skip_descendants(&mut self) { + if self.inner.is_empty() { + return; } + + let last_index = self.inner.start().saturating_sub(1); + // Construct a synthetic iterator over all descendants + let last_item = TreeIterItem { nodes: self.nodes, index: last_index }; + let skip_past = last_item.rightmost_descendant_idx(); + // ...and copy the indices out of that. + debug_assert!(skip_past + 1 >= *self.inner.start()); + debug_assert!(skip_past <= *self.inner.end()); + self.inner = skip_past + 1..=*self.inner.end(); + } +} + +impl<'s> Iterator for PreOrderIter<'s> { + type Item = TreeIterItem<'s>; + + fn next(&mut self) -> Option { + self.inner + .next() + .map(|n| TreeIterItem { nodes: self.nodes, index: n }) + } + + fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } +} + +impl DoubleEndedIterator for PreOrderIter<'_> { + fn next_back(&mut self) -> Option { + self.inner + .next_back() + .map(|n| TreeIterItem { nodes: self.nodes, index: n }) + } +} + +impl ExactSizeIterator for PreOrderIter<'_> { + // The inner `RangeInclusive` does not impl ExactSizeIterator because the + // range 0..=usize::MAX would have length usize::MAX + 1. But we know + // that our range is limited by the `n_nodes` variable returned by + // `parse_pre_check`, and if THAT didn't overflow then this won't either. +} + +/// A tree node, as yielded from an iterator. +#[derive(Copy, Clone)] +pub struct TreeIterItem<'s> { + nodes: &'s [TreeNode<'s>], + index: usize, +} + +/// An iterator over the direct children of a tree node. +pub struct DirectChildIterator<'s> { + current: Option>, +} + +impl<'s> Iterator for DirectChildIterator<'s> { + type Item = TreeIterItem<'s>; + + fn next(&mut self) -> Option { + let item = self.current.take()?; + self.current = item.nodes[item.index] + .right_sibling_idx + .map(|n| TreeIterItem { nodes: item.nodes, index: n }); + Some(item) } } @@ -75,35 +175,87 @@ pub enum Parens { /// A trait for extracting a structure from a Tree representation in token form pub trait FromTree: Sized { /// Extract a structure from Tree representation - fn from_tree(top: &Tree) -> Result; + fn from_tree(root: TreeIterItem) -> Result; } -impl<'a> Tree<'a> { +impl<'s> TreeIterItem<'s> { /// The name of this tree node. - pub fn name(&self) -> &str { self.name } + pub fn name(self) -> &'s str { self.nodes[self.index].name } /// The 0-indexed byte-position of the name in the original expression tree. - pub fn name_pos(&self) -> usize { self.children_pos - self.name.len() - 1 } + pub fn name_pos(self) -> usize { self.nodes[self.index].name_pos } /// The 0-indexed byte-position of the '(' or '{' character which starts the /// expression's children. /// /// If the expression has no children, returns one past the end of the name. - pub fn children_pos(&self) -> usize { self.children_pos - self.name.len() - 1 } + pub fn children_pos(self) -> usize { self.name_pos() + self.name().len() + 1 } /// The number of children this node has. - pub fn n_children(&self) -> usize { self.args.len() } + pub fn n_children(self) -> usize { self.nodes[self.index].n_children } /// The type of parenthesis surrounding this node's children. /// /// If the node has no children, this will be `Parens::None`. - pub fn parens(&self) -> Parens { self.parens } + pub fn parens(self) -> Parens { self.nodes[self.index].parens } /// An iterator over the direct children of this node. /// - /// If you want to iterate recursively, use the [`TreeLike`] API which - /// provides methods `pre_order_iter` and `post_order_iter`. - pub fn children(&self) -> impl ExactSizeIterator { self.args.iter() } + /// If you want to iterate recursively, use the [`Self::pre_order_iter`] + /// or [`Self::rtl_post_order_iter`] method. + pub fn children(self) -> DirectChildIterator<'s> { + DirectChildIterator { current: self.first_child() } + } + + /// The index of the node in its underlying tree. + pub fn index(&self) -> usize { self.index } + + /// Accessor for the parent of the node, if it has a parent (is not the root). + pub fn parent(self) -> Option { + self.nodes[self.index] + .parent_idx + .map(|n| Self { nodes: self.nodes, index: n }) + } + + /// Whether the node is the first child of its parent. + /// + /// Returns false for the root. + pub fn is_first_child(self) -> bool { + self.nodes[self.index] + .parent_idx + .map(|n| n + 1 == self.index) + .unwrap_or(false) + } + + /// Accessor for the first child of the node, if it has a first child. + pub fn first_child(self) -> Option { + // If the node has any children at all, its first child is the one right after it. + self.nodes[self.index] + .last_child_idx + .map(|_| Self { nodes: self.nodes, index: self.index + 1 }) + } + + /// Accessor for the sibling of the node, if it has one. + pub fn right_sibling(self) -> Option { + self.nodes[self.index] + .right_sibling_idx + .map(|n| Self { nodes: self.nodes, index: n }) + } + + /// Helper function to find the rightmost descendant of a node. + /// + /// Used to construct iterators which cover only the node and its descendants. + /// If the node has no descendants, returns its own index. + fn rightmost_descendant_idx(self) -> usize { + let mut scan = self.index; + while let Some(idx) = self.nodes[scan].last_child_idx { + scan = idx; + while let Some(idx) = self.nodes[scan].right_sibling_idx { + scan = idx; + } + } + scan + } /// Split the name by a separating character. /// @@ -111,15 +263,18 @@ impl<'a> Tree<'a> { /// the suffix after the separator. Otherwise returns the whole name. /// /// If the separator occurs multiple times, returns an error. - pub fn name_separated(&self, separator: char) -> Result<(Option<&str>, &str), ParseTreeError> { - let mut name_split = self.name.splitn(3, separator); + pub fn name_separated( + self, + separator: char, + ) -> Result<(Option<&'s str>, &'s str), ParseTreeError> { + let mut name_split = self.name().splitn(3, separator); match (name_split.next(), name_split.next(), name_split.next()) { (None, _, _) => unreachable!("'split' always yields at least one element"), - (Some(_), None, _) => Ok((None, self.name)), + (Some(_), None, _) => Ok((None, self.name())), (Some(prefix), Some(name), None) => Ok((Some(prefix), name)), (Some(_), Some(_), Some(suffix)) => Err(ParseTreeError::MultipleSeparators { separator, - pos: self.children_pos - suffix.len() - 1, + pos: self.children_pos() - suffix.len() - 1, }), } } @@ -129,7 +284,7 @@ impl<'a> Tree<'a> { /// The `description` argument is only used to populate the error return, /// and is not validated in any way. pub fn verify_n_children( - &self, + self, description: &'static str, n_children: impl ops::RangeBounds, ) -> Result<(), ParseTreeError> { @@ -167,19 +322,19 @@ impl<'a> Tree<'a> { &self, name: &'static str, n_children: impl ops::RangeBounds, - ) -> Result<&Self, ParseTreeError> { + ) -> Result { assert!( !n_children.contains(&0), "verify_toplevel is intended for nodes with >= 1 child" ); - if self.name != name { - Err(ParseTreeError::IncorrectName { actual: self.name.to_owned(), expected: name }) - } else if self.parens == Parens::Curly { - Err(ParseTreeError::IllegalCurlyBrace { pos: self.children_pos }) + if self.name() != name { + Err(ParseTreeError::IncorrectName { actual: self.name().to_owned(), expected: name }) + } else if self.parens() == Parens::Curly { + Err(ParseTreeError::IllegalCurlyBrace { pos: self.children_pos() }) } else { self.verify_n_children(name, n_children)?; - Ok(&self.args[0]) + Ok(self.first_child().unwrap()) } } @@ -191,10 +346,11 @@ impl<'a> Tree<'a> { pub fn verify_after(&self) -> Result { self.verify_n_children("after", 1..=1) .map_err(ParseError::Tree)?; - self.args[0] + let child = self.first_child().unwrap(); + child .verify_n_children("absolute locktime", 0..=0) .map_err(ParseError::Tree)?; - parse_num(self.args[0].name) + parse_num(child.name()) .map_err(ParseError::Num) .and_then(|n| AbsLockTime::from_consensus(n).map_err(ParseError::AbsoluteLockTime)) } @@ -207,10 +363,11 @@ impl<'a> Tree<'a> { pub fn verify_older(&self) -> Result { self.verify_n_children("older", 1..=1) .map_err(ParseError::Tree)?; - self.args[0] + let child = self.first_child().unwrap(); + child .verify_n_children("relative locktime", 0..=0) .map_err(ParseError::Tree)?; - parse_num(self.args[0].name) + parse_num(child.name()) .map_err(ParseError::Num) .and_then(|n| RelLockTime::from_consensus(n).map_err(ParseError::RelativeLockTime)) } @@ -228,7 +385,7 @@ impl<'a> Tree<'a> { { self.verify_n_children(description, 0..=0) .map_err(ParseError::Tree)?; - T::from_str(self.name).map_err(ParseError::box_from_str) + T::from_str(self.name()).map_err(ParseError::box_from_str) } /// Check that a tree node has exactly one child, which is a terminal. @@ -248,7 +405,9 @@ impl<'a> Tree<'a> { { self.verify_n_children(description, 1..=1) .map_err(ParseError::Tree)?; - self.args[0].verify_terminal(inner_description) + self.first_child() + .unwrap() + .verify_terminal(inner_description) } /// Check that a tree node has exactly two children. @@ -257,12 +416,11 @@ impl<'a> Tree<'a> { /// /// The `description` argument is only used to populate the error return, /// and is not validated in any way. - pub fn verify_binary( - &self, - description: &'static str, - ) -> Result<(&Self, &Self), ParseTreeError> { + pub fn verify_binary(&self, description: &'static str) -> Result<(Self, Self), ParseTreeError> { self.verify_n_children(description, 2..=2)?; - Ok((&self.args[0], &self.args[1])) + let first_child = self.first_child().unwrap(); + let second_child = first_child.right_sibling().unwrap(); + Ok((first_child, second_child)) } /// Parses an expression tree as a threshold (a term with at least one child, @@ -279,11 +437,11 @@ impl<'a> Tree<'a> { /// and be able to return multiple error types.) pub fn verify_threshold< const MAX: usize, - F: FnMut(&Self) -> Result, + F: FnMut(Self) -> Result, T, E: From, >( - &self, + &'s self, mut map_child: F, ) -> Result, E> { let mut child_iter = self.children(); @@ -303,26 +461,59 @@ impl<'a> Tree<'a> { .and_then(|thresh| thresh.translate_by_index(|_| map_child(child_iter.next().unwrap()))) } + /// Returns an iterator over the nodes of the tree, in pre-order. + /// + /// Constructing the iterator takes O(depth) time. + pub fn pre_order_iter(&'s self) -> PreOrderIter<'s> { + PreOrderIter { nodes: self.nodes, inner: self.index..=self.rightmost_descendant_idx() } + } + + /// Returns an iterator over the nodes of the tree, in right-to-left post-order. + pub fn rtl_post_order_iter(&'s self) -> core::iter::Rev> { + self.pre_order_iter().rev() + } + /// Check that a tree has no curly-brace children in it. pub fn verify_no_curly_braces(&self) -> Result<(), ParseTreeError> { - for tree in self.pre_order_iter() { - if tree.parens == Parens::Curly { - return Err(ParseTreeError::IllegalCurlyBrace { pos: tree.children_pos }); + for node in self.rtl_post_order_iter() { + if node.parens() == Parens::Curly { + return Err(ParseTreeError::IllegalCurlyBrace { pos: node.children_pos() }); } } Ok(()) } +} + +#[derive(Debug, PartialEq, Eq)] +/// A parsed expression tree. See module-level documentation for syntax. +pub struct Tree<'s> { + /// The nodes, stored in pre-order. + nodes: Vec>, +} + +impl<'a> Tree<'a> { + /// Returns the root node of the tree, or `None` if the tree is empty. + pub fn root(&'a self) -> TreeIterItem<'a> { + assert_ne!( + self.nodes.len(), + 0, + "trees cannot be empty; the empty string parses as a single root with empty name" + ); + TreeIterItem { nodes: &self.nodes, index: 0 } + } /// Check that a string is a well-formed expression string, with optional /// checksum. /// - /// Returns the string with the checksum removed and its tree depth. - fn parse_pre_check(s: &str) -> Result<(&str, usize), ParseTreeError> { + /// Returns the string with the checksum removed, the maximum depth, and the + /// number of nodes in the tree. + fn parse_pre_check(s: &str) -> Result<(&str, usize, usize), ParseTreeError> { // First, scan through string to make sure it is well-formed. // Do ASCII/checksum check first; after this we can use .bytes().enumerate() rather // than .char_indices(), which is *significantly* faster. let s = verify_checksum(s)?; + let mut n_nodes = 1; let mut max_depth = 0; let mut open_paren_stack = Vec::with_capacity(128); for (pos, ch) in s.bytes().enumerate() { @@ -380,9 +571,15 @@ impl<'a> Tree<'a> { // now. return Err(ParseTreeError::UnmatchedCloseParen { ch: ch.into(), pos }); } - } else if ch == b',' && open_paren_stack.is_empty() { - // We consider commas outside of the tree to be "trailing characters" - return Err(ParseTreeError::TrailingCharacter { ch: ch.into(), pos }); + + n_nodes += 1; + } else if ch == b',' { + if open_paren_stack.is_empty() { + // We consider commas outside of the tree to be "trailing characters" + return Err(ParseTreeError::TrailingCharacter { ch: ch.into(), pos }); + } + + n_nodes += 1; } } // Catch "early end of string" @@ -399,7 +596,7 @@ impl<'a> Tree<'a> { }); } - Ok((s, max_depth)) + Ok((s, max_depth, n_nodes)) } /// Parses a tree from a string @@ -411,61 +608,73 @@ impl<'a> Tree<'a> { } fn from_str_inner(s: &'a str) -> Result { + fn new_node<'a>(nodes: &mut [TreeNode<'a>], stack: &[usize], pos: usize) -> TreeNode<'a> { + let parent_idx = stack.last().copied(); + if let Some(idx) = parent_idx { + nodes[idx].n_children += 1; + nodes[idx].last_child_idx = Some(nodes.len()); + } + + let mut new = TreeNode::null(nodes.len()); + new.name_pos = pos; + new.parent_idx = parent_idx; + new + } + // First, scan through string to make sure it is well-formed. - let (s, max_depth) = Self::parse_pre_check(s)?; - - // Now, knowing it is sane and well-formed, we can easily parse it backward, - // which will yield a post-order right-to-left iterator of its nodes. - let mut stack = Vec::with_capacity(max_depth); - let mut children_parens: Option<(Vec<_>, usize, Parens)> = None; - let mut node_name_end = s.len(); - for (pos, ch) in s.bytes().enumerate().rev() { - if ch == b')' || ch == b'}' { - stack.push(vec![]); - node_name_end = pos; + let (s, max_depth, n_nodes) = Self::parse_pre_check(s)?; + + let mut nodes = Vec::with_capacity(n_nodes); + + // Now, knowing it is sane and well-formed, we can easily parse it forward, + // as the string serialization lists all the nodes in pre-order. + let mut parent_stack = Vec::with_capacity(max_depth); + let mut current_node = Some(TreeNode::null(0)); + for (pos, ch) in s.bytes().enumerate() { + if ch == b'(' || ch == b'{' { + let mut current = current_node.expect("'(' only occurs after a node name"); + current.name = &s[current.name_pos..pos]; + current.parens = match ch { + b'(' => Parens::Round, + b'{' => Parens::Curly, + _ => unreachable!(), + }; + parent_stack.push(nodes.len()); + nodes.push(current); + + current_node = Some(new_node(&mut nodes, &parent_stack, pos + 1)); } else if ch == b',' { - let (mut args, children_pos, parens) = - children_parens - .take() - .unwrap_or((vec![], node_name_end, Parens::None)); - args.reverse(); - - let top = stack.last_mut().unwrap(); - let new_tree = - Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args }; - top.push(new_tree); - node_name_end = pos; - } else if ch == b'(' || ch == b'{' { - let (mut args, children_pos, parens) = - children_parens - .take() - .unwrap_or((vec![], node_name_end, Parens::None)); - args.reverse(); - - let mut top = stack.pop().unwrap(); - let new_tree = - Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args }; - top.push(new_tree); - children_parens = Some(( - top, - pos, - match ch { - b'(' => Parens::Round, - b'{' => Parens::Curly, - _ => unreachable!(), - }, - )); - node_name_end = pos; + if let Some(mut current) = current_node { + current.name = &s[current.name_pos..pos]; + nodes.push(current); + } + + if let Some(last_sib_idx) = + parent_stack.last().and_then(|n| nodes[*n].last_child_idx) + { + nodes[last_sib_idx].right_sibling_idx = Some(nodes.len()); + } + current_node = Some(new_node(&mut nodes, &parent_stack, pos + 1)); + } else if ch == b')' || ch == b'}' { + if let Some(mut current) = current_node { + current.name = &s[current.name_pos..pos]; + nodes.push(current); + } + + current_node = None; + parent_stack.pop(); } } + if let Some(mut current) = current_node { + current.name = &s[current.name_pos..]; + nodes.push(current); + } + + assert_eq!(parent_stack.capacity(), max_depth); + assert_eq!(nodes.capacity(), n_nodes); + assert_eq!(nodes.len(), nodes.capacity()); - assert_eq!(stack.len(), 0); - let (mut args, children_pos, parens) = - children_parens - .take() - .unwrap_or((vec![], node_name_end, Parens::None)); - args.reverse(); - Ok(Tree { name: &s[..node_name_end], children_pos, parens, args }) + Ok(Tree { nodes }) } } @@ -488,29 +697,76 @@ mod tests { use super::*; use crate::ParseError; - /// Test functions to manually build trees - fn leaf(name: &str) -> Tree { - Tree { name, parens: Parens::None, children_pos: name.len(), args: vec![] } + struct NodeBuilder<'a> { + inner: Vec>, + sibling_stack: Vec>, + parent_stack: Vec, + str_idx: usize, } - fn paren_node<'a>(name: &'a str, mut args: Vec>) -> Tree<'a> { - let mut offset = name.len() + 1; // +1 for open paren - for arg in &mut args { - arg.children_pos += offset; - offset += arg.name.len() + 1; // +1 for comma + impl<'a> NodeBuilder<'a> { + fn new() -> Self { + NodeBuilder { + inner: vec![], + sibling_stack: vec![None], + parent_stack: vec![], + str_idx: 0, + } } - Tree { name, parens: Parens::Round, children_pos: name.len(), args } - } + fn new_node_internal(&mut self, name: &'a str) -> TreeNode<'a> { + let mut new = TreeNode::null(self.inner.len()); + if let Some(idx) = self.parent_stack.last().copied() { + self.inner[idx].n_children += 1; + self.inner[idx].last_child_idx = Some(self.inner.len()); + new.parent_idx = Some(idx); + } + if let Some(idx) = self.sibling_stack.last().unwrap() { + self.inner[*idx].right_sibling_idx = Some(self.inner.len()); + self.str_idx += 1; + } + new.name = name; + new.name_pos = self.str_idx; + + *self.sibling_stack.last_mut().unwrap() = Some(self.inner.len()); + self.str_idx += name.len(); + new + } + + fn leaf(mut self, name: &'a str) -> Self { + let new = self.new_node_internal(name); - fn brace_node<'a>(name: &'a str, mut args: Vec>) -> Tree<'a> { - let mut offset = name.len() + 1; // +1 for open paren - for arg in &mut args { - arg.children_pos += offset; - offset += arg.name.len() + 1; // +1 for comma + self.inner.push(new); + self } - Tree { name, parens: Parens::Curly, children_pos: name.len(), args } + fn open(mut self, name: &'a str, paren: char) -> Self { + let mut new = self.new_node_internal(name); + + new.parens = match paren { + '(' => Parens::Round, + '{' => Parens::Curly, + _ => panic!(), + }; + self.str_idx += 1; + + self.parent_stack.push(self.inner.len()); + self.sibling_stack.push(None); + self.inner.push(new); + self + } + + fn close(mut self) -> Self { + self.str_idx += 1; + self.parent_stack.pop(); + self.sibling_stack.pop(); + self + } + + fn into_tree(self) -> Tree<'a> { + assert_eq!(self.parent_stack.len(), 0); + Tree { nodes: self.inner } + } } #[test] @@ -525,7 +781,10 @@ mod tests { #[test] fn parse_tree_basic() { - assert_eq!(Tree::from_str("thresh").unwrap(), leaf("thresh")); + assert_eq!( + Tree::from_str("thresh").unwrap(), + NodeBuilder::new().leaf("thresh").into_tree() + ); assert!(matches!( Tree::from_str("thresh,").unwrap_err(), @@ -542,7 +801,14 @@ mod tests { Error::Parse(ParseError::Tree(ParseTreeError::TrailingCharacter { ch: 't', pos: 8 })), )); - assert_eq!(Tree::from_str("thresh()").unwrap(), paren_node("thresh", vec![leaf("")])); + assert_eq!( + Tree::from_str("thresh()").unwrap(), + NodeBuilder::new() + .open("thresh", '(') + .leaf("") + .close() + .into_tree() + ); assert!(matches!( Tree::from_str("thresh(a()b)"), @@ -612,7 +878,14 @@ mod tests { fn parse_tree_taproot() { assert_eq!( Tree::from_str("a{b(c),d}").unwrap(), - brace_node("a", vec![paren_node("b", vec![leaf("c")]), leaf("d")]), + NodeBuilder::new() + .open("a", '{') + .open("b", '(') + .leaf("c") + .close() + .leaf("d") + .close() + .into_tree() ); } @@ -626,16 +899,18 @@ mod tests { assert_eq!( Tree::from_str(&desc).unwrap(), - paren_node( - "wsh", - vec![paren_node( - "t:or_c", - vec![ - paren_node("pk", vec![leaf(keys[0])]), - paren_node("v:pkh", vec![leaf(keys[1])]), - ] - )] - ), + NodeBuilder::new() + .open("wsh", '(') + .open("t:or_c", '(') + .open("pk", '(') + .leaf(keys[0]) + .close() + .open("v:pkh", '(') + .leaf(keys[1]) + .close() + .close() + .close() + .into_tree() ); } } diff --git a/src/miniscript/astelem.rs b/src/miniscript/astelem.rs index 779ca7122..c990f31fa 100644 --- a/src/miniscript/astelem.rs +++ b/src/miniscript/astelem.rs @@ -18,24 +18,26 @@ use crate::util::MsKeyBuilder; use crate::{expression, Error, FromStrKey, Miniscript, MiniscriptKey, Terminal, ToPublicKey}; impl crate::expression::FromTree for Arc> { - fn from_tree(top: &expression::Tree) -> Result>, Error> { - Ok(Arc::new(expression::FromTree::from_tree(top)?)) + fn from_tree(root: expression::TreeIterItem) -> Result>, Error> { + Ok(Arc::new(expression::FromTree::from_tree(root)?)) } } impl crate::expression::FromTree for Terminal { - fn from_tree(top: &expression::Tree) -> Result, Error> { - let binary = - |node: &expression::Tree, name, termfn: fn(_, _) -> Self| -> Result { - node.verify_binary(name) - .map_err(From::from) - .map_err(Error::Parse) - .and_then(|(x, y)| { - let x = Arc::>::from_tree(x)?; - let y = Arc::>::from_tree(y)?; - Ok(termfn(x, y)) - }) - }; + fn from_tree(top: expression::TreeIterItem) -> Result, Error> { + let binary = |node: expression::TreeIterItem, + name, + termfn: fn(_, _) -> Self| + -> Result { + node.verify_binary(name) + .map_err(From::from) + .map_err(Error::Parse) + .and_then(|(x, y)| { + let x = Arc::>::from_tree(x)?; + let y = Arc::>::from_tree(y)?; + Ok(termfn(x, y)) + }) + }; let (frag_wrap, frag_name) = top .name_separated(':') diff --git a/src/miniscript/mod.rs b/src/miniscript/mod.rs index b47d19e71..2bbfc4822 100644 --- a/src/miniscript/mod.rs +++ b/src/miniscript/mod.rs @@ -725,7 +725,7 @@ impl Miniscript { pub fn from_str_ext(s: &str, ext: &ExtParams) -> Result, Error> { // This checks for invalid ASCII chars let top = expression::Tree::from_str(s)?; - let ms: Miniscript = expression::FromTree::from_tree(&top)?; + let ms: Miniscript = expression::FromTree::from_tree(top.root())?; ms.ext_check(ext)?; if ms.ty.corr.base != types::Base::B { @@ -737,19 +737,19 @@ impl Miniscript { } impl crate::expression::FromTree for Arc> { - fn from_tree(top: &expression::Tree) -> Result>, Error> { - Ok(Arc::new(expression::FromTree::from_tree(top)?)) + fn from_tree(root: expression::TreeIterItem) -> Result>, Error> { + Ok(Arc::new(expression::FromTree::from_tree(root)?)) } } impl crate::expression::FromTree for Miniscript { /// Parse an expression tree into a Miniscript. As a general rule, this /// should not be called directly; rather go through the descriptor API. - fn from_tree(top: &expression::Tree) -> Result, Error> { - top.verify_no_curly_braces() + fn from_tree(root: expression::TreeIterItem) -> Result, Error> { + root.verify_no_curly_braces() .map_err(From::from) .map_err(Error::Parse)?; - let inner: Terminal = expression::FromTree::from_tree(top)?; + let inner: Terminal = expression::FromTree::from_tree(root)?; Miniscript::from_ast(inner) } } diff --git a/src/policy/concrete.rs b/src/policy/concrete.rs index f1401c583..8826b3cf7 100644 --- a/src/policy/concrete.rs +++ b/src/policy/concrete.rs @@ -835,7 +835,7 @@ impl str::FromStr for Policy { type Err = Error; fn from_str(s: &str) -> Result, Error> { let tree = expression::Tree::from_str(s)?; - let policy: Policy = FromTree::from_tree(&tree)?; + let policy: Policy = FromTree::from_tree(tree.root())?; policy.check_timelocks().map_err(Error::ConcretePolicy)?; Ok(policy) } @@ -847,7 +847,7 @@ impl Policy { /// Helper function for `from_tree` to parse subexpressions with /// names of the form x@y fn from_tree_prob( - top: &expression::Tree, + top: expression::TreeIterItem, allow_prob: bool, ) -> Result<(usize, Policy), Error> { // When 'allow_prob' is true we parse '@' signs out of node names. @@ -935,8 +935,8 @@ impl Policy { } impl expression::FromTree for Policy { - fn from_tree(top: &expression::Tree) -> Result, Error> { - Policy::from_tree_prob(top, false).map(|(_, result)| result) + fn from_tree(root: expression::TreeIterItem) -> Result, Error> { + Policy::from_tree_prob(root, false).map(|(_, result)| result) } } diff --git a/src/policy/semantic.rs b/src/policy/semantic.rs index 6f16b3d67..0c97c59c7 100644 --- a/src/policy/semantic.rs +++ b/src/policy/semantic.rs @@ -277,14 +277,14 @@ impl str::FromStr for Policy { type Err = Error; fn from_str(s: &str) -> Result, Error> { let tree = expression::Tree::from_str(s)?; - expression::FromTree::from_tree(&tree) + expression::FromTree::from_tree(tree.root()) } } serde_string_impl_pk!(Policy, "a miniscript semantic policy"); impl expression::FromTree for Policy { - fn from_tree(top: &expression::Tree) -> Result, Error> { + fn from_tree(top: expression::TreeIterItem) -> Result, Error> { match top.name() { "UNSATISFIABLE" => { top.verify_n_children("UNSATISFIABLE", 0..=0) From ebae0ef9b7ac6d36a35c1d3ed80cc561d96760dd Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Tue, 3 Sep 2024 20:31:47 +0000 Subject: [PATCH 4/5] miniscript: remove unused Result from extdata type_check --- src/miniscript/mod.rs | 2 +- src/miniscript/types/extra_props.rs | 6 +++--- src/policy/compiler.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/miniscript/mod.rs b/src/miniscript/mod.rs index 2bbfc4822..65c2da9f6 100644 --- a/src/miniscript/mod.rs +++ b/src/miniscript/mod.rs @@ -163,7 +163,7 @@ mod private { pub fn from_ast(t: Terminal) -> Result, Error> { let res = Miniscript { ty: Type::type_check(&t)?, - ext: ExtData::type_check(&t)?, + ext: ExtData::type_check(&t), node: t, phantom: PhantomData, }; diff --git a/src/miniscript/types/extra_props.rs b/src/miniscript/types/extra_props.rs index 4f4a92840..3db46b89f 100644 --- a/src/miniscript/types/extra_props.rs +++ b/src/miniscript/types/extra_props.rs @@ -6,7 +6,7 @@ use core::cmp; use core::iter::once; -use super::{Error, ScriptContext}; +use super::ScriptContext; use crate::miniscript::context::SigType; use crate::prelude::*; use crate::{script_num_size, AbsLockTime, MiniscriptKey, RelLockTime, Terminal}; @@ -924,7 +924,7 @@ impl ExtData { /// Compute the type of a fragment assuming all the children of /// Miniscript have been computed already. - pub fn type_check(fragment: &Terminal) -> Result + pub fn type_check(fragment: &Terminal) -> Self where Ctx: ScriptContext, Pk: MiniscriptKey, @@ -990,7 +990,7 @@ impl ExtData { } }; ret.sanity_checks(); - Ok(ret) + ret } } diff --git a/src/policy/compiler.rs b/src/policy/compiler.rs index cf6d5bc9d..2d97b6886 100644 --- a/src/policy/compiler.rs +++ b/src/policy/compiler.rs @@ -540,7 +540,7 @@ impl AstElemExt { //Types and ExtData are already cached and stored in children. So, we can //type_check without cache. For Compiler extra data, we supply a cache. let ty = types::Type::type_check(&ast)?; - let ext = types::ExtData::type_check(&ast)?; + let ext = types::ExtData::type_check(&ast); let comp_ext_data = CompilerExtData::type_check_with_child(&ast, lookup_ext); Ok(AstElemExt { ms: Arc::new(Miniscript::from_components_unchecked(ast, ty, ext)), @@ -563,7 +563,7 @@ impl AstElemExt { //Types and ExtData are already cached and stored in children. So, we can //type_check without cache. For Compiler extra data, we supply a cache. let ty = types::Type::type_check(&ast)?; - let ext = types::ExtData::type_check(&ast)?; + let ext = types::ExtData::type_check(&ast); let comp_ext_data = CompilerExtData::type_check_with_child(&ast, lookup_ext); Ok(AstElemExt { ms: Arc::new(Miniscript::from_components_unchecked(ast, ty, ext)), From 8235d5ffb669b6dae035dd04ebc9191b7b55fcc2 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Fri, 30 Aug 2024 21:24:07 +0000 Subject: [PATCH 5/5] descriptor: add fuzz test comparing to published rust-miniscript 12 This bumps the local Cargo.toml version to 13, which will be the next release (since we've made many breaking changes), and in the fuzz test adds an explicit dependency on miniscript 12 from crates.io, as `old_miniscript`. Adds a single fuzztest which attempt to parse descriptors with both master and 12, to make sure they're the same. --- .github/workflows/cron-daily-fuzz.yml | 1 + Cargo-minimal.lock | 15 ++++++- Cargo-recent.lock | 15 ++++++- Cargo.toml | 2 +- fuzz/Cargo.toml | 5 +++ .../regression_descriptor_parse.rs | 43 +++++++++++++++++++ fuzz/generate-files.sh | 1 + 7 files changed, 77 insertions(+), 5 deletions(-) create mode 100644 fuzz/fuzz_targets/regression_descriptor_parse.rs diff --git a/.github/workflows/cron-daily-fuzz.yml b/.github/workflows/cron-daily-fuzz.yml index 9c2f4e6fc..5c9f259be 100644 --- a/.github/workflows/cron-daily-fuzz.yml +++ b/.github/workflows/cron-daily-fuzz.yml @@ -22,6 +22,7 @@ compile_descriptor, compile_taproot, parse_descriptor, parse_descriptor_secret, +regression_descriptor_parse, roundtrip_concrete, roundtrip_descriptor, roundtrip_miniscript_script, diff --git a/Cargo-minimal.lock b/Cargo-minimal.lock index 4969b5a85..e5cf13fed 100644 --- a/Cargo-minimal.lock +++ b/Cargo-minimal.lock @@ -111,7 +111,8 @@ name = "descriptor-fuzz" version = "0.0.1" dependencies = [ "honggfuzz", - "miniscript", + "miniscript 12.3.0", + "miniscript 13.0.0", "regex", ] @@ -181,7 +182,17 @@ dependencies = [ [[package]] name = "miniscript" -version = "12.2.0" +version = "12.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bd3c9608217b0d6fa9c9c8ddd875b85ab72bd4311cfc8db35e1b5a08fc11f4d" +dependencies = [ + "bech32", + "bitcoin", +] + +[[package]] +name = "miniscript" +version = "13.0.0" dependencies = [ "bech32", "bitcoin", diff --git a/Cargo-recent.lock b/Cargo-recent.lock index 4969b5a85..e5cf13fed 100644 --- a/Cargo-recent.lock +++ b/Cargo-recent.lock @@ -111,7 +111,8 @@ name = "descriptor-fuzz" version = "0.0.1" dependencies = [ "honggfuzz", - "miniscript", + "miniscript 12.3.0", + "miniscript 13.0.0", "regex", ] @@ -181,7 +182,17 @@ dependencies = [ [[package]] name = "miniscript" -version = "12.2.0" +version = "12.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bd3c9608217b0d6fa9c9c8ddd875b85ab72bd4311cfc8db35e1b5a08fc11f4d" +dependencies = [ + "bech32", + "bitcoin", +] + +[[package]] +name = "miniscript" +version = "13.0.0" dependencies = [ "bech32", "bitcoin", diff --git a/Cargo.toml b/Cargo.toml index 88860af96..929b68e4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "miniscript" -version = "12.2.0" +version = "13.0.0" authors = ["Andrew Poelstra , Sanket Kanjalkar "] license = "CC0-1.0" homepage = "https://github.com/rust-bitcoin/rust-miniscript/" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index db5187266..36e56edf2 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -12,6 +12,7 @@ cargo-fuzz = true [dependencies] honggfuzz = { version = "0.5.56", default-features = false } miniscript = { path = "..", features = [ "compiler" ] } +old_miniscript = { package = "miniscript", version = "12.3" } regex = "1.0" @@ -31,6 +32,10 @@ path = "fuzz_targets/parse_descriptor.rs" name = "parse_descriptor_secret" path = "fuzz_targets/parse_descriptor_secret.rs" +[[bin]] +name = "regression_descriptor_parse" +path = "fuzz_targets/regression_descriptor_parse.rs" + [[bin]] name = "roundtrip_concrete" path = "fuzz_targets/roundtrip_concrete.rs" diff --git a/fuzz/fuzz_targets/regression_descriptor_parse.rs b/fuzz/fuzz_targets/regression_descriptor_parse.rs new file mode 100644 index 000000000..c25fd2c55 --- /dev/null +++ b/fuzz/fuzz_targets/regression_descriptor_parse.rs @@ -0,0 +1,43 @@ +use core::str::FromStr; + +use honggfuzz::fuzz; +use miniscript::{Descriptor, DescriptorPublicKey}; +use old_miniscript::{Descriptor as OldDescriptor, DescriptorPublicKey as OldDescriptorPublicKey}; + +type Desc = Descriptor; +type OldDesc = OldDescriptor; + +fn do_test(data: &[u8]) { + let data_str = String::from_utf8_lossy(data); + match (Desc::from_str(&data_str), OldDesc::from_str(&data_str)) { + (Err(_), Err(_)) => {} + (Ok(x), Err(e)) => panic!("new logic parses {} as {:?}, old fails with {}", data_str, x, e), + (Err(e), Ok(x)) => panic!("old logic parses {} as {:?}, new fails with {}", data_str, x, e), + (Ok(new), Ok(old)) => { + assert_eq!( + old.to_string(), + new.to_string(), + "input {} (left is old, right is new)", + data_str + ) + } + } +} + +fn main() { + loop { + fuzz!(|data| { + do_test(data); + }); + } +} + +#[cfg(test)] +mod tests { + #[test] + fn duplicate_crash() { + crate::do_test( + b"tr(02dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd,{1,unun:0})", + ) + } +} diff --git a/fuzz/generate-files.sh b/fuzz/generate-files.sh index 743f9066f..b2b40a5eb 100755 --- a/fuzz/generate-files.sh +++ b/fuzz/generate-files.sh @@ -24,6 +24,7 @@ cargo-fuzz = true [dependencies] honggfuzz = { version = "0.5.56", default-features = false } miniscript = { path = "..", features = [ "compiler" ] } +old_miniscript = { package = "miniscript", git = "https://github.com/apoelstra/rust-miniscript/", rev = "1259375d7b7c91053e09d1cbe3db983612fe301c" } regex = "1.0" EOF