From a7044cb504fb6a065feca1ca5230e5247d95f23f Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 2 Sep 2024 19:53:03 +0000 Subject: [PATCH] expression: rewrite Tree module to no longer use a recursive data type This significantly speeds up and simplifies tree parsing, at the cost of having a more complicated API (but we mostly addressed the API question in the previous commits). This completely eliminates recursion for the Tree data type, including in the Drop impl. Big diff but there are only two "real" changes -- expression/mod.rs is substantially rewritten of course since we replace the core datatype, and Tr::from_tree is substantially rewritten since doing so was the point of this change. The rest of the changes are mechanically changing the signature of expression::FromTree::from_tree everywhere. --- src/descriptor/bare.rs | 12 +- src/descriptor/mod.rs | 4 +- src/descriptor/segwitv0.rs | 8 +- src/descriptor/sh.rs | 4 +- src/descriptor/sortedmulti.rs | 2 +- src/descriptor/tr.rs | 142 ++++----- src/expression/mod.rs | 585 +++++++++++++++++++++++++--------- src/miniscript/astelem.rs | 30 +- src/miniscript/mod.rs | 12 +- src/policy/concrete.rs | 8 +- src/policy/semantic.rs | 4 +- 11 files changed, 536 insertions(+), 275 deletions(-) diff --git a/src/descriptor/bare.rs b/src/descriptor/bare.rs index 3cb0da61e..b41ee2850 100644 --- a/src/descriptor/bare.rs +++ b/src/descriptor/bare.rs @@ -175,8 +175,8 @@ impl Liftable for Bare { } impl FromTree for Bare { - fn from_tree(top: &expression::Tree) -> Result { - let sub = Miniscript::::from_tree(top)?; + fn from_tree(root: expression::TreeIterItem) -> Result { + let sub = Miniscript::::from_tree(root)?; BareCtx::top_level_checks(&sub)?; Bare::new(sub) } @@ -186,7 +186,7 @@ impl core::str::FromStr for Bare { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } @@ -369,8 +369,8 @@ impl Liftable for Pkh { } impl FromTree 
for Pkh { - fn from_tree(top: &expression::Tree) -> Result { - let pk = top + fn from_tree(root: expression::TreeIterItem) -> Result { + let pk = root .verify_terminal_parent("pkh", "public key") .map_err(Error::Parse)?; Pkh::new(pk).map_err(Error::ContextError) @@ -381,7 +381,7 @@ impl core::str::FromStr for Pkh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } diff --git a/src/descriptor/mod.rs b/src/descriptor/mod.rs index f9e673c2b..1b6bc788c 100644 --- a/src/descriptor/mod.rs +++ b/src/descriptor/mod.rs @@ -965,7 +965,7 @@ impl Descriptor { impl crate::expression::FromTree for Descriptor { /// Parse an expression tree into a descriptor. - fn from_tree(top: &expression::Tree) -> Result, Error> { + fn from_tree(top: expression::TreeIterItem) -> Result, Error> { Ok(match (top.name(), top.n_children()) { ("pkh", 1) => Descriptor::Pkh(Pkh::from_tree(top)?), ("wpkh", 1) => Descriptor::Wpkh(Wpkh::from_tree(top)?), @@ -981,7 +981,7 @@ impl FromStr for Descriptor { type Err = Error; fn from_str(s: &str) -> Result, Error> { let top = expression::Tree::from_str(s)?; - let ret = Self::from_tree(&top)?; + let ret = Self::from_tree(top.root())?; if let Descriptor::Tr(ref inner) = ret { // FIXME preserve weird/broken behavior from 12.x. 
// See https://github.com/rust-bitcoin/rust-miniscript/issues/734 diff --git a/src/descriptor/segwitv0.rs b/src/descriptor/segwitv0.rs index b09a9f887..8ad173b76 100644 --- a/src/descriptor/segwitv0.rs +++ b/src/descriptor/segwitv0.rs @@ -247,7 +247,7 @@ impl Liftable for Wsh { } impl crate::expression::FromTree for Wsh { - fn from_tree(top: &expression::Tree) -> Result { + fn from_tree(top: expression::TreeIterItem) -> Result { let top = top .verify_toplevel("wsh", 1..=1) .map_err(From::from) @@ -284,7 +284,7 @@ impl core::str::FromStr for Wsh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Wsh::::from_tree(&top) + Wsh::::from_tree(top.root()) } } @@ -483,7 +483,7 @@ impl Liftable for Wpkh { } impl crate::expression::FromTree for Wpkh { - fn from_tree(top: &expression::Tree) -> Result { + fn from_tree(top: expression::TreeIterItem) -> Result { let pk = top .verify_terminal_parent("wpkh", "public key") .map_err(Error::Parse)?; @@ -495,7 +495,7 @@ impl core::str::FromStr for Wpkh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } diff --git a/src/descriptor/sh.rs b/src/descriptor/sh.rs index f00e3f093..8b0195e8f 100644 --- a/src/descriptor/sh.rs +++ b/src/descriptor/sh.rs @@ -81,7 +81,7 @@ impl fmt::Display for Sh { } impl crate::expression::FromTree for Sh { - fn from_tree(top: &expression::Tree) -> Result { + fn from_tree(top: expression::TreeIterItem) -> Result { let top = top .verify_toplevel("sh", 1..=1) .map_err(From::from) @@ -105,7 +105,7 @@ impl core::str::FromStr for Sh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } diff --git a/src/descriptor/sortedmulti.rs b/src/descriptor/sortedmulti.rs index 85664635e..ff3f62227 100644 --- a/src/descriptor/sortedmulti.rs +++ b/src/descriptor/sortedmulti.rs @@ -59,7 
+59,7 @@ impl SortedMultiVec { } /// Parse an expression tree into a SortedMultiVec - pub fn from_tree(tree: &expression::Tree) -> Result + pub fn from_tree(tree: expression::TreeIterItem) -> Result where Pk: FromStrKey, { diff --git a/src/descriptor/tr.rs b/src/descriptor/tr.rs index ba68a73fa..ab385c940 100644 --- a/src/descriptor/tr.rs +++ b/src/descriptor/tr.rs @@ -14,7 +14,6 @@ use sync::Arc; use super::checksum; use crate::descriptor::DefiniteDescriptorKey; use crate::expression::{self, FromTree}; -use crate::iter::TreeLike as _; use crate::miniscript::satisfy::{Placeholder, Satisfaction, SchnorrSigType, Witness}; use crate::miniscript::Miniscript; use crate::plan::AssetProvider; @@ -495,99 +494,84 @@ impl core::str::FromStr for Tr { fn from_str(s: &str) -> Result { let expr_tree = expression::Tree::from_str(s)?; - Self::from_tree(&expr_tree) + Self::from_tree(expr_tree.root()) } } impl crate::expression::FromTree for Tr { - fn from_tree(expr_tree: &expression::Tree) -> Result { + fn from_tree(root: expression::TreeIterItem) -> Result { use crate::expression::{Parens, ParseTreeError}; - expr_tree - .verify_toplevel("tr", 1..=2) + struct TreeStack<'s, Pk: MiniscriptKey> { + inner: Vec<(expression::TreeIterItem<'s>, TapTree)>, + } + + impl<'s, Pk: MiniscriptKey> TreeStack<'s, Pk> { + fn new() -> Self { Self { inner: Vec::with_capacity(128) } } + + fn push(&mut self, parent: expression::TreeIterItem<'s>, tree: TapTree) { + let mut next_push = (parent, tree); + while let Some(top) = self.inner.pop() { + if next_push.0.index() == top.0.index() { + next_push.0 = top.0.parent().unwrap(); + next_push.1 = TapTree::combine(top.1, next_push.1); + } else { + self.inner.push(top); + break; + } + } + self.inner.push(next_push); + } + + fn pop_final(&mut self) -> Option> { + assert_eq!(self.inner.len(), 1); + self.inner.pop().map(|x| x.1) + } + } + + root.verify_toplevel("tr", 1..=2) .map_err(From::from) .map_err(Error::Parse)?; - let mut round_paren_depth = 0; + let mut 
root_children = root.children(); + let internal_key: Pk = root_children + .next() + .unwrap() // `verify_toplevel` above checked that first child existed + .verify_terminal("internal key") + .map_err(Error::Parse)?; - let mut internal_key = None; - let mut tree_stack = vec![]; + let tap_tree = match root_children.next() { + None => return Tr::new(internal_key, None), + Some(tree) => tree, + }; - for item in expr_tree.verbose_pre_order_iter() { - // Top-level "tr" node. - if item.index == 0 { - if item.is_complete { - debug_assert!( - internal_key.is_some(), - "checked above that top-level 'tr' has children" - ); + let mut tree_stack = TreeStack::new(); + let mut tap_tree_iter = tap_tree.pre_order_iter(); + // while let construction needed because we modify the iterator inside the loop + // (by calling skip_descendants to skip over the contents of the tapscripts). + while let Some(node) = tap_tree_iter.next() { + if node.parens() == Parens::Curly { + if !node.name().is_empty() { + return Err(Error::Parse(ParseError::Tree(ParseTreeError::IncorrectName { + actual: node.name().to_owned(), + expected: "", + }))); } - } else if item.index == 1 { - // First child of tr, which must be the internal key - internal_key = item - .node - .verify_terminal("internal key") - .map_err(Error::Parse) - .map(Some)?; + node.verify_n_children("taptree branch", 2..=2) + .map_err(From::from) + .map_err(Error::Parse)?; } else { - // From here on we are into the taptree. 
- if item.n_children_yielded == 0 { - match item.node.parens() { - Parens::Curly => { - if !item.node.name().is_empty() { - return Err(Error::Parse(ParseError::Tree( - ParseTreeError::IncorrectName { - actual: item.node.name().to_owned(), - expected: "", - }, - ))); - } - if round_paren_depth > 0 { - return Err(Error::Parse(ParseError::Tree( - ParseTreeError::IllegalCurlyBrace { - pos: item.node.children_pos(), - }, - ))); - } - } - Parens::Round => round_paren_depth += 1, - _ => {} - } - } - if item.is_complete { - if item.node.parens() == Parens::Curly { - if item.n_children_yielded == 2 { - let rchild = tree_stack.pop().unwrap(); - let lchild = tree_stack.pop().unwrap(); - tree_stack.push(TapTree::combine(lchild, rchild)); - } else { - return Err(Error::Parse(ParseError::Tree( - ParseTreeError::IncorrectNumberOfChildren { - description: "Taptree node", - n_children: item.n_children_yielded, - minimum: Some(2), - maximum: Some(2), - }, - ))); - } - } else { - if item.node.parens() == Parens::Round { - round_paren_depth -= 1; - } - if round_paren_depth == 0 { - let script = Miniscript::from_tree(item.node)?; - // FIXME hack for https://github.com/rust-bitcoin/rust-miniscript/issues/734 - if script.ty.corr.base != crate::miniscript::types::Base::B { - return Err(Error::NonTopLevel(format!("{:?}", script))); - }; - tree_stack.push(TapTree::Leaf(Arc::new(script))); - } - } - } + let script = Miniscript::from_tree(node)?; + // FIXME hack for https://github.com/rust-bitcoin/rust-miniscript/issues/734 + if script.ty.corr.base != crate::miniscript::types::Base::B { + return Err(Error::NonTopLevel(format!("{:?}", script))); + }; + + tree_stack.push(node.parent().unwrap(), TapTree::Leaf(Arc::new(script))); + tap_tree_iter.skip_descendants(); } } - assert!(tree_stack.len() <= 1); - Tr::new(internal_key.unwrap(), tree_stack.pop()) + Tr::new(internal_key, tree_stack.pop_final()) } } diff --git a/src/expression/mod.rs b/src/expression/mod.rs index 390082cd1..7f7057095 100644 
--- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -1,6 +1,28 @@ // SPDX-License-Identifier: CC0-1.0 -//! # Function-like Expression Language +//! Expression Trees +//! +//! This module represents expression trees, which are trees whose nodes have +//! names and arbitrary numbers of children. As strings, they are defined by +//! the following rules: +//! +//! * Any sequence of valid descriptor characters, including the empty string, is a "name". +//! * A name is an expression (called a "leaf"). +//! * Given n expression trees `s_1`, ..., `s_n` and a name `X`, `X(s_1,...,s_n)` is an expression. +//! * Given n expression trees `s_1`, ..., `s_n` and a name `X`, `X{s_1,...,s_n}` is an expression. +//! +//! Note that while `leaf` and `leaf()` are both expressions, only the former is +//! actually a leaf. The latter has one child which is a leaf with an empty name. +//! If these are intended to be equivalent, the caller must add logic to do this +//! when converting the expression tree into its final type. +//! +//! All recursive structures in this library can be serialized and parsed as trees, +//! though of course each data structure further limits the grammar (e.g. to enforce +//! that names be valid Miniscript fragment names, public keys, hashes or timelocks). +//! +//! Users of this library probably do not need to use this module at all, unless they +//! are implementing their own Miniscript-like structures or extensions to Miniscript. +//! It is intended to be used as a utility to implement string parsing. //! mod error; @@ -11,53 +33,131 @@ use core::str::FromStr; pub use self::error::{ParseNumError, ParseThresholdError, ParseTreeError}; use crate::blanket_traits::StaticDebugAndDisplay; use crate::descriptor::checksum::verify_checksum; -use crate::iter::{self, TreeLike}; use crate::prelude::*; use crate::{AbsLockTime, Error, ParseError, RelLockTime, Threshold, MAX_RECURSION_DEPTH}; /// Allowed characters are descriptor strings. 
pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ "; -#[derive(Debug)] -/// A token of the form `x(...)` or `x` -pub struct Tree<'a> { - /// The name `x` - name: &'a str, - /// Position one past the last character of the node's name. If it has - /// children, the position of the '(' or '{'. - children_pos: usize, - /// The type of parentheses surrounding the node's children. +/// Internal data structure representing a node of an expression tree. +/// +/// Users of the public API will always interact with this using the +/// wrapper type [`TreeIterItem`] which also contains a reference to +/// the whole tree. +#[derive(Debug, PartialEq, Eq)] +struct TreeNode<'s> { + name: &'s str, + name_pos: usize, parens: Parens, - /// The comma-separated contents of the `(...)`, if any - args: Vec>, + n_children: usize, + index: usize, + parent_idx: Option, + last_child_idx: Option, + right_sibling_idx: Option, } -impl PartialEq for Tree<'_> { - fn eq(&self, other: &Self) -> bool { - let mut stack = vec![(self, other)]; - while let Some((me, you)) = stack.pop() { - if me.name != you.name || me.args.len() != you.args.len() { - return false; - } - stack.extend(me.args.iter().zip(you.args.iter())); +impl TreeNode<'_> { + fn null(index: usize) -> Self { + TreeNode { + name: "", + name_pos: 0, + parens: Parens::None, + n_children: 0, + index, + parent_idx: None, + last_child_idx: None, + right_sibling_idx: None, } - true } } -impl Eq for Tree<'_> {} - -impl<'a, 't> TreeLike for &'t Tree<'a> { - type NaryChildren = &'t [Tree<'a>]; - fn nary_len(tc: &Self::NaryChildren) -> usize { tc.len() } - fn nary_index(tc: Self::NaryChildren, idx: usize) -> Self { &tc[idx] } +/// An iterator over the nodes of a tree, in pre-order. 
+/// +/// This has several differences from the pre-order iterator provided by [`crate::iter::TreeLike`]: +/// +/// * this is double-ended, so a right-to-left post-order iterator can be obtained by `.rev()`. +/// * the yielded items represent sub-trees which themselves can be iterated from +/// * the iterator can be told to skip all descendants of the current node, using +/// [`PreOrderIter::skip_descendants`]. +pub struct PreOrderIter<'s> { + nodes: &'s [TreeNode<'s>], + inner: core::ops::RangeInclusive, +} - fn as_node(&self) -> iter::Tree { - if self.args.is_empty() { - iter::Tree::Nullary - } else { - iter::Tree::Nary(&self.args) +impl PreOrderIter<'_> { + /// Skip all the descendants of the most recently-yielded item. + /// + /// Here "most recently-yielded item" means the most recently-yielded item when + /// running the iterator forward. If you run the iterator backward, e.g. by iterating + /// on `iter.by_ref().rev()`, those items are not considered, and the resulting + /// behavior of this function may be surprising. + /// + /// If this method is called before any nodes have been yielded, the entire iterator + /// will be skipped. + pub fn skip_descendants(&mut self) { + if self.inner.is_empty() { + return; } + + let last_index = self.inner.start().saturating_sub(1); + // Construct a synthetic iterator over all descendants + let last_item = TreeIterItem { nodes: self.nodes, index: last_index }; + let skip_past = last_item.rightmost_descendant_idx(); + // ...and copy the indices out of that. 
+ debug_assert!(skip_past + 1 >= *self.inner.start()); + debug_assert!(skip_past <= *self.inner.end()); + self.inner = skip_past + 1..=*self.inner.end(); + } +} + +impl<'s> Iterator for PreOrderIter<'s> { + type Item = TreeIterItem<'s>; + + fn next(&mut self) -> Option { + self.inner + .next() + .map(|n| TreeIterItem { nodes: self.nodes, index: n }) + } + + fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } +} + +impl DoubleEndedIterator for PreOrderIter<'_> { + fn next_back(&mut self) -> Option { + self.inner + .next_back() + .map(|n| TreeIterItem { nodes: self.nodes, index: n }) + } +} + +impl ExactSizeIterator for PreOrderIter<'_> { + // The inner `RangeInclusive` does not impl ExactSizeIterator because the + // range 0..=usize::MAX would have length usize::MAX + 1. But we know + // that our range is limited by the `n_nodes` variable returned by + // `parse_pre_check`, and if THAT didn't overflow then this won't either. +} + +/// A tree node, as yielded from an iterator. +#[derive(Copy, Clone)] +pub struct TreeIterItem<'s> { + nodes: &'s [TreeNode<'s>], + index: usize, +} + +/// An iterator over the direct children of a tree node. +pub struct DirectChildIterator<'s> { + current: Option>, +} + +impl<'s> Iterator for DirectChildIterator<'s> { + type Item = TreeIterItem<'s>; + + fn next(&mut self) -> Option { + let item = self.current.take()?; + self.current = item.nodes[item.index] + .right_sibling_idx + .map(|n| TreeIterItem { nodes: item.nodes, index: n }); + Some(item) } } @@ -75,35 +175,87 @@ pub enum Parens { /// A trait for extracting a structure from a Tree representation in token form pub trait FromTree: Sized { /// Extract a structure from Tree representation - fn from_tree(top: &Tree) -> Result; + fn from_tree(root: TreeIterItem) -> Result; } -impl<'a> Tree<'a> { +impl<'s> TreeIterItem<'s> { /// The name of this tree node. 
- pub fn name(&self) -> &str { self.name } + pub fn name(self) -> &'s str { self.nodes[self.index].name } /// The 0-indexed byte-position of the name in the original expression tree. - pub fn name_pos(&self) -> usize { self.children_pos - self.name.len() - 1 } + pub fn name_pos(self) -> usize { self.nodes[self.index].name_pos } /// The 0-indexed byte-position of the '(' or '{' character which starts the /// expression's children. /// /// If the expression has no children, returns one past the end of the name. - pub fn children_pos(&self) -> usize { self.children_pos - self.name.len() - 1 } + pub fn children_pos(self) -> usize { self.name_pos() + self.name().len() + 1 } /// The number of children this node has. - pub fn n_children(&self) -> usize { self.args.len() } + pub fn n_children(self) -> usize { self.nodes[self.index].n_children } /// The type of parenthesis surrounding this node's children. /// /// If the node has no children, this will be `Parens::None`. - pub fn parens(&self) -> Parens { self.parens } + pub fn parens(self) -> Parens { self.nodes[self.index].parens } /// An iterator over the direct children of this node. /// - /// If you want to iterate recursively, use the [`TreeLike`] API which - /// provides methods `pre_order_iter` and `post_order_iter`. - pub fn children(&self) -> impl ExactSizeIterator { self.args.iter() } + /// If you want to iterate recursively, use the [`Self::pre_order_iter`] + /// or [`Self::rtl_post_order_iter`] method. + pub fn children(self) -> DirectChildIterator<'s> { + DirectChildIterator { current: self.first_child() } + } + + /// The index of the node in its underlying tree. + pub fn index(&self) -> usize { self.index } + + /// Accessor for the parent of the node, if it has a parent (is not the root). + pub fn parent(self) -> Option { + self.nodes[self.index] + .parent_idx + .map(|n| Self { nodes: self.nodes, index: n }) + } + + /// Whether the node is the first child of its parent. + /// + /// Returns false for the root. 
+ pub fn is_first_child(self) -> bool { + self.nodes[self.index] + .parent_idx + .map(|n| n + 1 == self.index) + .unwrap_or(false) + } + + /// Accessor for the first child of the node, if it has a first child. + pub fn first_child(self) -> Option { + // If the node has any children at all, its first child is the one right after it. + self.nodes[self.index] + .last_child_idx + .map(|_| Self { nodes: self.nodes, index: self.index + 1 }) + } + + /// Accessor for the sibling of the node, if it has one. + pub fn right_sibling(self) -> Option { + self.nodes[self.index] + .right_sibling_idx + .map(|n| Self { nodes: self.nodes, index: n }) + } + + /// Helper function to find the rightmost descendant of a node. + /// + /// Used to construct iterators which cover only the node and its descendants. + /// If the node has no descendants, returns its own index. + fn rightmost_descendant_idx(self) -> usize { + let mut scan = self.index; + while let Some(idx) = self.nodes[scan].last_child_idx { + scan = idx; + while let Some(idx) = self.nodes[scan].right_sibling_idx { + scan = idx; + } + } + scan + } /// Split the name by a separating character. /// @@ -111,15 +263,18 @@ impl<'a> Tree<'a> { /// the suffix after the separator. Otherwise returns the whole name. /// /// If the separator occurs multiple times, returns an error. 
- pub fn name_separated(&self, separator: char) -> Result<(Option<&str>, &str), ParseTreeError> { - let mut name_split = self.name.splitn(3, separator); + pub fn name_separated( + self, + separator: char, + ) -> Result<(Option<&'s str>, &'s str), ParseTreeError> { + let mut name_split = self.name().splitn(3, separator); match (name_split.next(), name_split.next(), name_split.next()) { (None, _, _) => unreachable!("'split' always yields at least one element"), - (Some(_), None, _) => Ok((None, self.name)), + (Some(_), None, _) => Ok((None, self.name())), (Some(prefix), Some(name), None) => Ok((Some(prefix), name)), (Some(_), Some(_), Some(suffix)) => Err(ParseTreeError::MultipleSeparators { separator, - pos: self.children_pos - suffix.len() - 1, + pos: self.children_pos() - suffix.len() - 1, }), } } @@ -129,7 +284,7 @@ impl<'a> Tree<'a> { /// The `description` argument is only used to populate the error return, /// and is not validated in any way. pub fn verify_n_children( - &self, + self, description: &'static str, n_children: impl ops::RangeBounds, ) -> Result<(), ParseTreeError> { @@ -167,19 +322,19 @@ impl<'a> Tree<'a> { &self, name: &'static str, n_children: impl ops::RangeBounds, - ) -> Result<&Self, ParseTreeError> { + ) -> Result { assert!( !n_children.contains(&0), "verify_toplevel is intended for nodes with >= 1 child" ); - if self.name != name { - Err(ParseTreeError::IncorrectName { actual: self.name.to_owned(), expected: name }) - } else if self.parens == Parens::Curly { - Err(ParseTreeError::IllegalCurlyBrace { pos: self.children_pos }) + if self.name() != name { + Err(ParseTreeError::IncorrectName { actual: self.name().to_owned(), expected: name }) + } else if self.parens() == Parens::Curly { + Err(ParseTreeError::IllegalCurlyBrace { pos: self.children_pos() }) } else { self.verify_n_children(name, n_children)?; - Ok(&self.args[0]) + Ok(self.first_child().unwrap()) } } @@ -191,10 +346,11 @@ impl<'a> Tree<'a> { pub fn verify_after(&self) -> Result { 
self.verify_n_children("after", 1..=1) .map_err(ParseError::Tree)?; - self.args[0] + let child = self.first_child().unwrap(); + child .verify_n_children("absolute locktime", 0..=0) .map_err(ParseError::Tree)?; - parse_num(self.args[0].name) + parse_num(child.name()) .map_err(ParseError::Num) .and_then(|n| AbsLockTime::from_consensus(n).map_err(ParseError::AbsoluteLockTime)) } @@ -207,10 +363,11 @@ impl<'a> Tree<'a> { pub fn verify_older(&self) -> Result { self.verify_n_children("older", 1..=1) .map_err(ParseError::Tree)?; - self.args[0] + let child = self.first_child().unwrap(); + child .verify_n_children("relative locktime", 0..=0) .map_err(ParseError::Tree)?; - parse_num(self.args[0].name) + parse_num(child.name()) .map_err(ParseError::Num) .and_then(|n| RelLockTime::from_consensus(n).map_err(ParseError::RelativeLockTime)) } @@ -228,7 +385,7 @@ impl<'a> Tree<'a> { { self.verify_n_children(description, 0..=0) .map_err(ParseError::Tree)?; - T::from_str(self.name).map_err(ParseError::box_from_str) + T::from_str(self.name()).map_err(ParseError::box_from_str) } /// Check that a tree node has exactly one child, which is a terminal. @@ -248,7 +405,9 @@ impl<'a> Tree<'a> { { self.verify_n_children(description, 1..=1) .map_err(ParseError::Tree)?; - self.args[0].verify_terminal(inner_description) + self.first_child() + .unwrap() + .verify_terminal(inner_description) } /// Check that a tree node has exactly two children. @@ -257,12 +416,11 @@ impl<'a> Tree<'a> { /// /// The `description` argument is only used to populate the error return, /// and is not validated in any way. 
- pub fn verify_binary( - &self, - description: &'static str, - ) -> Result<(&Self, &Self), ParseTreeError> { + pub fn verify_binary(&self, description: &'static str) -> Result<(Self, Self), ParseTreeError> { self.verify_n_children(description, 2..=2)?; - Ok((&self.args[0], &self.args[1])) + let first_child = self.first_child().unwrap(); + let second_child = first_child.right_sibling().unwrap(); + Ok((first_child, second_child)) } /// Parses an expression tree as a threshold (a term with at least one child, @@ -279,11 +437,11 @@ impl<'a> Tree<'a> { /// and be able to return multiple error types.) pub fn verify_threshold< const MAX: usize, - F: FnMut(&Self) -> Result, + F: FnMut(Self) -> Result, T, E: From, >( - &self, + &'s self, mut map_child: F, ) -> Result, E> { let mut child_iter = self.children(); @@ -303,26 +461,59 @@ impl<'a> Tree<'a> { .and_then(|thresh| thresh.translate_by_index(|_| map_child(child_iter.next().unwrap()))) } + /// Returns an iterator over the nodes of the tree, in pre-order. + /// + /// Constructing the iterator takes O(depth) time. + pub fn pre_order_iter(&'s self) -> PreOrderIter<'s> { + PreOrderIter { nodes: self.nodes, inner: self.index..=self.rightmost_descendant_idx() } + } + + /// Returns an iterator over the nodes of the tree, in right-to-left post-order. + pub fn rtl_post_order_iter(&'s self) -> core::iter::Rev> { + self.pre_order_iter().rev() + } + /// Check that a tree has no curly-brace children in it. pub fn verify_no_curly_braces(&self) -> Result<(), ParseTreeError> { - for tree in self.pre_order_iter() { - if tree.parens == Parens::Curly { - return Err(ParseTreeError::IllegalCurlyBrace { pos: tree.children_pos }); + for node in self.rtl_post_order_iter() { + if node.parens() == Parens::Curly { + return Err(ParseTreeError::IllegalCurlyBrace { pos: node.children_pos() }); } } Ok(()) } +} + +#[derive(Debug, PartialEq, Eq)] +/// A parsed expression tree. See module-level documentation for syntax. 
+pub struct Tree<'s> { + /// The nodes, stored in pre-order. + nodes: Vec>, +} + +impl<'a> Tree<'a> { + /// Returns the root node of the tree (a parsed tree always has at least one node). + pub fn root(&'a self) -> TreeIterItem<'a> { + assert_ne!( + self.nodes.len(), + 0, + "trees cannot be empty; the empty string parses as a single root with empty name" + ); + TreeIterItem { nodes: &self.nodes, index: 0 } + } /// Check that a string is a well-formed expression string, with optional /// checksum. /// - /// Returns the string with the checksum removed and its tree depth. - fn parse_pre_check(s: &str) -> Result<(&str, usize), ParseTreeError> { + /// Returns the string with the checksum removed, the maximum depth, and the + /// number of nodes in the tree. + fn parse_pre_check(s: &str) -> Result<(&str, usize, usize), ParseTreeError> { // First, scan through string to make sure it is well-formed. // Do ASCII/checksum check first; after this we can use .bytes().enumerate() rather // than .char_indices(), which is *significantly* faster. let s = verify_checksum(s)?; + let mut n_nodes = 1; let mut max_depth = 0; let mut open_paren_stack = Vec::with_capacity(128); for (pos, ch) in s.bytes().enumerate() { @@ -380,9 +571,15 @@ impl<'a> Tree<'a> { // now. 
return Err(ParseTreeError::UnmatchedCloseParen { ch: ch.into(), pos }); } - } else if ch == b',' && open_paren_stack.is_empty() { - // We consider commas outside of the tree to be "trailing characters" - return Err(ParseTreeError::TrailingCharacter { ch: ch.into(), pos }); + + n_nodes += 1; + } else if ch == b',' { + if open_paren_stack.is_empty() { + // We consider commas outside of the tree to be "trailing characters" + return Err(ParseTreeError::TrailingCharacter { ch: ch.into(), pos }); + } + + n_nodes += 1; } } // Catch "early end of string" @@ -399,7 +596,7 @@ impl<'a> Tree<'a> { }); } - Ok((s, max_depth)) + Ok((s, max_depth, n_nodes)) } /// Parses a tree from a string @@ -411,61 +608,73 @@ impl<'a> Tree<'a> { } fn from_str_inner(s: &'a str) -> Result { + fn new_node<'a>(nodes: &mut [TreeNode<'a>], stack: &[usize], pos: usize) -> TreeNode<'a> { + let parent_idx = stack.last().copied(); + if let Some(idx) = parent_idx { + nodes[idx].n_children += 1; + nodes[idx].last_child_idx = Some(nodes.len()); + } + + let mut new = TreeNode::null(nodes.len()); + new.name_pos = pos; + new.parent_idx = parent_idx; + new + } + // First, scan through string to make sure it is well-formed. - let (s, max_depth) = Self::parse_pre_check(s)?; - - // Now, knowing it is sane and well-formed, we can easily parse it backward, - // which will yield a post-order right-to-left iterator of its nodes. - let mut stack = Vec::with_capacity(max_depth); - let mut children_parens: Option<(Vec<_>, usize, Parens)> = None; - let mut node_name_end = s.len(); - for (pos, ch) in s.bytes().enumerate().rev() { - if ch == b')' || ch == b'}' { - stack.push(vec![]); - node_name_end = pos; + let (s, max_depth, n_nodes) = Self::parse_pre_check(s)?; + + let mut nodes = Vec::with_capacity(n_nodes); + + // Now, knowing it is sane and well-formed, we can easily parse it forward, + // as the string serialization lists all the nodes in pre-order. 
+ let mut parent_stack = Vec::with_capacity(max_depth); + let mut current_node = Some(TreeNode::null(0)); + for (pos, ch) in s.bytes().enumerate() { + if ch == b'(' || ch == b'{' { + let mut current = current_node.expect("'(' only occurs after a node name"); + current.name = &s[current.name_pos..pos]; + current.parens = match ch { + b'(' => Parens::Round, + b'{' => Parens::Curly, + _ => unreachable!(), + }; + parent_stack.push(nodes.len()); + nodes.push(current); + + current_node = Some(new_node(&mut nodes, &parent_stack, pos + 1)); } else if ch == b',' { - let (mut args, children_pos, parens) = - children_parens - .take() - .unwrap_or((vec![], node_name_end, Parens::None)); - args.reverse(); - - let top = stack.last_mut().unwrap(); - let new_tree = - Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args }; - top.push(new_tree); - node_name_end = pos; - } else if ch == b'(' || ch == b'{' { - let (mut args, children_pos, parens) = - children_parens - .take() - .unwrap_or((vec![], node_name_end, Parens::None)); - args.reverse(); - - let mut top = stack.pop().unwrap(); - let new_tree = - Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args }; - top.push(new_tree); - children_parens = Some(( - top, - pos, - match ch { - b'(' => Parens::Round, - b'{' => Parens::Curly, - _ => unreachable!(), - }, - )); - node_name_end = pos; + if let Some(mut current) = current_node { + current.name = &s[current.name_pos..pos]; + nodes.push(current); + } + + if let Some(last_sib_idx) = + parent_stack.last().and_then(|n| nodes[*n].last_child_idx) + { + nodes[last_sib_idx].right_sibling_idx = Some(nodes.len()); + } + current_node = Some(new_node(&mut nodes, &parent_stack, pos + 1)); + } else if ch == b')' || ch == b'}' { + if let Some(mut current) = current_node { + current.name = &s[current.name_pos..pos]; + nodes.push(current); + } + + current_node = None; + parent_stack.pop(); } } + if let Some(mut current) = current_node { + current.name = 
&s[current.name_pos..]; + nodes.push(current); + } + + assert_eq!(parent_stack.capacity(), max_depth); + assert_eq!(nodes.capacity(), n_nodes); + assert_eq!(nodes.len(), nodes.capacity()); - assert_eq!(stack.len(), 0); - let (mut args, children_pos, parens) = - children_parens - .take() - .unwrap_or((vec![], node_name_end, Parens::None)); - args.reverse(); - Ok(Tree { name: &s[..node_name_end], children_pos, parens, args }) + Ok(Tree { nodes }) } } @@ -488,29 +697,76 @@ mod tests { use super::*; use crate::ParseError; - /// Test functions to manually build trees - fn leaf(name: &str) -> Tree { - Tree { name, parens: Parens::None, children_pos: name.len(), args: vec![] } + struct NodeBuilder<'a> { + inner: Vec>, + sibling_stack: Vec>, + parent_stack: Vec, + str_idx: usize, } - fn paren_node<'a>(name: &'a str, mut args: Vec>) -> Tree<'a> { - let mut offset = name.len() + 1; // +1 for open paren - for arg in &mut args { - arg.children_pos += offset; - offset += arg.name.len() + 1; // +1 for comma + impl<'a> NodeBuilder<'a> { + fn new() -> Self { + NodeBuilder { + inner: vec![], + sibling_stack: vec![None], + parent_stack: vec![], + str_idx: 0, + } } - Tree { name, parens: Parens::Round, children_pos: name.len(), args } - } + fn new_node_internal(&mut self, name: &'a str) -> TreeNode<'a> { + let mut new = TreeNode::null(self.inner.len()); + if let Some(idx) = self.parent_stack.last().copied() { + self.inner[idx].n_children += 1; + self.inner[idx].last_child_idx = Some(self.inner.len()); + new.parent_idx = Some(idx); + } + if let Some(idx) = self.sibling_stack.last().unwrap() { + self.inner[*idx].right_sibling_idx = Some(self.inner.len()); + self.str_idx += 1; + } + new.name = name; + new.name_pos = self.str_idx; + + *self.sibling_stack.last_mut().unwrap() = Some(self.inner.len()); + self.str_idx += name.len(); + new + } + + fn leaf(mut self, name: &'a str) -> Self { + let new = self.new_node_internal(name); - fn brace_node<'a>(name: &'a str, mut args: Vec>) -> Tree<'a> 
{ - let mut offset = name.len() + 1; // +1 for open paren - for arg in &mut args { - arg.children_pos += offset; - offset += arg.name.len() + 1; // +1 for comma + self.inner.push(new); + self } - Tree { name, parens: Parens::Curly, children_pos: name.len(), args } + fn open(mut self, name: &'a str, paren: char) -> Self { + let mut new = self.new_node_internal(name); + + new.parens = match paren { + '(' => Parens::Round, + '{' => Parens::Curly, + _ => panic!(), + }; + self.str_idx += 1; + + self.parent_stack.push(self.inner.len()); + self.sibling_stack.push(None); + self.inner.push(new); + self + } + + fn close(mut self) -> Self { + self.str_idx += 1; + self.parent_stack.pop(); + self.sibling_stack.pop(); + self + } + + fn into_tree(self) -> Tree<'a> { + assert_eq!(self.parent_stack.len(), 0); + Tree { nodes: self.inner } + } } #[test] @@ -525,7 +781,10 @@ mod tests { #[test] fn parse_tree_basic() { - assert_eq!(Tree::from_str("thresh").unwrap(), leaf("thresh")); + assert_eq!( + Tree::from_str("thresh").unwrap(), + NodeBuilder::new().leaf("thresh").into_tree() + ); assert!(matches!( Tree::from_str("thresh,").unwrap_err(), @@ -542,7 +801,14 @@ mod tests { Error::Parse(ParseError::Tree(ParseTreeError::TrailingCharacter { ch: 't', pos: 8 })), )); - assert_eq!(Tree::from_str("thresh()").unwrap(), paren_node("thresh", vec![leaf("")])); + assert_eq!( + Tree::from_str("thresh()").unwrap(), + NodeBuilder::new() + .open("thresh", '(') + .leaf("") + .close() + .into_tree() + ); assert!(matches!( Tree::from_str("thresh(a()b)"), @@ -612,7 +878,14 @@ mod tests { fn parse_tree_taproot() { assert_eq!( Tree::from_str("a{b(c),d}").unwrap(), - brace_node("a", vec![paren_node("b", vec![leaf("c")]), leaf("d")]), + NodeBuilder::new() + .open("a", '{') + .open("b", '(') + .leaf("c") + .close() + .leaf("d") + .close() + .into_tree() ); } @@ -626,16 +899,18 @@ mod tests { assert_eq!( Tree::from_str(&desc).unwrap(), - paren_node( - "wsh", - vec![paren_node( - "t:or_c", - vec![ - 
paren_node("pk", vec![leaf(keys[0])]), - paren_node("v:pkh", vec![leaf(keys[1])]), - ] - )] - ), + NodeBuilder::new() + .open("wsh", '(') + .open("t:or_c", '(') + .open("pk", '(') + .leaf(keys[0]) + .close() + .open("v:pkh", '(') + .leaf(keys[1]) + .close() + .close() + .close() + .into_tree() ); } } diff --git a/src/miniscript/astelem.rs b/src/miniscript/astelem.rs index 779ca7122..c990f31fa 100644 --- a/src/miniscript/astelem.rs +++ b/src/miniscript/astelem.rs @@ -18,24 +18,26 @@ use crate::util::MsKeyBuilder; use crate::{expression, Error, FromStrKey, Miniscript, MiniscriptKey, Terminal, ToPublicKey}; impl crate::expression::FromTree for Arc> { - fn from_tree(top: &expression::Tree) -> Result>, Error> { - Ok(Arc::new(expression::FromTree::from_tree(top)?)) + fn from_tree(root: expression::TreeIterItem) -> Result>, Error> { + Ok(Arc::new(expression::FromTree::from_tree(root)?)) } } impl crate::expression::FromTree for Terminal { - fn from_tree(top: &expression::Tree) -> Result, Error> { - let binary = - |node: &expression::Tree, name, termfn: fn(_, _) -> Self| -> Result { - node.verify_binary(name) - .map_err(From::from) - .map_err(Error::Parse) - .and_then(|(x, y)| { - let x = Arc::>::from_tree(x)?; - let y = Arc::>::from_tree(y)?; - Ok(termfn(x, y)) - }) - }; + fn from_tree(top: expression::TreeIterItem) -> Result, Error> { + let binary = |node: expression::TreeIterItem, + name, + termfn: fn(_, _) -> Self| + -> Result { + node.verify_binary(name) + .map_err(From::from) + .map_err(Error::Parse) + .and_then(|(x, y)| { + let x = Arc::>::from_tree(x)?; + let y = Arc::>::from_tree(y)?; + Ok(termfn(x, y)) + }) + }; let (frag_wrap, frag_name) = top .name_separated(':') diff --git a/src/miniscript/mod.rs b/src/miniscript/mod.rs index b47d19e71..2bbfc4822 100644 --- a/src/miniscript/mod.rs +++ b/src/miniscript/mod.rs @@ -725,7 +725,7 @@ impl Miniscript { pub fn from_str_ext(s: &str, ext: &ExtParams) -> Result, Error> { // This checks for invalid ASCII chars let top = 
expression::Tree::from_str(s)?; - let ms: Miniscript = expression::FromTree::from_tree(&top)?; + let ms: Miniscript = expression::FromTree::from_tree(top.root())?; ms.ext_check(ext)?; if ms.ty.corr.base != types::Base::B { @@ -737,19 +737,19 @@ impl Miniscript { } impl crate::expression::FromTree for Arc> { - fn from_tree(top: &expression::Tree) -> Result>, Error> { - Ok(Arc::new(expression::FromTree::from_tree(top)?)) + fn from_tree(root: expression::TreeIterItem) -> Result>, Error> { + Ok(Arc::new(expression::FromTree::from_tree(root)?)) } } impl crate::expression::FromTree for Miniscript { /// Parse an expression tree into a Miniscript. As a general rule, this /// should not be called directly; rather go through the descriptor API. - fn from_tree(top: &expression::Tree) -> Result, Error> { - top.verify_no_curly_braces() + fn from_tree(root: expression::TreeIterItem) -> Result, Error> { + root.verify_no_curly_braces() .map_err(From::from) .map_err(Error::Parse)?; - let inner: Terminal = expression::FromTree::from_tree(top)?; + let inner: Terminal = expression::FromTree::from_tree(root)?; Miniscript::from_ast(inner) } } diff --git a/src/policy/concrete.rs b/src/policy/concrete.rs index f1401c583..8826b3cf7 100644 --- a/src/policy/concrete.rs +++ b/src/policy/concrete.rs @@ -835,7 +835,7 @@ impl str::FromStr for Policy { type Err = Error; fn from_str(s: &str) -> Result, Error> { let tree = expression::Tree::from_str(s)?; - let policy: Policy = FromTree::from_tree(&tree)?; + let policy: Policy = FromTree::from_tree(tree.root())?; policy.check_timelocks().map_err(Error::ConcretePolicy)?; Ok(policy) } @@ -847,7 +847,7 @@ impl Policy { /// Helper function for `from_tree` to parse subexpressions with /// names of the form x@y fn from_tree_prob( - top: &expression::Tree, + top: expression::TreeIterItem, allow_prob: bool, ) -> Result<(usize, Policy), Error> { // When 'allow_prob' is true we parse '@' signs out of node names. 
@@ -935,8 +935,8 @@ impl Policy { } impl expression::FromTree for Policy { - fn from_tree(top: &expression::Tree) -> Result, Error> { - Policy::from_tree_prob(top, false).map(|(_, result)| result) + fn from_tree(root: expression::TreeIterItem) -> Result, Error> { + Policy::from_tree_prob(root, false).map(|(_, result)| result) } } diff --git a/src/policy/semantic.rs b/src/policy/semantic.rs index 6f16b3d67..0c97c59c7 100644 --- a/src/policy/semantic.rs +++ b/src/policy/semantic.rs @@ -277,14 +277,14 @@ impl str::FromStr for Policy { type Err = Error; fn from_str(s: &str) -> Result, Error> { let tree = expression::Tree::from_str(s)?; - expression::FromTree::from_tree(&tree) + expression::FromTree::from_tree(tree.root()) } } serde_string_impl_pk!(Policy, "a miniscript semantic policy"); impl expression::FromTree for Policy { - fn from_tree(top: &expression::Tree) -> Result, Error> { + fn from_tree(top: expression::TreeIterItem) -> Result, Error> { match top.name() { "UNSATISFIABLE" => { top.verify_n_children("UNSATISFIABLE", 0..=0)