diff --git a/src/descriptor/bare.rs b/src/descriptor/bare.rs index 3cb0da61e..b41ee2850 100644 --- a/src/descriptor/bare.rs +++ b/src/descriptor/bare.rs @@ -175,8 +175,8 @@ impl Liftable for Bare { } impl FromTree for Bare { - fn from_tree(top: &expression::Tree) -> Result { - let sub = Miniscript::::from_tree(top)?; + fn from_tree(root: expression::TreeIterItem) -> Result { + let sub = Miniscript::::from_tree(root)?; BareCtx::top_level_checks(&sub)?; Bare::new(sub) } @@ -186,7 +186,7 @@ impl core::str::FromStr for Bare { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } @@ -369,8 +369,8 @@ impl Liftable for Pkh { } impl FromTree for Pkh { - fn from_tree(top: &expression::Tree) -> Result { - let pk = top + fn from_tree(root: expression::TreeIterItem) -> Result { + let pk = root .verify_terminal_parent("pkh", "public key") .map_err(Error::Parse)?; Pkh::new(pk).map_err(Error::ContextError) @@ -381,7 +381,7 @@ impl core::str::FromStr for Pkh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } diff --git a/src/descriptor/mod.rs b/src/descriptor/mod.rs index f9e673c2b..1b6bc788c 100644 --- a/src/descriptor/mod.rs +++ b/src/descriptor/mod.rs @@ -965,7 +965,7 @@ impl Descriptor { impl crate::expression::FromTree for Descriptor { /// Parse an expression tree into a descriptor. - fn from_tree(top: &expression::Tree) -> Result, Error> { + fn from_tree(top: expression::TreeIterItem) -> Result, Error> { Ok(match (top.name(), top.n_children()) { ("pkh", 1) => Descriptor::Pkh(Pkh::from_tree(top)?), ("wpkh", 1) => Descriptor::Wpkh(Wpkh::from_tree(top)?), @@ -981,7 +981,7 @@ impl FromStr for Descriptor { type Err = Error; fn from_str(s: &str) -> Result, Error> { let top = expression::Tree::from_str(s)?; - let ret = Self::from_tree(&top)?; + let ret = Self::from_tree(top.root())?; if let Descriptor::Tr(ref inner) = ret { // FIXME preserve weird/broken behavior from 12.x. // See https://github.com/rust-bitcoin/rust-miniscript/issues/734 diff --git a/src/descriptor/segwitv0.rs b/src/descriptor/segwitv0.rs index b09a9f887..8ad173b76 100644 --- a/src/descriptor/segwitv0.rs +++ b/src/descriptor/segwitv0.rs @@ -247,7 +247,7 @@ impl Liftable for Wsh { } impl crate::expression::FromTree for Wsh { - fn from_tree(top: &expression::Tree) -> Result { + fn from_tree(top: expression::TreeIterItem) -> Result { let top = top .verify_toplevel("wsh", 1..=1) .map_err(From::from) @@ -284,7 +284,7 @@ impl core::str::FromStr for Wsh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Wsh::::from_tree(&top) + Wsh::::from_tree(top.root()) } } @@ -483,7 +483,7 @@ impl Liftable for Wpkh { } impl crate::expression::FromTree for Wpkh { - fn from_tree(top: &expression::Tree) -> Result { + fn from_tree(top: expression::TreeIterItem) -> Result { let pk = top .verify_terminal_parent("wpkh", "public key") .map_err(Error::Parse)?; @@ -495,7 +495,7 @@ impl core::str::FromStr for Wpkh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } diff --git a/src/descriptor/sh.rs b/src/descriptor/sh.rs index f00e3f093..8b0195e8f 100644 --- a/src/descriptor/sh.rs +++ b/src/descriptor/sh.rs @@ -81,7 +81,7 @@ impl fmt::Display for Sh { } impl crate::expression::FromTree for Sh { - fn from_tree(top: &expression::Tree) -> Result { + fn from_tree(top: expression::TreeIterItem) -> Result { let top = top .verify_toplevel("sh", 1..=1) .map_err(From::from) @@ -105,7 +105,7 @@ impl core::str::FromStr for Sh { type Err = Error; fn from_str(s: &str) -> Result { let top = expression::Tree::from_str(s)?; - Self::from_tree(&top) + Self::from_tree(top.root()) } } diff --git a/src/descriptor/sortedmulti.rs b/src/descriptor/sortedmulti.rs index 85664635e..ff3f62227 100644 --- a/src/descriptor/sortedmulti.rs +++ b/src/descriptor/sortedmulti.rs @@ -59,7 +59,7 @@ impl SortedMultiVec { } /// Parse an expression tree into a SortedMultiVec - pub fn from_tree(tree: &expression::Tree) -> Result + pub fn from_tree(tree: expression::TreeIterItem) -> Result where Pk: FromStrKey, { diff --git a/src/descriptor/tr.rs b/src/descriptor/tr.rs index ba68a73fa..ab385c940 100644 --- a/src/descriptor/tr.rs +++ b/src/descriptor/tr.rs @@ -14,7 +14,6 @@ use sync::Arc; use super::checksum; use crate::descriptor::DefiniteDescriptorKey; use crate::expression::{self, FromTree}; -use crate::iter::TreeLike as _; use crate::miniscript::satisfy::{Placeholder, Satisfaction, SchnorrSigType, Witness}; use crate::miniscript::Miniscript; use crate::plan::AssetProvider; @@ -495,99 +494,84 @@ impl core::str::FromStr for Tr { fn from_str(s: &str) -> Result { let expr_tree = expression::Tree::from_str(s)?; - Self::from_tree(&expr_tree) + Self::from_tree(expr_tree.root()) } } impl crate::expression::FromTree for Tr { - fn from_tree(expr_tree: &expression::Tree) -> Result { + fn from_tree(root: expression::TreeIterItem) -> Result { use crate::expression::{Parens, ParseTreeError}; - expr_tree - .verify_toplevel("tr", 1..=2) + struct TreeStack<'s, Pk: MiniscriptKey> { + inner: Vec<(expression::TreeIterItem<'s>, TapTree)>, + } + + impl<'s, Pk: MiniscriptKey> TreeStack<'s, Pk> { + fn new() -> Self { Self { inner: Vec::with_capacity(128) } } + + fn push(&mut self, parent: expression::TreeIterItem<'s>, tree: TapTree) { + let mut next_push = (parent, tree); + while let Some(top) = self.inner.pop() { + if next_push.0.index() == top.0.index() { + next_push.0 = top.0.parent().unwrap(); + next_push.1 = TapTree::combine(top.1, next_push.1); + } else { + self.inner.push(top); + break; + } + } + self.inner.push(next_push); + } + + fn pop_final(&mut self) -> Option> { + assert_eq!(self.inner.len(), 1); + self.inner.pop().map(|x| x.1) + } + } + + root.verify_toplevel("tr", 1..=2) .map_err(From::from) .map_err(Error::Parse)?; - let mut round_paren_depth = 0; + let mut root_children = root.children(); + let internal_key: Pk = root_children + .next() + .unwrap() // `verify_toplevel` above checked that first child existed + .verify_terminal("internal key") + .map_err(Error::Parse)?; - let mut internal_key = None; - let mut tree_stack = vec![]; + let tap_tree = match root_children.next() { + None => return Tr::new(internal_key, None), + Some(tree) => tree, + }; - for item in expr_tree.verbose_pre_order_iter() { - // Top-level "tr" node. - if item.index == 0 { - if item.is_complete { - debug_assert!( - internal_key.is_some(), - "checked above that top-level 'tr' has children" - ); + let mut tree_stack = TreeStack::new(); + let mut tap_tree_iter = tap_tree.pre_order_iter(); + // while let construction needed because we modify the iterator inside the loop + // (by calling skip_descendants to skip over the contents of the tapscripts). + while let Some(node) = tap_tree_iter.next() { + if node.parens() == Parens::Curly { + if !node.name().is_empty() { + return Err(Error::Parse(ParseError::Tree(ParseTreeError::IncorrectName { + actual: node.name().to_owned(), + expected: "", + }))); } - } else if item.index == 1 { - // First child of tr, which must be the internal key - internal_key = item - .node - .verify_terminal("internal key") - .map_err(Error::Parse) - .map(Some)?; + node.verify_n_children("taptree branch", 2..=2) + .map_err(From::from) + .map_err(Error::Parse)?; } else { - // From here on we are into the taptree. - if item.n_children_yielded == 0 { - match item.node.parens() { - Parens::Curly => { - if !item.node.name().is_empty() { - return Err(Error::Parse(ParseError::Tree( - ParseTreeError::IncorrectName { - actual: item.node.name().to_owned(), - expected: "", - }, - ))); - } - if round_paren_depth > 0 { - return Err(Error::Parse(ParseError::Tree( - ParseTreeError::IllegalCurlyBrace { - pos: item.node.children_pos(), - }, - ))); - } - } - Parens::Round => round_paren_depth += 1, - _ => {} - } - } - if item.is_complete { - if item.node.parens() == Parens::Curly { - if item.n_children_yielded == 2 { - let rchild = tree_stack.pop().unwrap(); - let lchild = tree_stack.pop().unwrap(); - tree_stack.push(TapTree::combine(lchild, rchild)); - } else { - return Err(Error::Parse(ParseError::Tree( - ParseTreeError::IncorrectNumberOfChildren { - description: "Taptree node", - n_children: item.n_children_yielded, - minimum: Some(2), - maximum: Some(2), - }, - ))); - } - } else { - if item.node.parens() == Parens::Round { - round_paren_depth -= 1; - } - if round_paren_depth == 0 { - let script = Miniscript::from_tree(item.node)?; - // FIXME hack for https://github.com/rust-bitcoin/rust-miniscript/issues/734 - if script.ty.corr.base != crate::miniscript::types::Base::B { - return Err(Error::NonTopLevel(format!("{:?}", script))); - }; - tree_stack.push(TapTree::Leaf(Arc::new(script))); - } - } - } + let script = Miniscript::from_tree(node)?; + // FIXME hack for https://github.com/rust-bitcoin/rust-miniscript/issues/734 + if script.ty.corr.base != crate::miniscript::types::Base::B { + return Err(Error::NonTopLevel(format!("{:?}", script))); + }; + + tree_stack.push(node.parent().unwrap(), TapTree::Leaf(Arc::new(script))); + tap_tree_iter.skip_descendants(); } } - assert!(tree_stack.len() <= 1); - Tr::new(internal_key.unwrap(), tree_stack.pop()) + Tr::new(internal_key, tree_stack.pop_final()) } } diff --git a/src/expression/mod.rs b/src/expression/mod.rs index 390082cd1..7f7057095 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -1,6 +1,28 @@ // SPDX-License-Identifier: CC0-1.0 -//! # Function-like Expression Language +//! Expression Trees +//! +//! This module represents expression trees, which are trees whose nodes have +//! names and arbitrary numbers of children. As strings, they are defined by +//! the following rules: +//! +//! * Any sequence of valid descriptor characters, including the empty string, is a "name". +//! * A name is an expression (called a "leaf"). +//! * Given n expression trees `s_1`, ..., `s_n` and a name `X`, `X(s_1,...,s_n)` is an expression. +//! * Given n expression trees `s_1`, ..., `s_n` and a name `X`, `X{s_1,...,s_n}` is an expression. +//! +//! Note that while `leaf` and `leaf()` are both expressions, only the former is +//! actually a leaf. The latter has one child which is a leaf with an empty name. +//! If these are intended to be equivalent, the caller must add logic to do this +//! when converting the expression tree into its final type. +//! +//! All recursive structures in this library can be serialized and parsed as trees, +//! though of course each data structure further limits the grammar (e.g. to enforce +//! that names be valid Miniscript fragment names, public keys, hashes or timelocks). +//! +//! Users of this library probably do not need to use this module at all, unless they +//! are implementing their own Miniscript-like structures or extensions to Miniscript. +//! It is intended to be used as a utility to implement string parsing. //! mod error; @@ -11,53 +33,131 @@ use core::str::FromStr; pub use self::error::{ParseNumError, ParseThresholdError, ParseTreeError}; use crate::blanket_traits::StaticDebugAndDisplay; use crate::descriptor::checksum::verify_checksum; -use crate::iter::{self, TreeLike}; use crate::prelude::*; use crate::{AbsLockTime, Error, ParseError, RelLockTime, Threshold, MAX_RECURSION_DEPTH}; /// Allowed characters are descriptor strings. pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ "; -#[derive(Debug)] -/// A token of the form `x(...)` or `x` -pub struct Tree<'a> { - /// The name `x` - name: &'a str, - /// Position one past the last character of the node's name. If it has - /// children, the position of the '(' or '{'. - children_pos: usize, - /// The type of parentheses surrounding the node's children. +/// Internal data structure representing a node of an expression tree. +/// +/// Users of the public API will always interact with this using the +/// wrapper type [`TreeIterItem`] which also contains a reference to +/// the whole tree. +#[derive(Debug, PartialEq, Eq)] +struct TreeNode<'s> { + name: &'s str, + name_pos: usize, parens: Parens, - /// The comma-separated contents of the `(...)`, if any - args: Vec>, + n_children: usize, + index: usize, + parent_idx: Option, + last_child_idx: Option, + right_sibling_idx: Option, } -impl PartialEq for Tree<'_> { - fn eq(&self, other: &Self) -> bool { - let mut stack = vec![(self, other)]; - while let Some((me, you)) = stack.pop() { - if me.name != you.name || me.args.len() != you.args.len() { - return false; - } - stack.extend(me.args.iter().zip(you.args.iter())); +impl TreeNode<'_> { + fn null(index: usize) -> Self { + TreeNode { + name: "", + name_pos: 0, + parens: Parens::None, + n_children: 0, + index, + parent_idx: None, + last_child_idx: None, + right_sibling_idx: None, } - true } } -impl Eq for Tree<'_> {} - -impl<'a, 't> TreeLike for &'t Tree<'a> { - type NaryChildren = &'t [Tree<'a>]; - fn nary_len(tc: &Self::NaryChildren) -> usize { tc.len() } - fn nary_index(tc: Self::NaryChildren, idx: usize) -> Self { &tc[idx] } +/// An iterator over the nodes of a tree, in pre-order. +/// +/// This has several differences from the pre-order iterator provided by [`crate::iter::TreeLike`]: +/// +/// * this is double-ended, so a right-to-left post-order iterator can be obtained by `.rev()`. +/// * the yielded items represent sub-trees which themselves can be iterated from +/// * the iterator can be told to skip all descendants of the current node, using +/// [`PreOrderIter::skip_descendants`]. +pub struct PreOrderIter<'s> { + nodes: &'s [TreeNode<'s>], + inner: core::ops::RangeInclusive, +} - fn as_node(&self) -> iter::Tree { - if self.args.is_empty() { - iter::Tree::Nullary - } else { - iter::Tree::Nary(&self.args) +impl PreOrderIter<'_> { + /// Skip all the descendants of the most recently-yielded item. + /// + /// Here "most recently-yielded item" means the most recently-yielded item when + /// running the iterator forward. If you run the iterator backward, e.g. by iterating + /// on `iter.by_ref().rev()`, those items are not considered, and the resulting + /// behavior of this function may be surprising. + /// + /// If this method is called before any nodes have been yielded, the entire iterator + /// will be skipped. + pub fn skip_descendants(&mut self) { + if self.inner.is_empty() { + return; } + + let last_index = self.inner.start().saturating_sub(1); + // Construct a synthetic iterator over all descendants + let last_item = TreeIterItem { nodes: self.nodes, index: last_index }; + let skip_past = last_item.rightmost_descendant_idx(); + // ...and copy the indices out of that. + debug_assert!(skip_past + 1 >= *self.inner.start()); + debug_assert!(skip_past <= *self.inner.end()); + self.inner = skip_past + 1..=*self.inner.end(); + } +} + +impl<'s> Iterator for PreOrderIter<'s> { + type Item = TreeIterItem<'s>; + + fn next(&mut self) -> Option { + self.inner + .next() + .map(|n| TreeIterItem { nodes: self.nodes, index: n }) + } + + fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } +} + +impl DoubleEndedIterator for PreOrderIter<'_> { + fn next_back(&mut self) -> Option { + self.inner + .next_back() + .map(|n| TreeIterItem { nodes: self.nodes, index: n }) + } +} + +impl ExactSizeIterator for PreOrderIter<'_> { + // The inner `RangeInclusive` does not impl ExactSizeIterator because the + // range 0..=usize::MAX would have length usize::MAX + 1. But we know + // that our range is limited by the `n_nodes` variable returned by + // `parse_pre_check`, and if THAT didn't overflow then this won't either. +} + +/// A tree node, as yielded from an iterator. +#[derive(Copy, Clone)] +pub struct TreeIterItem<'s> { + nodes: &'s [TreeNode<'s>], + index: usize, +} + +/// An iterator over the direct children of a tree node. +pub struct DirectChildIterator<'s> { + current: Option>, +} + +impl<'s> Iterator for DirectChildIterator<'s> { + type Item = TreeIterItem<'s>; + + fn next(&mut self) -> Option { + let item = self.current.take()?; + self.current = item.nodes[item.index] + .right_sibling_idx + .map(|n| TreeIterItem { nodes: item.nodes, index: n }); + Some(item) } } @@ -75,35 +175,87 @@ pub enum Parens { /// A trait for extracting a structure from a Tree representation in token form pub trait FromTree: Sized { /// Extract a structure from Tree representation - fn from_tree(top: &Tree) -> Result; + fn from_tree(root: TreeIterItem) -> Result; } -impl<'a> Tree<'a> { +impl<'s> TreeIterItem<'s> { /// The name of this tree node. - pub fn name(&self) -> &str { self.name } + pub fn name(self) -> &'s str { self.nodes[self.index].name } /// The 0-indexed byte-position of the name in the original expression tree. - pub fn name_pos(&self) -> usize { self.children_pos - self.name.len() - 1 } + pub fn name_pos(self) -> usize { self.nodes[self.index].name_pos } /// The 0-indexed byte-position of the '(' or '{' character which starts the /// expression's children. /// /// If the expression has no children, returns one past the end of the name. - pub fn children_pos(&self) -> usize { self.children_pos - self.name.len() - 1 } + pub fn children_pos(self) -> usize { self.name_pos() + self.name().len() + 1 } /// The number of children this node has. - pub fn n_children(&self) -> usize { self.args.len() } + pub fn n_children(self) -> usize { self.nodes[self.index].n_children } /// The type of parenthesis surrounding this node's children. /// /// If the node has no children, this will be `Parens::None`. - pub fn parens(&self) -> Parens { self.parens } + pub fn parens(self) -> Parens { self.nodes[self.index].parens } /// An iterator over the direct children of this node. /// - /// If you want to iterate recursively, use the [`TreeLike`] API which - /// provides methods `pre_order_iter` and `post_order_iter`. - pub fn children(&self) -> impl ExactSizeIterator { self.args.iter() } + /// If you want to iterate recursively, use the [`Self::pre_order_iter`] + /// or [`Self::rtl_post_order_iter`] method. + pub fn children(self) -> DirectChildIterator<'s> { + DirectChildIterator { current: self.first_child() } + } + + /// The index of the node in its underlying tree. + pub fn index(&self) -> usize { self.index } + + /// Accessor for the parent of the node, if it has a parent (is not the root). + pub fn parent(self) -> Option { + self.nodes[self.index] + .parent_idx + .map(|n| Self { nodes: self.nodes, index: n }) + } + + /// Whether the node is the first child of its parent. + /// + /// Returns false for the root. + pub fn is_first_child(self) -> bool { + self.nodes[self.index] + .parent_idx + .map(|n| n + 1 == self.index) + .unwrap_or(false) + } + + /// Accessor for the first child of the node, if it has a first child. + pub fn first_child(self) -> Option { + // If the node has any children at all, its first child is the one right after it. + self.nodes[self.index] + .last_child_idx + .map(|_| Self { nodes: self.nodes, index: self.index + 1 }) + } + + /// Accessor for the sibling of the node, if it has one. + pub fn right_sibling(self) -> Option { + self.nodes[self.index] + .right_sibling_idx + .map(|n| Self { nodes: self.nodes, index: n }) + } + + /// Helper function to find the rightmost descendant of a node. + /// + /// Used to construct iterators which cover only the node and its descendants. + /// If the node has no descendants, returns its own index. + fn rightmost_descendant_idx(self) -> usize { + let mut scan = self.index; + while let Some(idx) = self.nodes[scan].last_child_idx { + scan = idx; + while let Some(idx) = self.nodes[scan].right_sibling_idx { + scan = idx; + } + } + scan + } /// Split the name by a separating character. /// @@ -111,15 +263,18 @@ impl<'a> Tree<'a> { /// the suffix after the separator. Otherwise returns the whole name. /// /// If the separator occurs multiple times, returns an error. - pub fn name_separated(&self, separator: char) -> Result<(Option<&str>, &str), ParseTreeError> { - let mut name_split = self.name.splitn(3, separator); + pub fn name_separated( + self, + separator: char, + ) -> Result<(Option<&'s str>, &'s str), ParseTreeError> { + let mut name_split = self.name().splitn(3, separator); match (name_split.next(), name_split.next(), name_split.next()) { (None, _, _) => unreachable!("'split' always yields at least one element"), - (Some(_), None, _) => Ok((None, self.name)), + (Some(_), None, _) => Ok((None, self.name())), (Some(prefix), Some(name), None) => Ok((Some(prefix), name)), (Some(_), Some(_), Some(suffix)) => Err(ParseTreeError::MultipleSeparators { separator, - pos: self.children_pos - suffix.len() - 1, + pos: self.children_pos() - suffix.len() - 1, }), } } @@ -129,7 +284,7 @@ impl<'a> Tree<'a> { /// The `description` argument is only used to populate the error return, /// and is not validated in any way. pub fn verify_n_children( - &self, + self, description: &'static str, n_children: impl ops::RangeBounds, ) -> Result<(), ParseTreeError> { @@ -167,19 +322,19 @@ impl<'a> Tree<'a> { &self, name: &'static str, n_children: impl ops::RangeBounds, - ) -> Result<&Self, ParseTreeError> { + ) -> Result { assert!( !n_children.contains(&0), "verify_toplevel is intended for nodes with >= 1 child" ); - if self.name != name { - Err(ParseTreeError::IncorrectName { actual: self.name.to_owned(), expected: name }) - } else if self.parens == Parens::Curly { - Err(ParseTreeError::IllegalCurlyBrace { pos: self.children_pos }) + if self.name() != name { + Err(ParseTreeError::IncorrectName { actual: self.name().to_owned(), expected: name }) + } else if self.parens() == Parens::Curly { + Err(ParseTreeError::IllegalCurlyBrace { pos: self.children_pos() }) } else { self.verify_n_children(name, n_children)?; - Ok(&self.args[0]) + Ok(self.first_child().unwrap()) } } @@ -191,10 +346,11 @@ impl<'a> Tree<'a> { pub fn verify_after(&self) -> Result { self.verify_n_children("after", 1..=1) .map_err(ParseError::Tree)?; - self.args[0] + let child = self.first_child().unwrap(); + child .verify_n_children("absolute locktime", 0..=0) .map_err(ParseError::Tree)?; - parse_num(self.args[0].name) + parse_num(child.name()) .map_err(ParseError::Num) .and_then(|n| AbsLockTime::from_consensus(n).map_err(ParseError::AbsoluteLockTime)) } @@ -207,10 +363,11 @@ impl<'a> Tree<'a> { pub fn verify_older(&self) -> Result { self.verify_n_children("older", 1..=1) .map_err(ParseError::Tree)?; - self.args[0] + let child = self.first_child().unwrap(); + child .verify_n_children("relative locktime", 0..=0) .map_err(ParseError::Tree)?; - parse_num(self.args[0].name) + parse_num(child.name()) .map_err(ParseError::Num) .and_then(|n| RelLockTime::from_consensus(n).map_err(ParseError::RelativeLockTime)) } @@ -228,7 +385,7 @@ impl<'a> Tree<'a> { { self.verify_n_children(description, 0..=0) .map_err(ParseError::Tree)?; - T::from_str(self.name).map_err(ParseError::box_from_str) + T::from_str(self.name()).map_err(ParseError::box_from_str) } /// Check that a tree node has exactly one child, which is a terminal. @@ -248,7 +405,9 @@ impl<'a> Tree<'a> { { self.verify_n_children(description, 1..=1) .map_err(ParseError::Tree)?; - self.args[0].verify_terminal(inner_description) + self.first_child() + .unwrap() + .verify_terminal(inner_description) } /// Check that a tree node has exactly two children. @@ -257,12 +416,11 @@ impl<'a> Tree<'a> { /// /// The `description` argument is only used to populate the error return, /// and is not validated in any way. - pub fn verify_binary( - &self, - description: &'static str, - ) -> Result<(&Self, &Self), ParseTreeError> { + pub fn verify_binary(&self, description: &'static str) -> Result<(Self, Self), ParseTreeError> { self.verify_n_children(description, 2..=2)?; - Ok((&self.args[0], &self.args[1])) + let first_child = self.first_child().unwrap(); + let second_child = first_child.right_sibling().unwrap(); + Ok((first_child, second_child)) } /// Parses an expression tree as a threshold (a term with at least one child, @@ -279,11 +437,11 @@ impl<'a> Tree<'a> { /// and be able to return multiple error types.) pub fn verify_threshold< const MAX: usize, - F: FnMut(&Self) -> Result, + F: FnMut(Self) -> Result, T, E: From, >( - &self, + &'s self, mut map_child: F, ) -> Result, E> { let mut child_iter = self.children(); @@ -303,26 +461,59 @@ impl<'a> Tree<'a> { .and_then(|thresh| thresh.translate_by_index(|_| map_child(child_iter.next().unwrap()))) } + /// Returns an iterator over the nodes of the tree, in pre-order. + /// + /// Constructing the iterator takes O(depth) time. + pub fn pre_order_iter(&'s self) -> PreOrderIter<'s> { + PreOrderIter { nodes: self.nodes, inner: self.index..=self.rightmost_descendant_idx() } + } + + /// Returns an iterator over the nodes of the tree, in right-to-left post-order. + pub fn rtl_post_order_iter(&'s self) -> core::iter::Rev> { + self.pre_order_iter().rev() + } + /// Check that a tree has no curly-brace children in it. pub fn verify_no_curly_braces(&self) -> Result<(), ParseTreeError> { - for tree in self.pre_order_iter() { - if tree.parens == Parens::Curly { - return Err(ParseTreeError::IllegalCurlyBrace { pos: tree.children_pos }); + for node in self.rtl_post_order_iter() { + if node.parens() == Parens::Curly { + return Err(ParseTreeError::IllegalCurlyBrace { pos: node.children_pos() }); } } Ok(()) } +} + +#[derive(Debug, PartialEq, Eq)] +/// A parsed expression tree. See module-level documentation for syntax. +pub struct Tree<'s> { + /// The nodes, stored in pre-order. + nodes: Vec>, +} + +impl<'a> Tree<'a> { + /// Returns the root node of the tree, or `None` if the tree is empty. + pub fn root(&'a self) -> TreeIterItem<'a> { + assert_ne!( + self.nodes.len(), + 0, + "trees cannot be empty; the empty string parses as a single root with empty name" + ); + TreeIterItem { nodes: &self.nodes, index: 0 } + } /// Check that a string is a well-formed expression string, with optional /// checksum. /// - /// Returns the string with the checksum removed and its tree depth. - fn parse_pre_check(s: &str) -> Result<(&str, usize), ParseTreeError> { + /// Returns the string with the checksum removed, the maximum depth, and the + /// number of nodes in the tree. + fn parse_pre_check(s: &str) -> Result<(&str, usize, usize), ParseTreeError> { // First, scan through string to make sure it is well-formed. // Do ASCII/checksum check first; after this we can use .bytes().enumerate() rather // than .char_indices(), which is *significantly* faster. let s = verify_checksum(s)?; + let mut n_nodes = 1; let mut max_depth = 0; let mut open_paren_stack = Vec::with_capacity(128); for (pos, ch) in s.bytes().enumerate() { @@ -380,9 +571,15 @@ impl<'a> Tree<'a> { // now. return Err(ParseTreeError::UnmatchedCloseParen { ch: ch.into(), pos }); } - } else if ch == b',' && open_paren_stack.is_empty() { - // We consider commas outside of the tree to be "trailing characters" - return Err(ParseTreeError::TrailingCharacter { ch: ch.into(), pos }); + + n_nodes += 1; + } else if ch == b',' { + if open_paren_stack.is_empty() { + // We consider commas outside of the tree to be "trailing characters" + return Err(ParseTreeError::TrailingCharacter { ch: ch.into(), pos }); + } + + n_nodes += 1; } } // Catch "early end of string" @@ -399,7 +596,7 @@ impl<'a> Tree<'a> { }); } - Ok((s, max_depth)) + Ok((s, max_depth, n_nodes)) } /// Parses a tree from a string @@ -411,61 +608,73 @@ impl<'a> Tree<'a> { } fn from_str_inner(s: &'a str) -> Result { + fn new_node<'a>(nodes: &mut [TreeNode<'a>], stack: &[usize], pos: usize) -> TreeNode<'a> { + let parent_idx = stack.last().copied(); + if let Some(idx) = parent_idx { + nodes[idx].n_children += 1; + nodes[idx].last_child_idx = Some(nodes.len()); + } + + let mut new = TreeNode::null(nodes.len()); + new.name_pos = pos; + new.parent_idx = parent_idx; + new + } + // First, scan through string to make sure it is well-formed. - let (s, max_depth) = Self::parse_pre_check(s)?; - - // Now, knowing it is sane and well-formed, we can easily parse it backward, - // which will yield a post-order right-to-left iterator of its nodes. - let mut stack = Vec::with_capacity(max_depth); - let mut children_parens: Option<(Vec<_>, usize, Parens)> = None; - let mut node_name_end = s.len(); - for (pos, ch) in s.bytes().enumerate().rev() { - if ch == b')' || ch == b'}' { - stack.push(vec![]); - node_name_end = pos; + let (s, max_depth, n_nodes) = Self::parse_pre_check(s)?; + + let mut nodes = Vec::with_capacity(n_nodes); + + // Now, knowing it is sane and well-formed, we can easily parse it forward, + // as the string serialization lists all the nodes in pre-order. + let mut parent_stack = Vec::with_capacity(max_depth); + let mut current_node = Some(TreeNode::null(0)); + for (pos, ch) in s.bytes().enumerate() { + if ch == b'(' || ch == b'{' { + let mut current = current_node.expect("'(' only occurs after a node name"); + current.name = &s[current.name_pos..pos]; + current.parens = match ch { + b'(' => Parens::Round, + b'{' => Parens::Curly, + _ => unreachable!(), + }; + parent_stack.push(nodes.len()); + nodes.push(current); + + current_node = Some(new_node(&mut nodes, &parent_stack, pos + 1)); } else if ch == b',' { - let (mut args, children_pos, parens) = - children_parens - .take() - .unwrap_or((vec![], node_name_end, Parens::None)); - args.reverse(); - - let top = stack.last_mut().unwrap(); - let new_tree = - Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args }; - top.push(new_tree); - node_name_end = pos; - } else if ch == b'(' || ch == b'{' { - let (mut args, children_pos, parens) = - children_parens - .take() - .unwrap_or((vec![], node_name_end, Parens::None)); - args.reverse(); - - let mut top = stack.pop().unwrap(); - let new_tree = - Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args }; - top.push(new_tree); - children_parens = Some(( - top, - pos, - match ch { - b'(' => Parens::Round, - b'{' => Parens::Curly, - _ => unreachable!(), - }, - )); - node_name_end = pos; + if let Some(mut current) = current_node { + current.name = &s[current.name_pos..pos]; + nodes.push(current); + } + + if let Some(last_sib_idx) = + parent_stack.last().and_then(|n| nodes[*n].last_child_idx) + { + nodes[last_sib_idx].right_sibling_idx = Some(nodes.len()); + } + current_node = Some(new_node(&mut nodes, &parent_stack, pos + 1)); + } else if ch == b')' || ch == b'}' { + if let Some(mut current) = current_node { + current.name = &s[current.name_pos..pos]; + nodes.push(current); + } + + current_node = None; + parent_stack.pop(); } } + if let Some(mut current) = current_node { + current.name = &s[current.name_pos..]; + nodes.push(current); + } + + assert_eq!(parent_stack.capacity(), max_depth); + assert_eq!(nodes.capacity(), n_nodes); + assert_eq!(nodes.len(), nodes.capacity()); - assert_eq!(stack.len(), 0); - let (mut args, children_pos, parens) = - children_parens - .take() - .unwrap_or((vec![], node_name_end, Parens::None)); - args.reverse(); - Ok(Tree { name: &s[..node_name_end], children_pos, parens, args }) + Ok(Tree { nodes }) } } @@ -488,29 +697,76 @@ mod tests { use super::*; use crate::ParseError; - /// Test functions to manually build trees - fn leaf(name: &str) -> Tree { - Tree { name, parens: Parens::None, children_pos: name.len(), args: vec![] } + struct NodeBuilder<'a> { + inner: Vec>, + sibling_stack: Vec>, + parent_stack: Vec, + str_idx: usize, } - fn paren_node<'a>(name: &'a str, mut args: Vec>) -> Tree<'a> { - let mut offset = name.len() + 1; // +1 for open paren - for arg in &mut args { - arg.children_pos += offset; - offset += arg.name.len() + 1; // +1 for comma + impl<'a> NodeBuilder<'a> { + fn new() -> Self { + NodeBuilder { + inner: vec![], + sibling_stack: vec![None], + parent_stack: vec![], + str_idx: 0, + } } - Tree { name, parens: Parens::Round, children_pos: name.len(), args } - } + fn new_node_internal(&mut self, name: &'a str) -> TreeNode<'a> { + let mut new = TreeNode::null(self.inner.len()); + if let Some(idx) = self.parent_stack.last().copied() { + self.inner[idx].n_children += 1; + self.inner[idx].last_child_idx = Some(self.inner.len()); + new.parent_idx = Some(idx); + } + if let Some(idx) = self.sibling_stack.last().unwrap() { + self.inner[*idx].right_sibling_idx = Some(self.inner.len()); + self.str_idx += 1; + } + new.name = name; + new.name_pos = self.str_idx; + + *self.sibling_stack.last_mut().unwrap() = Some(self.inner.len()); + self.str_idx += name.len(); + new + } + + fn leaf(mut self, name: &'a str) -> Self { + let new = self.new_node_internal(name); - fn brace_node<'a>(name: &'a str, mut args: Vec>) -> Tree<'a> { - let mut offset = name.len() + 1; // +1 for open paren - for arg in &mut args { - arg.children_pos += offset; - offset += arg.name.len() + 1; // +1 for comma + self.inner.push(new); + self } - Tree { name, parens: Parens::Curly, children_pos: name.len(), args } + fn open(mut self, name: &'a str, paren: char) -> Self { + let mut new = self.new_node_internal(name); + + new.parens = match paren { + '(' => Parens::Round, + '{' => Parens::Curly, + _ => panic!(), + }; + self.str_idx += 1; + + self.parent_stack.push(self.inner.len()); + self.sibling_stack.push(None); + self.inner.push(new); + self + } + + fn close(mut self) -> Self { + self.str_idx += 1; + self.parent_stack.pop(); + self.sibling_stack.pop(); + self + } + + fn into_tree(self) -> Tree<'a> { + assert_eq!(self.parent_stack.len(), 0); + Tree { nodes: self.inner } + } } #[test] @@ -525,7 +781,10 @@ mod tests { #[test] fn parse_tree_basic() { - assert_eq!(Tree::from_str("thresh").unwrap(), leaf("thresh")); + assert_eq!( + Tree::from_str("thresh").unwrap(), + NodeBuilder::new().leaf("thresh").into_tree() + ); assert!(matches!( Tree::from_str("thresh,").unwrap_err(), @@ -542,7 +801,14 @@ mod tests { Error::Parse(ParseError::Tree(ParseTreeError::TrailingCharacter { ch: 't', pos: 8 })), )); - assert_eq!(Tree::from_str("thresh()").unwrap(), paren_node("thresh", vec![leaf("")])); + assert_eq!( + Tree::from_str("thresh()").unwrap(), + NodeBuilder::new() + .open("thresh", '(') + .leaf("") + .close() + .into_tree() + ); assert!(matches!( Tree::from_str("thresh(a()b)"), @@ -612,7 +878,14 @@ mod tests { fn parse_tree_taproot() { assert_eq!( Tree::from_str("a{b(c),d}").unwrap(), - brace_node("a", vec![paren_node("b", vec![leaf("c")]), leaf("d")]), + NodeBuilder::new() + .open("a", '{') + .open("b", '(') + .leaf("c") + .close() + .leaf("d") + .close() + .into_tree() ); } @@ -626,16 +899,18 @@ mod tests { assert_eq!( Tree::from_str(&desc).unwrap(), - paren_node( - "wsh", - vec![paren_node( - "t:or_c", - vec![ - paren_node("pk", vec![leaf(keys[0])]), - paren_node("v:pkh", vec![leaf(keys[1])]), - ] - )] - ), + NodeBuilder::new() + .open("wsh", '(') + .open("t:or_c", '(') + .open("pk", '(') + .leaf(keys[0]) + .close() + .open("v:pkh", '(') + .leaf(keys[1]) + .close() + .close() + .close() + .into_tree() ); } } diff --git a/src/miniscript/astelem.rs b/src/miniscript/astelem.rs index 779ca7122..c990f31fa 100644 --- a/src/miniscript/astelem.rs +++ b/src/miniscript/astelem.rs @@ -18,24 +18,26 @@ use crate::util::MsKeyBuilder; use crate::{expression, Error, FromStrKey, Miniscript, MiniscriptKey, Terminal, ToPublicKey}; impl crate::expression::FromTree for Arc> { - fn from_tree(top: &expression::Tree) -> Result>, Error> { - Ok(Arc::new(expression::FromTree::from_tree(top)?)) + fn from_tree(root: expression::TreeIterItem) -> Result>, Error> { + Ok(Arc::new(expression::FromTree::from_tree(root)?)) } } impl crate::expression::FromTree for Terminal { - fn from_tree(top: &expression::Tree) -> Result, Error> { - let binary = - |node: &expression::Tree, name, termfn: fn(_, _) -> Self| -> Result { - node.verify_binary(name) - .map_err(From::from) - .map_err(Error::Parse) - .and_then(|(x, y)| { - let x = Arc::>::from_tree(x)?; - let y = Arc::>::from_tree(y)?; - Ok(termfn(x, y)) - }) - }; + fn from_tree(top: expression::TreeIterItem) -> Result, Error> { + let binary = |node: expression::TreeIterItem, + name, + termfn: fn(_, _) -> Self| + -> Result { + node.verify_binary(name) + .map_err(From::from) + .map_err(Error::Parse) + .and_then(|(x, y)| { + let x = Arc::>::from_tree(x)?; + let y = Arc::>::from_tree(y)?; + Ok(termfn(x, y)) + }) + }; let (frag_wrap, frag_name) = top .name_separated(':') diff --git a/src/miniscript/mod.rs b/src/miniscript/mod.rs index b47d19e71..2bbfc4822 100644 --- a/src/miniscript/mod.rs +++ b/src/miniscript/mod.rs @@ -725,7 +725,7 @@ impl Miniscript { pub fn from_str_ext(s: &str, ext: &ExtParams) -> Result, Error> { // This checks for invalid ASCII chars let top = expression::Tree::from_str(s)?; - let ms: Miniscript = expression::FromTree::from_tree(&top)?; + let ms: Miniscript = expression::FromTree::from_tree(top.root())?; ms.ext_check(ext)?; if ms.ty.corr.base != types::Base::B { @@ -737,19 +737,19 @@ impl Miniscript { } impl crate::expression::FromTree for Arc> { - fn from_tree(top: &expression::Tree) -> Result>, Error> { - Ok(Arc::new(expression::FromTree::from_tree(top)?)) + fn from_tree(root: expression::TreeIterItem) -> Result>, Error> { + Ok(Arc::new(expression::FromTree::from_tree(root)?)) } } impl crate::expression::FromTree for Miniscript { /// Parse an expression tree into a Miniscript. As a general rule, this /// should not be called directly; rather go through the descriptor API. - fn from_tree(top: &expression::Tree) -> Result, Error> { - top.verify_no_curly_braces() + fn from_tree(root: expression::TreeIterItem) -> Result, Error> { + root.verify_no_curly_braces() .map_err(From::from) .map_err(Error::Parse)?; - let inner: Terminal = expression::FromTree::from_tree(top)?; + let inner: Terminal = expression::FromTree::from_tree(root)?; Miniscript::from_ast(inner) } } diff --git a/src/policy/concrete.rs b/src/policy/concrete.rs index f1401c583..8826b3cf7 100644 --- a/src/policy/concrete.rs +++ b/src/policy/concrete.rs @@ -835,7 +835,7 @@ impl str::FromStr for Policy { type Err = Error; fn from_str(s: &str) -> Result, Error> { let tree = expression::Tree::from_str(s)?; - let policy: Policy = FromTree::from_tree(&tree)?; + let policy: Policy = FromTree::from_tree(tree.root())?; policy.check_timelocks().map_err(Error::ConcretePolicy)?; Ok(policy) } @@ -847,7 +847,7 @@ impl Policy { /// Helper function for `from_tree` to parse subexpressions with /// names of the form x@y fn from_tree_prob( - top: &expression::Tree, + top: expression::TreeIterItem, allow_prob: bool, ) -> Result<(usize, Policy), Error> { // When 'allow_prob' is true we parse '@' signs out of node names. @@ -935,8 +935,8 @@ impl Policy { } impl expression::FromTree for Policy { - fn from_tree(top: &expression::Tree) -> Result, Error> { - Policy::from_tree_prob(top, false).map(|(_, result)| result) + fn from_tree(root: expression::TreeIterItem) -> Result, Error> { + Policy::from_tree_prob(root, false).map(|(_, result)| result) } } diff --git a/src/policy/semantic.rs b/src/policy/semantic.rs index 6f16b3d67..0c97c59c7 100644 --- a/src/policy/semantic.rs +++ b/src/policy/semantic.rs @@ -277,14 +277,14 @@ impl str::FromStr for Policy { type Err = Error; fn from_str(s: &str) -> Result, Error> { let tree = expression::Tree::from_str(s)?; - expression::FromTree::from_tree(&tree) + expression::FromTree::from_tree(tree.root()) } } serde_string_impl_pk!(Policy, "a miniscript semantic policy"); impl expression::FromTree for Policy { - fn from_tree(top: &expression::Tree) -> Result, Error> { + fn from_tree(top: expression::TreeIterItem) -> Result, Error> { match top.name() { "UNSATISFIABLE" => { top.verify_n_children("UNSATISFIABLE", 0..=0)