From e6fbea4c2c3c6d06cdc50e093a0b5cdaaef0f4c7 Mon Sep 17 00:00:00 2001 From: Vladimir Motylenko Date: Tue, 18 Jul 2023 15:03:19 +0300 Subject: [PATCH] Feat: add number support in NodeName attribute Make NodeName compatible with SGML basic types specification (by adding support of more than one punctuation in series). --- src/node/node_name.rs | 159 ++++++++++++++++++++++++++++++++++++++---- src/parser/mod.rs | 75 +------------------- tests/test.rs | 24 +++++++ 3 files changed, 172 insertions(+), 86 deletions(-) diff --git a/src/node/node_name.rs b/src/node/node_name.rs index 89acfde..a10d1fa 100644 --- a/src/node/node_name.rs +++ b/src/node/node_name.rs @@ -1,16 +1,60 @@ -use std::{convert::TryFrom, fmt}; +use std::{ + convert::TryFrom, + fmt::{self, Display}, +}; use proc_macro2::Punct; use syn::{ ext::IdentExt, - parse::{discouraged::Speculative, Parse}, + parse::{discouraged::Speculative, Parse, ParseStream, Peek}, punctuated::{Pair, Punctuated}, - token::{Brace, Colon, PathSep}, - Block, ExprPath, Ident, Path, PathSegment, + token::{Brace, Colon, Dot, PathSep}, + Block, ExprPath, Ident, LitInt, Path, PathSegment, }; use super::{atoms::tokens::Dash, path_to_string}; -use crate::{node::parse::block_expr, Error, Parser}; +use crate::{node::parse::block_expr, Error}; + +#[derive(Clone, Debug, syn_derive::Parse, syn_derive::ToTokens)] +pub enum NodeNameFragment { + #[parse(peek = Ident::peek_any)] + Ident(#[parse(Ident::parse_any)] Ident), + #[parse(peek = LitInt)] + Literal(LitInt), + // Used when the name contains more than one Punct in series + Empty, +} +impl NodeNameFragment { + fn peek_any(input: ParseStream) -> bool { + input.peek(Ident::peek_any) || input.peek(LitInt) + } +} + +impl PartialEq for NodeNameFragment { + fn eq(&self, other: &NodeNameFragment) -> bool { + match (self, other) { + (NodeNameFragment::Ident(s), NodeNameFragment::Ident(o)) => s == o, + // Compare literals by their string representation, + // so 0x00 and 0 would be different literals. 
+ (NodeNameFragment::Literal(s), NodeNameFragment::Literal(o)) => { + s.to_string() == o.to_string() + } + (NodeNameFragment::Empty, NodeNameFragment::Empty) => true, + _ => false, + } + } +} +impl Eq for NodeNameFragment {} + +impl Display for NodeNameFragment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + NodeNameFragment::Ident(i) => i.fmt(f), + NodeNameFragment::Literal(l) => l.fmt(f), + NodeNameFragment::Empty => Ok(()), + } + } +} /// Name of the node. #[derive(Clone, Debug, syn_derive::ToTokens)] @@ -19,9 +63,25 @@ pub enum NodeName { /// be separated by double colons, e.g. ``. Path(ExprPath), + /// /// Name separated by punctuation, e.g. `
` or `
`. - Punctuated(Punctuated), + /// + /// It is fully compatible with the SGML (ID/NAME) token format, + /// which is described as follows: + /// ID and NAME tokens must begin with a letter ([A-Za-z]) and may be + /// followed by any number of letters, digits ([0-9]), hyphens ("-"), + /// underscores ("_"), colons (":"), and periods ("."). + /// + /// Supports more than one punctuation in series; in this case + /// `NodeNameFragment::Empty` would be used. + /// + /// Note that the punct and `NodeNameFragment` have different `Span`s and an IDE + /// (rust-analyzer/idea) can control them independently. + /// So if one needs to add semantic highlighting or go-to-definition to the entire + /// `NodeName`, one should emit helper statements for each `Punct` and + /// `NodeNameFragment` (excluding the `Empty` fragment). + Punctuated(Punctuated), /// Arbitrary rust code in braced `{}` blocks. Block(Block), @@ -68,6 +128,75 @@ impl NodeName { _ => false, } } + + /// Parse the stream as punctuated idents. + /// + /// We can't replace this with [`Punctuated::parse_separated_nonempty`] + /// since that doesn't support reserved keywords. Might be worth to + /// consider a PR upstream. 
+ /// + /// [`Punctuated::parse_separated_nonempty`]: https://docs.rs/syn/1.0.58/syn/punctuated/struct.Punctuated.html#method.parse_separated_nonempty + pub(crate) fn node_name_punctuated_ident>( + input: ParseStream, + punct: F, + ) -> syn::Result> { + let fork = &input.fork(); + let mut segments = Punctuated::::new(); + + while !fork.is_empty() && fork.peek(Ident::peek_any) { + let ident = Ident::parse_any(fork)?; + segments.push_value(ident.clone().into()); + + if fork.peek(punct) { + segments.push_punct(fork.parse()?); + } else { + break; + } + } + + if segments.len() > 1 { + input.advance_to(fork); + Ok(segments) + } else { + Err(fork.error("expected punctuated node name")) + } + } + + /// Parse the stream as punctuated name fragments (idents or integer literals), + /// accepting `punct` or either of the two alternate punctuations as separator + pub(crate) fn node_name_punctuated_ident_with_two_alternate< + T: Parse, + F: Peek, + G: Peek, + H: Peek, + X: From, + >( + input: ParseStream, + punct: F, + alternate_punct: G, + alternate_punct2: H, + ) -> syn::Result> { + let fork = &input.fork(); + let mut segments = Punctuated::::new(); + + while !fork.is_empty() && NodeNameFragment::peek_any(fork) { + let ident = NodeNameFragment::parse(fork)?; + segments.push_value(ident.clone().into()); + + if fork.peek(punct) || fork.peek(alternate_punct) || fork.peek(alternate_punct2) { + segments.push_punct(fork.parse()?); + } else { + break; + } + } + + if segments.len() > 1 { + input.advance_to(fork); + Ok(segments) + } else { + Err(fork.error("expected punctuated node name")) + } + } } impl TryFrom<&NodeName> for Block { @@ -142,8 +271,13 @@ impl fmt::Display for NodeName { impl Parse for NodeName { fn parse(input: syn::parse::ParseStream) -> syn::Result { - if input.peek2(PathSep) { - Parser::node_name_punctuated_ident:: PathSep, PathSegment>( + if input.peek(LitInt) { + Err(syn::Error::new( + input.span(), + "Name must start with latin character", + )) + } else if input.peek2(PathSep) { + NodeName::node_name_punctuated_ident:: PathSep, 
PathSegment>( input, PathSep, ) .map(|segments| { @@ -156,13 +290,14 @@ impl Parse for NodeName { }, }) }) - } else if input.peek2(Colon) || input.peek2(Dash) { - Parser::node_name_punctuated_ident_with_alternate::< + } else if input.peek2(Colon) || input.peek2(Dash) || input.peek2(Dot) { + NodeName::node_name_punctuated_ident_with_two_alternate::< Punct, fn(_) -> Colon, fn(_) -> Dash, - Ident, - >(input, Colon, Dash) + fn(_) -> Dot, + NodeNameFragment, + >(input, Colon, Dash, Dot) .map(NodeName::Punctuated) } else if input.peek(Brace) { let fork = &input.fork(); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f9be3dc..31043ca 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4,13 +4,7 @@ use std::vec; use proc_macro2::TokenStream; use proc_macro2_diagnostics::Diagnostic; -use syn::{ - ext::IdentExt, - parse::{discouraged::Speculative, Parse, ParseStream, Peek}, - punctuated::Punctuated, - spanned::Spanned, - Ident, Result, -}; +use syn::{parse::ParseStream, spanned::Spanned, Result}; pub mod recoverable; @@ -110,71 +104,4 @@ impl Parser { let nodes = if nodes.is_empty() { None } else { Some(nodes) }; ParsingResult::from_parts(nodes, errors) } - - /// Parse the stream as punctuated idents. - /// - /// We can't replace this with [`Punctuated::parse_separated_nonempty`] - /// since that doesn't support reserved keywords. Might be worth to - /// consider a PR upstream. 
- /// - /// [`Punctuated::parse_separated_nonempty`]: https://docs.rs/syn/1.0.58/syn/punctuated/struct.Punctuated.html#method.parse_separated_nonempty - pub(crate) fn node_name_punctuated_ident>( - input: ParseStream, - punct: F, - ) -> Result> { - let fork = &input.fork(); - let mut segments = Punctuated::::new(); - - while !fork.is_empty() && fork.peek(Ident::peek_any) { - let ident = Ident::parse_any(fork)?; - segments.push_value(ident.clone().into()); - - if fork.peek(punct) { - segments.push_punct(fork.parse()?); - } else { - break; - } - } - - if segments.len() > 1 { - input.advance_to(fork); - Ok(segments) - } else { - Err(fork.error("expected punctuated node name")) - } - } - - /// Parse the stream as punctuated idents, with two possible punctuations - /// available - pub(crate) fn node_name_punctuated_ident_with_alternate< - T: Parse, - F: Peek, - G: Peek, - X: From, - >( - input: ParseStream, - punct: F, - alternate_punct: G, - ) -> Result> { - let fork = &input.fork(); - let mut segments = Punctuated::::new(); - - while !fork.is_empty() && fork.peek(Ident::peek_any) { - let ident = Ident::parse_any(fork)?; - segments.push_value(ident.clone().into()); - - if fork.peek(punct) || fork.peek(alternate_punct) { - segments.push_punct(fork.parse()?); - } else { - break; - } - } - - if segments.len() > 1 { - input.advance_to(fork); - Ok(segments) - } else { - Err(fork.error("expected punctuated node name")) - } - } } diff --git a/tests/test.rs b/tests/test.rs index 6773464..34f0667 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -298,6 +298,30 @@ fn test_dashed_attribute_name() -> Result<()> { Ok(()) } +#[test] +#[should_panic = "Name must start with latin character"] +fn test_dashed_attribute_name_integers_not_supported_at_beginning() { + let tokens = quote! { +
+ }; + + let _ = parse2(tokens).unwrap(); +} + +#[test] +fn test_dashed_attribute_name_with_long_integer_suffixes() -> Result<()> { + let tokens = quote! { +
+ }; + + let nodes = parse2(tokens)?; + let attribute = get_element_attribute(&nodes, 0, 0); + + assert_eq!(attribute.key.to_string(), "data-14-32px-32mzxksq"); + + Ok(()) +} + #[test] fn test_coloned_attribute_name() -> Result<()> { let tokens = quote! {