From 3f52006c50575118dd2288cd4f4efaff87d735b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9E=97=E7=8E=AE=20=28Jade=20Lin=29?= Date: Wed, 25 Sep 2024 22:40:47 +0800 Subject: [PATCH] Upgrade to peggen from pratt-gen (#14) * Upgrade to peggen from pratt-gen * Bump bearmark-ql version to 0.2.0 --- Cargo.toml | 14 ++- bearmark-ql/Cargo.toml | 8 +- bearmark-ql/src/lib.rs | 279 ++++++++++++++++------------------------- src/db/search.rs | 195 +++++++++++++--------------- 4 files changed, 214 insertions(+), 282 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0ba1e3f..17a8007 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,8 @@ edition = "2021" members = [".", "bearmark-ql"] [workspace.dependencies] -serde = { version = "1.0.203", features = ["derive"] } +# allocation +bumpalo = "3.16.0" # logging tracing = "0.1.40" tracing-appender = "0.2.3" @@ -33,16 +34,21 @@ openssl-sys = { version = "0.9.102", features = [ ] } # static linking required pq-sys = { version = "0.6.1", features = ["bundled"] } # static linking required +##################### +# search enhancement +##################### +bearmark-ql = { path = "bearmark-ql" } +# allocation +bumpalo.workspace = true + ################## # other utilities ################## -serde.workspace = true +serde = { version = "1.0.203", features = ["derive"] } rand = "0.8.5" itertools = "0.13.0" time = { version = "0.3.36", features = ["local-offset", "macros", "serde"] } percent-encoding = "2.3.1" -# search enhancement -bearmark-ql = { path = "bearmark-ql" } # read settings from the dotenv file dotenvy = "0.15.7" # logging diff --git a/bearmark-ql/Cargo.toml b/bearmark-ql/Cargo.toml index ec55a62..32aa867 100644 --- a/bearmark-ql/Cargo.toml +++ b/bearmark-ql/Cargo.toml @@ -1,11 +1,13 @@ [package] name = "bearmark-ql" -version = "0.1.0" +version = "0.2.0" edition = "2021" [dependencies] -pratt-gen = "0.1.0" -serde.workspace = true +peggen = "0.2.6" + +# allocation +bumpalo.workspace = true # logging tracing.workspace = 
true diff --git a/bearmark-ql/src/lib.rs b/bearmark-ql/src/lib.rs index 7998068..9cd6861 100644 --- a/bearmark-ql/src/lib.rs +++ b/bearmark-ql/src/lib.rs @@ -1,195 +1,135 @@ -use std::fmt::{Debug, Display}; +use bumpalo::boxed::Box as BBox; +use bumpalo::collections::String as BString; +use peggen::*; -use pratt_gen::*; -use serde::Serializer; +pub use peggen::Parser; -pub use pratt_gen::{parse, Arena, Source}; - -#[derive(Debug, Clone, Copy, ParserImpl, Space)] +#[derive(Debug, PartialEq, ParseImpl, Space, Num, EnumAstImpl)] +#[with(&'a bumpalo::Bump)] pub enum Query<'a> { - #[parse("{0:2} | {1:1}", precedence = 2)] - Or(&'a Self, &'a Self), - #[parse("{0:4} {1:3}", precedence = 4)] - And(&'a Self, &'a Self), - #[parse("({0})")] - Parenthesized(&'a Self), - #[parse("{0}")] - Primitive(Primitive<'a>), -} - -#[derive(Debug, Clone, Copy, ParserImpl, Space)] -pub enum Primitive<'a> { - #[parse("{0}")] - Path(Path<'a>), - #[parse("#{0}")] - Tag(Tag<'a>), - #[parse("{0}")] - Keyword(Keyword<'a>), -} - -#[derive(Debug, Clone, Copy, ParserImpl, Space)] -pub enum Path<'a> { - #[parse("./{0}")] - Relative(&'a RelativePath<'a>), - #[parse("/{0}")] - Absolute(&'a RelativePath<'a>), - #[parse("/")] - Root(), - #[parse("./")] - CWD(), -} - -#[derive(Debug, Clone, Copy, ParserImpl, Space)] -pub enum RelativePath<'a> { - #[parse("{0}/{1}")] - Join(Keyword<'a>, &'a Self), - #[parse("{0}/")] - NameEndSlash(Keyword<'a>), - #[parse("{0}")] - Name(Keyword<'a>), - #[parse("/")] // for tailing "/", "//" ... 
syntax - ExtraSlash(), + #[rule("{0:0} | {1:1}", group = 0)] + Or(BBox<'a, Query<'a>>, BBox<'a, Query<'a>>), + #[rule("{0:1} {1:2}", group = 1)] + And(BBox<'a, Query<'a>>, BBox<'a, Query<'a>>), + #[rule(r"( {0} )", group = 2)] + Parenthesized(BBox<'a, Query<'a>>), + #[rule(r#"#{0:`\w*`}"#, group = 2)] + Tag(BString<'a>), + #[rule(r#"{0:`\w+`}"#, group = 2)] + Keyword(BString<'a>), + #[rule(r#"{0:`(\.)?(/\w+)*/{0,2}`}"#, group = 3)] + Path(BString<'a>), } -#[derive(Debug, Clone, Copy, ParserImpl, Space)] -pub enum Tag<'a> { - #[parse("{0}")] - Keyword(Keyword<'a>), - #[parse("")] - Null(), -} - -#[derive(Clone, Copy, ParserImpl, Space)] -pub enum Keyword<'a> { - #[parse("{0}")] - Quoted(&'a str), - #[parse("{0}")] - Unquoted(&'a Ident<'a>), -} - -impl<'a> Debug for Keyword<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.serialize_str(match self { - Self::Quoted(s) => s, - Self::Unquoted(s) => s.0, - }) - } +#[cfg(test)] +#[cfg(not(tarpaulin_include))] +#[ctor::ctor] +fn init() { + use std::io; + use tracing_subscriber::{prelude::*, EnvFilter}; + + let console_log = tracing_subscriber::fmt::layer() + .pretty() + .with_writer(io::stdout) + .boxed(); + + tracing_subscriber::registry() + .with(vec![console_log]) + .with(EnvFilter::from_default_env()) + .init(); } -impl<'a> From> for &'a str { - fn from(k: Keyword<'a>) -> &'a str { - match k { - Keyword::Quoted(s) => s, - Keyword::Unquoted(s) => s.0, - } - } -} +#[cfg(test)] +mod test { + use super::*; + use Query::*; -impl<'a> From> for &'a str { - fn from(t: Tag<'a>) -> &'a str { - match t { - Tag::Keyword(k) => k.into(), - Tag::Null() => "", - } - } -} + use tracing::info; -impl Display for Keyword<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Keyword::Quoted(s) => write!(f, "\"{}\"", s), - Keyword::Unquoted(s) => write!(f, "{}", s.0), + #[test] + fn test_primitive_path() { + let bump = bumpalo::Bump::new(); + for src in [ + "/", // root 
+ "./", // relative root + "/bar", // descendant of /bar + "/bar/", // descendant of /bar + "/bar/boo", // descendant of /bar/boo + "/bar/boo/", // descendant of /bar/boo + "./bar", // descendant of ./bar + "./bar/", // descendant of ./bar + "./bar/boo", // descendant of ./bar/boo + "./bar/boo/", // descendant of ./bar/boo + "//", // children of root + ".//", // children of relative root + "/boo//", // children of /boo + "./boo//", // children of ./boo + ] { + let rv = Parser::::parse_with(src, &bump); + info!(?rv, src, "parse result"); + assert!(rv.is_ok()); + assert_eq!(rv.unwrap(), Path(BString::from_str_in(src, &bump))); } } -} -impl Display for Tag<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Tag::Keyword(k) => write!(f, "#{}", k), - Tag::Null() => Ok(()), + #[test] + fn test_primitive_tag() { + let bump = bumpalo::Bump::new(); + for src in [ + "", // empty tag + "foo", // tag foo + "foo_bar", // tag foo_bar + ] { + let rv = Parser::::parse_with(&format!("#{}", src), &bump); + info!(?rv, src, "parse result"); + assert!(rv.is_ok()); + assert_eq!(rv.unwrap(), Tag(BString::from_str_in(src, &bump))); } } -} -#[derive(Debug)] -pub struct QueryResult<'a> { - pub tags: Vec<&'a str>, - pub keywords: Vec<&'a str>, - pub paths: Vec<&'a str>, -} - -fn _path_to_str_parts<'a>(p: Path<'a>, parts: &mut Vec<&'a str>) { - let mut cur = match p { - Path::Root() => { - parts.push(""); - parts.push(""); - return; - } - Path::Absolute(p) => { - parts.push(""); - *p - } - Path::Relative(p) => { - parts.push("."); - *p - } - Path::CWD() => { - parts.push("."); - parts.push(""); - return; - } - }; - while match cur { - RelativePath::Join(item, that) => { - parts.push(item.into()); - cur = *that; - true - } - RelativePath::Name(item) => { - parts.push(item.into()); - false - } - RelativePath::NameEndSlash(item) => { - parts.push(item.into()); - parts.push(""); - false - } - RelativePath::ExtraSlash() => { - parts.push(""); - parts.push(""); - 
false + #[test] + fn test_primitive_keyword() { + let bump = bumpalo::Bump::new(); + for src in [ + "foo", // keyword foo + "foo_bar", // keyword foo_bar + ] { + let rv = Parser::::parse_with(src, &bump); + info!(?rv, src, "parse result"); + assert!(rv.is_ok()); + assert_eq!(rv.unwrap(), Keyword(BString::from_str_in(src, &bump))); } - } {} -} - -impl Display for Path<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut parts = vec![]; - _path_to_str_parts(*self, &mut parts); - write!(f, "{}", parts.join("/")) } -} -#[cfg(test)] -mod test { - use super::*; - use tracing::{debug, info}; - - fn parse_query<'a>(raw: &'a str, out_arena: &'a Arena, err_arena: &'a Arena) -> Query<'a> { - let source = Source::new(raw); - let rv = parse::(source, out_arena, err_arena); - debug!(?rv, ?source, "parsed"); + #[test] + fn test_query_and() { + let src = r#"#title | trust rust"#; + let bump = bumpalo::Bump::new(); + let rv = Parser::::parse_with(src, &bump); + info!(?rv, src, "parse result"); assert!(rv.is_ok()); - rv.unwrap() + assert_eq!( + rv.unwrap(), + Or( + BBox::new_in(Tag(BString::from_str_in("title", &bump)), &bump), + BBox::new_in( + And( + BBox::new_in(Keyword(BString::from_str_in("trust", &bump)), &bump), + BBox::new_in(Keyword(BString::from_str_in("rust", &bump)), &bump) + ), + &bump + ) + ) + ); } #[test] fn test_parsing() { - let out_arena = Arena::new(); - let err_arena = Arena::new(); - + let bump = bumpalo::Bump::new(); for src in [ + r#"title #rust"#, + r#"/cs/pl/rust"#, + r#"/cs/pl title"#, r#"/cs/pl/rust title #rust"#, r#"/cs/pl title #rust"#, r#"/cs title #rust"#, @@ -199,8 +139,9 @@ mod test { r#"title ( #rust | #langs )"#, r#"/blog/"#, ] { - let rv = parse_query(src, &out_arena, &err_arena); + let rv = Parser::::parse_with(src, &bump); info!(?rv, ?src, "parsed"); + assert!(rv.is_ok()); } } } diff --git a/src/db/search.rs b/src/db/search.rs index e23e477..263fa4c 100644 --- a/src/db/search.rs +++ b/src/db/search.rs @@ -1,3 
+1,4 @@ +use bumpalo; use diesel::expression::BoxableExpression; use diesel::pg::Pg; use diesel::prelude::*; @@ -10,17 +11,15 @@ use super::folder::Folder; use super::tag::Tag; use crate::db::schema; use crate::utils::{BearQLError, CommonError}; - -use bearmark_ql::{self as search, parse, Arena, Source}; +use bearmark_ql; fn parse_query<'a>( - raw: &'a str, - out_arena: &'a Arena, - err_arena: &'a Arena, -) -> Result, BearQLError> { + raw: &str, + bump: &'a bumpalo::Bump, +) -> Result, BearQLError> { + use bearmark_ql::{Parser, Query}; debug!(?raw, "parsing query"); - let source = Source::new(raw); - let rv = parse::(source, out_arena, err_arena).map_err(|e| { + let rv = Parser::::parse_with(raw, bump).map_err(|e| { warn!(?raw, ?e, "failed to parse query"); BearQLError::SyntaxError { msg: "failed to parse query".to_string(), @@ -77,103 +76,80 @@ fn find_bookmarks_in_path( } fn find_bookmarks( - query: search::Query<'_>, + query: &bearmark_ql::Query, cwd: &str, cwd_overwrited: &mut bool, ) -> Result>, CommonError> { use super::schema::{bookmarks, bookmarks_tags, tags}; - use search::Primitive::*; - use search::Query::*; + use bearmark_ql::Query::*; Ok(match query { - Or(a, b) => Box::new(find_bookmarks(*a, cwd, cwd_overwrited)?.or(find_bookmarks( - *b, + Or(a, b) => Box::new(find_bookmarks(a, cwd, cwd_overwrited)?.or(find_bookmarks( + b, cwd, cwd_overwrited, )?)), - And(a, b) => Box::new(find_bookmarks(*a, cwd, cwd_overwrited)?.and(find_bookmarks( - *b, + And(a, b) => Box::new(find_bookmarks(a, cwd, cwd_overwrited)?.and(find_bookmarks( + b, cwd, cwd_overwrited, )?)), - Parenthesized(a) => find_bookmarks(*a, cwd, cwd_overwrited)?, - Primitive(p) => { - match p { - Path(p) => { - let target = p.to_string(); - let path = join_folder_path(cwd, &target); - *cwd_overwrited = true; - debug!(?path, ?cwd, ?target, "searching in path"); - if path == "/" { - Box::new(bookmarks::dsl::id.eq(bookmarks::dsl::id)) // always true, no side effects - } else if path == "//" { - 
Box::new(bookmarks::dsl::folder_id.is_null()) // special syntax. search bookmarks which are not in any folder - } else { - find_bookmarks_in_path(&path)? - } - } - Tag(t) => { - let t = t.to_string(); - let t = t.trim_start_matches('#').trim().to_string(); - if t.is_empty() { - return Err(CommonError::BearQL(BearQLError::EmptyTag)); - } - let bookmarks = diesel::alias!(bookmarks as bm); - Box::new( - bookmarks::dsl::id.eq_any( - bookmarks - .inner_join(bookmarks_tags::table) - .filter(bookmarks_tags::dsl::tag_id.eq_any( - tags::table.filter(tags::dsl::name.eq(t)).select(tags::id), - )) - .select(bookmarks.fields(bookmarks::id)) - .distinct(), - ), - ) - } - Keyword(k) => { - let k = k.to_string(); - let k = k.trim().to_string(); - if k.is_empty() { - return Err(CommonError::BearQL(BearQLError::EmptyKeyword)); - } - Box::new( - bookmarks::dsl::title - .ilike(format!("%{}%", k)) - .or(bookmarks::dsl::url.ilike(format!("%{}%", k))), - ) - } + Parenthesized(a) => find_bookmarks(a, cwd, cwd_overwrited)?, + Path(p) => { + let target = p.to_string(); + let path = join_folder_path(cwd, &target); + *cwd_overwrited = true; + debug!(?path, ?cwd, ?target, "searching in path"); + if path == "/" { + Box::new(bookmarks::dsl::id.eq(bookmarks::dsl::id)) // always true, no side effects + } else if path == "//" { + Box::new(bookmarks::dsl::folder_id.is_null()) // special syntax. search bookmarks which are not in any folder + } else { + find_bookmarks_in_path(&path)? 
+ } + } + Tag(t) => { + let t = t.to_string(); + let t = t.trim_start_matches('#').trim().to_string(); + if t.is_empty() { + return Err(CommonError::BearQL(BearQLError::EmptyTag)); } + let bookmarks = diesel::alias!(bookmarks as bm); + Box::new( + bookmarks::dsl::id.eq_any( + bookmarks + .inner_join(bookmarks_tags::table) + .filter( + bookmarks_tags::dsl::tag_id + .eq_any(tags::table.filter(tags::dsl::name.eq(t)).select(tags::id)), + ) + .select(bookmarks.fields(bookmarks::id)) + .distinct(), + ), + ) + } + Keyword(k) => { + let k = k.to_string(); + let k = k.trim().to_string(); + if k.is_empty() { + return Err(CommonError::BearQL(BearQLError::EmptyKeyword)); + } + Box::new( + bookmarks::dsl::title + .ilike(format!("%{}%", k)) + .or(bookmarks::dsl::url.ilike(format!("%{}%", k))), + ) } }) } +/// Search bookmarks by paths, keywords, and tags. pub async fn search_bookmarks( conn: &mut Connection, query: Option<&str>, cwd: Option<&str>, before: i32, limit: i64, -) -> Result, Vec)>, CommonError> { - if let Some(query) = query { - let out_arena = Arena::new(); - let err_arena = Arena::new(); - - let rv = parse_query(query, &out_arena, &err_arena)?; - - search_bookmarks_with_query(conn, Some(rv), cwd, before, limit).await - } else { - search_bookmarks_with_query(conn, None, cwd, before, limit).await - } -} - -/// Search bookmarks by paths, keywords, and tags. -async fn search_bookmarks_with_query( - conn: &mut Connection, - query: Option>, - cwd: Option<&str>, - before: i32, - limit: i64, ) -> Result, Vec)>, CommonError> { use super::schema::bookmarks; @@ -182,10 +158,13 @@ async fn search_bookmarks_with_query( .distinct_on(bookmarks::id) .filter(bookmarks::dsl::deleted_at.is_null()) .into_boxed(); + let mut cwd_overwrited = false; if let Some(query) = query { let cwd = cwd.unwrap_or("/"); - builder = builder.filter(find_bookmarks(query, cwd, &mut cwd_overwrited)?) 
+ let bump = bumpalo::Bump::new(); + let query = parse_query(query, &bump)?; + builder = builder.filter(find_bookmarks(&query, cwd, &mut cwd_overwrited)?) } if !cwd_overwrited { if let Some(cwd) = cwd { @@ -199,21 +178,22 @@ async fn search_bookmarks_with_query( } } } + if before > 0 { builder = builder.filter(bookmarks::dsl::id.lt(before)); } + let lst = builder .order_by(bookmarks::id.desc()) .limit(limit) .load::(conn) .await .expect("Error loading bookmarks"); - - if lst.is_empty() { - return Ok(vec![]); - } - - Ok(get_bookmark_details(conn, lst).await) + Ok(if lst.is_empty() { + vec![] + } else { + get_bookmark_details(conn, lst).await + }) } pub async fn get_bookmark_details( @@ -317,31 +297,27 @@ pub(crate) mod test { Keyword(String), } - fn simplify_query(q: search::Query) -> Query { + fn simplify_query(q: &bearmark_ql::Query) -> Query { use Query::*; match q { - search::Query::Or(a, b) => { - Or(Box::new(simplify_query(*a)), Box::new(simplify_query(*b))) + bearmark_ql::Query::Or(a, b) => { + Or(Box::new(simplify_query(a)), Box::new(simplify_query(b))) } - search::Query::And(a, b) => { - And(Box::new(simplify_query(*a)), Box::new(simplify_query(*b))) + bearmark_ql::Query::And(a, b) => { + And(Box::new(simplify_query(a)), Box::new(simplify_query(b))) } - search::Query::Parenthesized(a) => Parenthesized(Box::new(simplify_query(*a))), - search::Query::Primitive(p) => match p { - search::Primitive::Path(p) => Path(p.to_string()), - search::Primitive::Tag(t) => Tag(t.to_string().trim_start_matches('#').to_string()), - search::Primitive::Keyword(k) => Keyword(k.to_string()), - }, + bearmark_ql::Query::Parenthesized(a) => Parenthesized(Box::new(simplify_query(a))), + bearmark_ql::Query::Path(p) => Path(p.to_string()), + bearmark_ql::Query::Tag(t) => Tag(t.to_string().trim_start_matches('#').to_string()), + bearmark_ql::Query::Keyword(k) => Keyword(k.to_string()), } } #[test] fn test_parse_query() { - let out_arena = Arena::new(); - let err_arena = Arena::new(); - 
use Query::*; + let bump = bumpalo::Bump::new(); for (raw, expect) in &[ ("rust", Keyword("rust".into())), ("#rust", Tag("rust".into())), @@ -352,6 +328,13 @@ pub(crate) mod test { ("//", Path("//".into())), (".//", Path(".//".into())), ("/blog/", Path("/blog/".into())), + ( + "title #rust", + And( + Box::new(Keyword("title".into())), + Box::new(Tag("rust".into())), + ), + ), ( "rust | langs go", Or( @@ -383,9 +366,9 @@ pub(crate) mod test { ), ), ] { - let query = parse_query(raw, &out_arena, &err_arena).unwrap(); - let query = simplify_query(query); - + let query = parse_query(raw, &bump).unwrap(); + let query = simplify_query(&query); + info!(?raw, ?query, ?expect, "testing parse query"); assert_eq!(query, *expect); } }