Skip to content

Commit

Permalink
Optimize grep performance specifically for short queries (#1074)
Browse files Browse the repository at this point in the history
* Add more fuzzy algorithm options

* micro optimization: load total_processed when necessary

This commit yields a 2% performance increase.

Also abstract out `SearchInfo` for further improvements.

* .

* clippy fixes

* Switch to Jemalloc
  • Loading branch information
liuchengxu authored Apr 22, 2024
1 parent d19e5bb commit 05349d6
Show file tree
Hide file tree
Showing 11 changed files with 223 additions and 61 deletions.
48 changes: 48 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 22 additions & 1 deletion crates/cli/src/command/grep/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@ pub use self::live_grep::LiveGrep;
pub const RG_EXEC_CMD: &str =
"rg --column --line-number --no-heading --color=never --smart-case '' .";

// Fuzzy matching algorithms that can be chosen for the `Grep` command through
// its `fuzzy_algo` option.
//
// Each variant is translated one-to-one into the `matcher::FuzzyAlgorithm`
// variant of the same name before the matcher is built.
//
// NOTE(review): plain `//` comments are used here on purpose; clap's derive
// macros may surface `///` doc comments in the generated help output, which
// would change what the program prints — confirm before converting them.
#[derive(clap::ValueEnum, Default, Clone, Debug)]
enum FuzzyAlgo {
// Selected when the option is omitted (the field uses `default_value_t`).
#[default]
Fzy,
Skim,
FzfV2,
Nucleo,
}

#[derive(Parser, Debug, Clone)]
pub struct Grep {
/// Specify the query string for GREP_CMD.
Expand All @@ -34,6 +43,10 @@ pub struct Grep {
#[clap(long, value_parser)]
cmd_dir: Option<PathBuf>,

/// Specify the fuzzy matching algorithm.
#[clap(long, default_value_t, value_enum)]
fuzzy_algo: FuzzyAlgo,

/// Recreate the grep cache.
///
/// Only intended for the test purpose.
Expand Down Expand Up @@ -69,7 +82,15 @@ impl Grep {
None => std::env::current_dir()?,
};

let clap_matcher = matcher::MatcherBuilder::new().build(self.grep_query.clone().into());
let fuzzy_algo = match self.fuzzy_algo {
FuzzyAlgo::Skim => matcher::FuzzyAlgorithm::Skim,
FuzzyAlgo::Fzy => matcher::FuzzyAlgorithm::Fzy,
FuzzyAlgo::FzfV2 => matcher::FuzzyAlgorithm::FzfV2,
FuzzyAlgo::Nucleo => matcher::FuzzyAlgorithm::Nucleo,
};
let clap_matcher = matcher::MatcherBuilder::new()
.fuzzy_algo(fuzzy_algo)
.build(self.grep_query.clone().into());

let search_result =
maple_core::searcher::grep::cli_search(vec![dir], clap_matcher).await;
Expand Down
3 changes: 3 additions & 0 deletions crates/maple/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ tokio = { workspace = true, features = ["rt"] }
cli = { workspace = true }
upgrade = { workspace = true }

[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.5"

[build-dependencies]
built = { package = "built", version = "0.6", features = ["git2"] }
chrono = { workspace = true }
4 changes: 4 additions & 0 deletions crates/maple/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use clap::Parser;
use cli::{Args, RunCmd};

// Install jemalloc as the process-wide allocator in place of the default one.
// Compiled out on MSVC targets, where the `tikv-jemallocator` dependency is
// not pulled in (see the matching `cfg` in this crate's Cargo.toml).
#[cfg(not(target_env = "msvc"))]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

const BUILD_TIME: &str = include!(concat!(env!("OUT_DIR"), "/compiled_at.txt"));

mod built_info {
Expand Down
14 changes: 8 additions & 6 deletions crates/maple_core/src/searcher/grep/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ pub use self::stoppable_searcher::search;
use self::stoppable_searcher::{FileResult, StoppableSearchImpl, UPDATE_INTERVAL};
use matcher::Matcher;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Instant;
use tokio::sync::mpsc::unbounded_channel;

use super::SearchInfo;

#[derive(Debug)]
pub struct SearchResult {
pub matches: Vec<FileResult>,
Expand All @@ -21,14 +23,14 @@ pub async fn cli_search(paths: Vec<PathBuf>, matcher: Matcher) -> SearchResult {

let stop_signal = Arc::new(AtomicBool::new(false));

let total_processed = Arc::new(AtomicUsize::new(0));
let search_info = SearchInfo::new();

{
let total_processed = total_processed.clone();
let search_info = search_info.clone();
std::thread::Builder::new()
.name("searcher-worker".into())
.spawn(move || {
StoppableSearchImpl::new(paths, matcher, sender, stop_signal).run(total_processed)
StoppableSearchImpl::new(paths, matcher, sender, stop_signal).run(search_info)
})
.expect("Failed to spawn searcher worker thread");
}
Expand All @@ -41,7 +43,7 @@ pub async fn cli_search(paths: Vec<PathBuf>, matcher: Matcher) -> SearchResult {
while let Some(file_result) = receiver.recv().await {
matches.push(file_result);
total_matched += 1;
let total_processed = total_processed.load(std::sync::atomic::Ordering::Relaxed);
let total_processed = search_info.total_processed.load(Ordering::Relaxed);

if total_matched % 16 == 0 || total_processed % 16 == 0 {
let now = Instant::now();
Expand All @@ -52,7 +54,7 @@ pub async fn cli_search(paths: Vec<PathBuf>, matcher: Matcher) -> SearchResult {
}
}

let total_processed = total_processed.load(std::sync::atomic::Ordering::SeqCst) as u64;
let total_processed = search_info.total_processed.load(Ordering::SeqCst) as u64;

SearchResult {
matches,
Expand Down
Loading

0 comments on commit 05349d6

Please sign in to comment.