Skip to content

Commit

Permalink
Split out matcher and filter crate (#463)
Browse files Browse the repository at this point in the history
* Split out matcher crate

* Use matcher crate in filter

* Split out filter crate

* Move extracted_fzy to matcher/extracted_fzy

* Commit cmd/filter.rs

* Move process_top_items() to printer

* Fix test

* Printer sync/dync_filter_results

* Move macros to utility

* .

* .

* .
  • Loading branch information
liuchengxu authored Jun 9, 2020
1 parent ee02a6f commit 41dc711
Show file tree
Hide file tree
Showing 29 changed files with 633 additions and 521 deletions.
319 changes: 167 additions & 152 deletions Cargo.lock

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions crates/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
[workspace]

members = [
"extracted_fzy",
"fuzzy_filter",
"icon",
"maple_cli",
"pattern",
"printer",
"stdio_server",
"utility",
"upgrade",
"matcher",
"matcher/extracted_fzy",
"filter",
]
19 changes: 9 additions & 10 deletions crates/fuzzy_filter/Cargo.toml → crates/filter/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
[package]
name = "fuzzy_filter"
name = "filter"
version = "0.1.0"
authors = ["Liu-Cheng Xu <[email protected]>"]
edition = "2018"
license = "MIT"
publish = false
homepage = "https://github.com/liuchengxu/vim-clap"
categories = ["Fuzzy Filter Library"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rayon = "1.2"
anyhow = "1.0"
structopt = "0.3"
fuzzy-matcher = "0.3.1"

serde_json = "1.0"
serde = { package = "serde", version = "1.0", features = ["derive"] }
subprocess = { git = "https://github.com/hniksic/rust-subprocess", optional = true }

extracted_fzy = { path = "../extracted_fzy" }
pattern = { path = "../pattern" }
icon = { path = "../icon" }
matcher = { path = "../matcher" }
printer = { path = "../printer" }
utility = { path = "../utility" }

[features]
default = ["enable_dyn"]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use super::*;
use fuzzy_filter::{get_appropriate_scorer, FuzzyMatchedLineInfo};
use crate::FilterResult;
use icon::IconPainter;
use icon::ICON_LEN;
use matcher::{get_appropriate_matcher, LineSplitter};
use rayon::slice::ParallelSliceMut;
use std::io::{self, BufRead};
use std::time::{Duration, Instant};
use utility::{println_json, println_json_with_length};

/// The constant to define the length of `top_` queues.
const ITEMS_TO_SHOW: usize = 30;
Expand Down Expand Up @@ -63,8 +66,8 @@ type SelectedTopItemsInfo = (usize, [i64; ITEMS_TO_SHOW], [usize; ITEMS_TO_SHOW]
///
/// First, let's try to produce `ITEMS_TO_SHOW` items to fill the topscores.
fn select_top_items_to_show(
buffer: &mut Vec<FuzzyMatchedLineInfo>,
iter: &mut impl Iterator<Item = FuzzyMatchedLineInfo>,
buffer: &mut Vec<FilterResult>,
iter: &mut impl Iterator<Item = FilterResult>,
) -> std::result::Result<usize, SelectedTopItemsInfo> {
let mut top_scores: [i64; ITEMS_TO_SHOW] = [i64::min_value(); ITEMS_TO_SHOW];
let mut top_results: [usize; ITEMS_TO_SHOW] = [usize::min_value(); ITEMS_TO_SHOW];
Expand Down Expand Up @@ -116,7 +119,7 @@ fn try_notify_top_results(
past: &Instant,
top_results_len: usize,
top_results: &[usize; ITEMS_TO_SHOW],
buffer: &[FuzzyMatchedLineInfo],
buffer: &[FilterResult],
last_lines: &[String],
) -> std::result::Result<(Instant, Option<Vec<String>>), ()> {
if total % 16 == 0 {
Expand All @@ -127,7 +130,7 @@ fn try_notify_top_results(
for &idx in top_results.iter() {
let (text, _, idxs) = std::ops::Index::index(buffer, idx);
let text = if let Some(painter) = icon_painter {
indices.push(idxs.into_iter().map(|x| x + ICON_LEN).collect::<Vec<_>>());
indices.push(idxs.iter().map(|x| x + ICON_LEN).collect::<Vec<_>>());
painter.paint(&text)
} else {
indices.push(idxs.clone());
Expand All @@ -137,10 +140,10 @@ fn try_notify_top_results(
}

if last_lines != lines.as_slice() {
print_json_with_length!(total, lines, indices);
println_json_with_length!(total, lines, indices);
return Ok((now, Some(lines)));
} else {
print_json_with_length!(total);
println_json_with_length!(total);
return Ok((now, None));
}
}
Expand All @@ -166,9 +169,9 @@ fn try_notify_top_results(
///
/// So, this particular function won't work in parallel context at all.
fn dyn_collect_all(
mut iter: impl Iterator<Item = FuzzyMatchedLineInfo>,
mut iter: impl Iterator<Item = FilterResult>,
icon_painter: &Option<IconPainter>,
) -> Vec<FuzzyMatchedLineInfo> {
) -> Vec<FilterResult> {
let mut buffer = Vec::with_capacity({
let (low, high) = iter.size_hint();
high.unwrap_or(low)
Expand Down Expand Up @@ -224,10 +227,10 @@ fn dyn_collect_all(
// I think, it's just good enough. And should be more effective than full
// `collect()` into Vec on big numbers of iterations.
fn dyn_collect_number(
mut iter: impl Iterator<Item = FuzzyMatchedLineInfo>,
mut iter: impl Iterator<Item = FilterResult>,
number: usize,
icon_painter: &Option<IconPainter>,
) -> (usize, Vec<FuzzyMatchedLineInfo>) {
) -> (usize, Vec<FilterResult>) {
// To not have problems with queues after sorting and truncating the buffer,
// buffer has the lowest bound of `ITEMS_TO_SHOW * 2`, not `number * 2`.
let mut buffer = Vec::with_capacity(2 * std::cmp::max(ITEMS_TO_SHOW, number));
Expand Down Expand Up @@ -296,6 +299,7 @@ macro_rules! source_iter_stdin {
}

// Generate a filtered iterator from Source::Exec(exec).
#[cfg(feature = "enable_dyn")]
macro_rules! source_iter_exec {
( $scorer:ident, $exec:ident ) => {
std::io::BufReader::new($exec.stream_stdout()?)
Expand Down Expand Up @@ -331,7 +335,7 @@ macro_rules! source_iter_list {
}

/// Returns the ranked results after applying fuzzy filter given the query string and a list of candidates.
pub fn dyn_fuzzy_filter_and_rank<I: Iterator<Item = String>>(
pub fn dyn_run<I: Iterator<Item = String>>(
query: &str,
source: Source<I>,
algo: Option<Algo>,
Expand All @@ -340,16 +344,17 @@ pub fn dyn_fuzzy_filter_and_rank<I: Iterator<Item = String>>(
icon_painter: Option<IconPainter>,
line_splitter: LineSplitter,
) -> Result<()> {
let algo = if query.contains(" ") {
let algo = if query.contains(' ') {
Algo::SubString
} else {
algo.unwrap_or(Algo::Fzy)
};
let scorer_fn = get_appropriate_scorer(&algo, &line_splitter);
let scorer_fn = get_appropriate_matcher(&algo, &line_splitter);
let scorer = |line: &str| scorer_fn(line, query);
if let Some(number) = number {
let (total, mut filtered) = match source {
Source::Stdin => dyn_collect_number(source_iter_stdin!(scorer), number, &icon_painter),
#[cfg(feature = "enable_dyn")]
Source::Exec(exec) => {
dyn_collect_number(source_iter_exec!(scorer, exec), number, &icon_painter)
}
Expand All @@ -360,22 +365,14 @@ pub fn dyn_fuzzy_filter_and_rank<I: Iterator<Item = String>>(
dyn_collect_number(source_iter_list!(scorer, list), number, &icon_painter)
}
};

filtered.sort_unstable_by(|a, b| b.1.cmp(&a.1));
let (lines, indices, truncated_map) = process_top_items(
number,
filtered.into_iter().take(number),
winwidth.unwrap_or(62),
icon_painter,
);

if truncated_map.is_empty() {
print_json_with_length!(total, lines, indices);
} else {
print_json_with_length!(total, lines, indices, truncated_map);
}

printer::print_dyn_filter_results(filtered, total, number, winwidth, icon_painter);
} else {
let mut filtered = match source {
Source::Stdin => dyn_collect_all(source_iter_stdin!(scorer), &icon_painter),
#[cfg(feature = "enable_dyn")]
Source::Exec(exec) => dyn_collect_all(source_iter_exec!(scorer, exec), &icon_painter),
Source::File(fpath) => dyn_collect_all(source_iter_file!(scorer, fpath), &icon_painter),
Source::List(list) => dyn_collect_all(source_iter_list!(scorer, list), &icon_painter),
Expand Down Expand Up @@ -424,7 +421,7 @@ mod tests {

let mut changing_text: [u8; 16] = [ALPHABET[31]; 16];
let mut total_lines_created: usize = 0;
dyn_fuzzy_filter_and_rank(
dyn_run(
"abc",
Source::List(
std::iter::repeat_with(|| {
Expand Down
37 changes: 37 additions & 0 deletions crates/filter/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//! This crate provides the feature of filtering a stream of lines.
//!
//! Given a stream of lines:
//!
//! 1. apply the matcher algorithm on each of them.
//! 2. sort all the lines with a match result.
//! 3. print the top rated filtered lines to stdout.

mod dynamic;
mod source;

use anyhow::Result;
use matcher::Algo;
use rayon::prelude::*;

pub use dynamic::dyn_run;
pub use matcher;
pub use source::Source;
#[cfg(feature = "enable_dyn")]
pub use subprocess;

/// Tuple of (matched line text, filtering score, indices of matched elements)
pub type FilterResult = (String, i64, Vec<usize>);

/// Returns the ranked results after applying the matcher algo
/// given the query String and filtering source.
///
/// The results are sorted by score in descending order, i.e. the entry
/// with the highest score comes first. The sort is unstable, so the
/// relative order of entries with equal scores is unspecified.
///
/// # Errors
///
/// Propagates any error returned by [`Source::filter`].
pub fn sync_run<I: Iterator<Item = String>>(
    query: &str,
    source: Source<I>,
    algo: Algo,
) -> Result<Vec<FilterResult>> {
    let mut ranked = source.filter(algo, query)?;

    // Scores are `i64`, which is totally ordered, so `Ord::cmp` is used
    // directly rather than `partial_cmp(..).unwrap()`. Comparing the second
    // score against the first yields the descending order.
    ranked.par_sort_unstable_by(|(_, v1, _), (_, v2, _)| v2.cmp(v1));

    Ok(ranked)
}
25 changes: 11 additions & 14 deletions crates/fuzzy_filter/src/source.rs → crates/filter/src/source.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use super::*;
use crate::{fuzzy_indices_fzy, fuzzy_indices_skim};
use anyhow::Result;
use matcher::{fzy, skim, substring};
use std::io::BufRead;
use std::path::PathBuf;
#[cfg(feature = "enable_dyn")]
Expand Down Expand Up @@ -37,14 +36,14 @@ impl<I: Iterator<Item = String>> From<Exec> for Source<I> {

impl<I: Iterator<Item = String>> Source<I> {
/// Returns the complete filtered results after applying the specified
/// filter algo on each item in the input stream.
/// matcher algo on each item in the input stream.
///
/// This is kind of synchronous filtering, can be used for multi-staged processing.
pub fn fuzzy_filter(self, algo: Algo, query: &str) -> Result<Vec<FuzzyMatchedLineInfo>> {
let scorer = |line: &str| match algo {
Algo::Skim => fuzzy_indices_skim(line, &query),
Algo::Fzy => fuzzy_indices_fzy(line, &query),
Algo::SubString => substr_indices(line, &query),
pub fn filter(self, algo: Algo, query: &str) -> Result<Vec<FilterResult>> {
let matcher = |line: &str| match algo {
Algo::Skim => skim::fuzzy_indices(line, &query),
Algo::Fzy => fzy::fuzzy_indices(line, &query),
Algo::SubString => substring::substr_indices(line, &query),
};

let filtered = match self {
Expand All @@ -53,7 +52,7 @@ impl<I: Iterator<Item = String>> Source<I> {
.lines()
.filter_map(|lines_iter| {
lines_iter.ok().and_then(|line| {
scorer(&line).map(|(score, indices)| (line, score, indices))
matcher(&line).map(|(score, indices)| (line, score, indices))
})
})
.collect::<Vec<_>>(),
Expand All @@ -62,20 +61,18 @@ impl<I: Iterator<Item = String>> Source<I> {
.lines()
.filter_map(|lines_iter| {
lines_iter.ok().and_then(|line| {
scorer(&line).map(|(score, indices)| (line, score, indices))
matcher(&line).map(|(score, indices)| (line, score, indices))
})
})
.collect::<Vec<_>>(),
Self::File(fpath) => std::fs::read_to_string(fpath)?
.par_lines()
.filter_map(|line| {
scorer(&line).map(|(score, indices)| (line.into(), score, indices))
matcher(&line).map(|(score, indices)| (line.into(), score, indices))
})
.collect::<Vec<_>>(),
Self::List(list) => list
.filter_map(|line| {
scorer(&line).map(|(score, indices)| (line.into(), score, indices))
})
.filter_map(|line| matcher(&line).map(|(score, indices)| (line, score, indices)))
.collect::<Vec<_>>(),
};

Expand Down
Loading

0 comments on commit 41dc711

Please sign in to comment.