Skip to content

Commit

Permalink
chunk: split options in order rather than randomly using hashing
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcelCoding committed Oct 12, 2024
1 parent 6507f83 commit 753419c
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 72 deletions.
16 changes: 0 additions & 16 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 34 additions & 6 deletions fixx/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use wasm_bindgen::prelude::*;

#[wasm_bindgen]
pub fn hash(option: String) -> u8 {
libixx::hash(&option)
}
pub struct Index(libixx::Index);

#[wasm_bindgen]
pub struct Index(libixx::Index);
/// A single search hit: the option's index paired with its name.
///
/// Built by `Index::search` from the `(idx, name)` tuples returned by the underlying index.
pub struct SearchedOption {
/// Index of the option as reported by the search.
idx: usize,
/// Name of the option as reported by the search.
name: String,
}

#[wasm_bindgen]
impl Index {
Expand All @@ -16,10 +17,37 @@ impl Index {
.map_err(|err| format!("{:?}", err))
}

pub fn search(&self, query: String, max_results: usize) -> Result<Vec<String>, String> {
pub fn search(&self, query: String, max_results: usize) -> Result<Vec<SearchedOption>, String> {
match self.0.search(&query, max_results) {
Ok(options) => Ok(
options
.into_iter()
.map(|(idx, name)| SearchedOption { idx, name })
.collect(),
),
Err(err) => Err(format!("{:?}", err)),
}
}

/// Forwards to the wrapped index's `all(max)` and returns the option names it yields.
///
/// # Errors
///
/// Any error from the wrapped index is rendered with its `Debug` representation.
pub fn all(&self, max: usize) -> Result<Vec<String>, String> {
  match self.0.all(max) {
    Ok(names) => Ok(names),
    Err(err) => Err(format!("{:?}", err)),
  }
}

pub fn get_idx_by_name(&self, name: String) -> Result<Option<usize>, String> {
self
.0
.search(&query, max_results)
.get_idx_by_name(&name)
.map_err(|err| format!("{:?}", err))
}
}

#[wasm_bindgen]
impl SearchedOption {
/// Returns the index of this option as reported by the search.
pub fn idx(&self) -> usize {
self.idx
}

/// Returns the option name, consuming this `SearchedOption` (no clone of the string).
// NOTE(review): taking `self` by value presumably makes the JS-side object unusable after
// this call, so `idx()` would have to be read first — confirm this is intended.
pub fn name(self) -> String {
self.name
}
}
38 changes: 11 additions & 27 deletions ixx/src/action/index.rs
Original file line number Diff line number Diff line change
@@ -1,43 +1,27 @@
use std::{
collections::{hash_map::Entry, BTreeMap, HashMap},
fs::File,
path::Path,
sync::LazyLock,
};
use std::{collections::BTreeMap, fs::File, path::Path, sync::LazyLock};

use libixx::Index;
use syntect::{highlighting::ThemeSet, html::highlighted_html_for_string, parsing::SyntaxSet};

use crate::{args::IndexModule, option};

pub(crate) fn index(module: IndexModule) -> anyhow::Result<()> {
let mut options: BTreeMap<String, option::Option> = BTreeMap::new();
let mut raw_options: BTreeMap<String, option::Option> = BTreeMap::new();

for path in module.files {
println!("Parsing {}", path.to_string_lossy());
let file = File::open(path)?;
options.append(&mut serde_json::from_reader(file)?);
raw_options.append(&mut serde_json::from_reader(file)?);
}

println!("Read {} options", options.len());
println!("Read {} options", raw_options.len());

let mut index = Index::new();
let mut index = Index::default();
let mut options = Vec::new();

let mut buckets = HashMap::new();

for (name, option) in options {
for (name, option) in raw_options {
index.push(&name);

let option = into_option(&name, option);

match buckets.entry(libixx::hash(&name)) {
Entry::Vacant(vac) => {
vac.insert(vec![option]);
}
Entry::Occupied(mut occ) => {
occ.get_mut().push(option);
}
}
options.push(into_option(&name, option));
}

println!("Writing index to {}", module.output.to_string_lossy());
Expand All @@ -51,9 +35,9 @@ pub(crate) fn index(module: IndexModule) -> anyhow::Result<()> {
std::fs::create_dir("meta")?;
}

for (name, bucket) in buckets {
let mut file = File::create(format!("meta/{}.json", name))?;
serde_json::to_writer(&mut file, &bucket)?;
for (idx, chunk) in options.chunks(module.chunk_size).enumerate() {
let mut file = File::create(format!("meta/{}.json", idx))?;
serde_json::to_writer(&mut file, &chunk)?;
}

Ok(())
Expand Down
4 changes: 2 additions & 2 deletions ixx/src/action/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ pub(crate) fn search(module: SearchModule) -> anyhow::Result<()> {
let index = Index::read_from(&mut file)?;

let result = index.search(&module.query, module.max_results as usize)?;
for option in result {
println!("{}", option);
for (idx, name) in result {
println!("idx: {}, name: {}", idx, name);
}

Ok(())
Expand Down
3 changes: 3 additions & 0 deletions ixx/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ pub(super) struct IndexModule {

#[clap(short, long, default_value = "index.ixx")]
pub(super) output: PathBuf,

#[clap(short, long, default_value = "10")]
pub(super) chunk_size: usize,
}

#[derive(Parser)]
Expand Down
1 change: 0 additions & 1 deletion libixx/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ license = "MIT OR Apache-2.0"
serde = { version = "1.0", features = ["derive"] }
thiserror = "1.0"
bincode = "1.3"
crc = "3.2"

[dev-dependencies]
serde_json = "1.0"
7 changes: 0 additions & 7 deletions libixx/src/hash.rs

This file was deleted.

88 changes: 78 additions & 10 deletions libixx/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,22 @@ use serde::{Deserialize, Serialize};

use crate::IxxError;

#[derive(Serialize, Deserialize, Debug)]
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Index(Vec<Vec<Label>>);

#[derive(Serialize, Deserialize, Debug)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct Reference {
option_idx: u16,
label_idx: u8,
}

#[derive(Serialize, Deserialize, Debug)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
enum Label {
InPlace(String),
Reference(Reference),
}
impl Index {
pub fn new() -> Self {
Self(Vec::new())
}

impl Index {
pub fn read(buf: &[u8]) -> Result<Self, IxxError> {
Ok(bincode::deserialize(buf)?)
}
Expand Down Expand Up @@ -88,15 +85,49 @@ impl Index {
}
}

pub fn search(&self, query: &str, max_results: usize) -> Result<Vec<String>, IxxError> {
/// Looks up the position of the option whose dotted name is `option`.
///
/// The name is split on `.`. Every segment is translated into a [`Reference`] pointing at the
/// first `Label::InPlace` entry (scanning options, then labels, in order) that holds the same
/// text. The resulting reference list is then compared against every stored option with
/// `do_labels_match`, and the index of the first match is returned.
///
/// Returns `Ok(None)` when a segment has no in-place occurrence anywhere in the index, or when
/// no stored option matches the translated label sequence. The visible body never produces an
/// `Err`.
pub fn get_idx_by_name(&self, option: &str) -> Result<Option<usize>, IxxError> {
  let mut labels = Vec::new();

  for segment in option.split('.') {
    // Locate the slot where this segment's text is stored verbatim.
    let located = self.0.iter().enumerate().find_map(|(option_idx, stored)| {
      stored
        .iter()
        .enumerate()
        .find_map(|(label_idx, label)| match label {
          // The casts follow the field widths of `Reference`; positions beyond those
          // widths are truncated.
          Label::InPlace(inplace) if inplace == segment => Some(Reference {
            option_idx: option_idx as u16,
            label_idx: label_idx as u8,
          }),
          Label::InPlace(_) | Label::Reference(_) => None,
        })
    });

    match located {
      Some(reference) => labels.push(reference),
      // A segment that is stored nowhere cannot be part of any indexed option.
      None => return Ok(None),
    }
  }

  let found = self
    .0
    .iter()
    .enumerate()
    .find_map(|(idx, stored)| do_labels_match(idx, stored, &labels).then_some(idx));

  Ok(found)
}

pub fn search(&self, query: &str, max_results: usize) -> Result<Vec<(usize, String)>, IxxError> {
let search = query
.split('*')
.map(|segment| segment.to_lowercase())
.collect::<Vec<_>>();

let mut results = Vec::new();

for option in &self.0 {
for (idx, option) in self.0.iter().enumerate() {
let mut option_name = String::new();
for label in option {
match label {
Expand All @@ -118,7 +149,7 @@ impl Index {
}
}

results.push(option_name);
results.push((idx, option_name));
if results.len() >= max_results {
return Ok(results);
}
Expand All @@ -127,4 +158,41 @@ impl Index {

Ok(results)
}

/// Returns the fully qualified names of the first `max` options, in index order.
///
/// The labels of each option are joined with `.`; `Label::Reference` labels are resolved
/// through `resolve_reference`. When the index holds fewer than `max` options, all of them are
/// returned.
///
/// # Errors
///
/// Propagates any error returned by `resolve_reference`.
pub fn all(&self, max: usize) -> Result<Vec<String>, IxxError> {
  // `take` clamps to the number of stored options. Slicing with `[..max]` would panic
  // whenever `max` exceeds the index length.
  let mut options = Vec::with_capacity(max.min(self.0.len()));

  for option in self.0.iter().take(max) {
    let mut option_name = String::new();
    for label in option {
      match label {
        Label::InPlace(data) => option_name.push_str(data),
        Label::Reference(reference) => option_name.push_str(self.resolve_reference(reference)?),
      }
      option_name.push('.');
    }
    // Drop the trailing separator appended after the last label (no-op for an empty option).
    option_name.pop();

    options.push(option_name);
  }

  Ok(options)
}
}

/// Reports whether `option`, stored at position `option_idx` of the index, consists of exactly
/// the label sequence described by `search`.
///
/// Both slices must have the same length and agree pairwise:
/// - a `Label::InPlace` entry agrees when the searched reference addresses this very slot,
///   i.e. (`option_idx`, position of the label inside `option`);
/// - a `Label::Reference` entry agrees when it equals the searched reference.
fn do_labels_match(option_idx: usize, option: &[Label], search: &[Reference]) -> bool {
  // Different lengths can never match; this also keeps `zip` from hiding a surplus tail.
  if option.len() != search.len() {
    return false;
  }

  option
    .iter()
    .zip(search)
    .enumerate()
    .all(|(label_idx, (label, wanted))| match label {
      Label::InPlace(_) => {
        usize::from(wanted.option_idx) == option_idx && usize::from(wanted.label_idx) == label_idx
      }
      Label::Reference(stored) => stored == wanted,
    })
}
2 changes: 0 additions & 2 deletions libixx/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
pub use error::IxxError;
pub use hash::hash;
pub use index::Index;
pub use option::Option;

mod error;
mod hash;
mod index;
mod option;

Expand Down
4 changes: 3 additions & 1 deletion libixx/src/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ fn test() {

let options = options.keys().collect::<Vec<_>>();

let mut index = Index::new();
let mut index = Index::default();
for option in &options {
index.push(option);
}

println!("{:?}", index.search("ho*exta", 10).unwrap());

let mut file = File::create("index.nuscht").unwrap();
index.write_into(&mut file).unwrap();
}

0 comments on commit 753419c

Please sign in to comment.