From f084a5aaa068240b548deaaad605c85439171f38 Mon Sep 17 00:00:00 2001
From: Adrien Ball
Date: Mon, 29 Apr 2024 15:42:21 +0200
Subject: [PATCH 1/3] Update dependencies and remove examples + benches

---
 .github/workflows/ci.yml                 |  64 ++++++++++
 .travis.yml                              |  14 ---
 Cargo.toml                               |  22 +---
 benches/bench_parser.rs                  | 157 ------------------------
 data/benches/.gitignore                  |   1 -
 examples/entity_parsing_from_scratch.rs  |  47 -------
 examples/interactive_parsing_cli.rs      | 104 ----------------
 src/utils.rs                             |   5 +-
 8 files changed, 69 insertions(+), 345 deletions(-)
 create mode 100644 .github/workflows/ci.yml
 delete mode 100644 .travis.yml
 delete mode 100644 benches/bench_parser.rs
 delete mode 100644 data/benches/.gitignore
 delete mode 100644 examples/entity_parsing_from_scratch.rs
 delete mode 100644 examples/interactive_parsing_cli.rs

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..a6fd370
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,64 @@
+name: CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - 'main'
+  schedule:
+    - cron: '0 9 * * *'
+
+jobs:
+  fmt:
+    name: Rust fmt
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: 1.74.0
+          components: rustfmt
+      - name: Cargo fmt check
+        run: cargo fmt --all -- --check
+
+  clippy:
+    name: Clippy
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: 1.74.0
+          components: clippy
+      - name: Cargo Clippy
+        run: cargo clippy -- -D warnings
+
+  build:
+    name: Build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: 1.74.0
+      - name: Cargo build
+        run: cargo build
+
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: 1.74.0
+      - name: Cargo test
+        run: cargo test
+
+
+
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index cdb8af4..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-language: rust
-
-rust:
-- stable
-- beta
-- nightly
-
-matrix:
-  allow_failures:
-  - rust: nightly
-
-script:
-  - cargo check --all --tests --benches --examples
-  - cargo test --all
diff --git a/Cargo.toml b/Cargo.toml
index b51aec4..4c2a149 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,24 +11,10 @@ debug = true
 
 [dependencies]
 failure = "0.1"
-serde_json = "1.0"
-serde = { version = "1.0", features = ["derive"] }
-rmp-serde = "0.13"
-fnv = "1.0"
+serde_json = "1"
+serde = { version = "1", features = ["derive"] }
+rmp-serde = "1"
+fnv = "1"
 
 [dev-dependencies]
-criterion = "0.2"
-dinghy-test = "0.4"
-rand = "0.7"
 tempfile = "3"
-clap = "2"
-
-[[bench]]
-name = "bench_parser"
-harness = false
-
-[[example]]
-name = "interactive_parsing_cli"
-
-[[example]]
-name = "entity_parsing_from_scratch"
diff --git a/benches/bench_parser.rs b/benches/bench_parser.rs
deleted file mode 100644
index 9cd7835..0000000
--- a/benches/bench_parser.rs
+++ /dev/null
@@ -1,157 +0,0 @@
-#[macro_use]
-extern crate criterion;
-extern crate dinghy_test;
-extern crate gazetteer_entity_parser;
-extern crate rand;
-extern crate serde_json;
-
-use criterion::Criterion;
-use gazetteer_entity_parser::*;
-use rand::distributions::Alphanumeric;
-use rand::rngs::ThreadRng;
-use rand::seq::IteratorRandom;
-use rand::thread_rng;
-use rand::Rng;
-use std::collections::HashSet;
-
-pub fn test_data_path() -> ::std::path::PathBuf {
-    ::dinghy_test::try_test_file_path("data").unwrap_or_else(|| "data".into())
-}
-
-/// Function generating a random string representing a single word of various length
-fn generate_random_string(rng: &mut ThreadRng) -> String {
-    let n_char = rng.gen_range(3, 8);
-    rng.sample_iter(&Alphanumeric).take(n_char).collect()
-}
-
-/// Random string generator with tunable redundancy to make it harder for the parser
-#[derive(Clone)]
-struct RandomStringGenerator {
-    vocabulary: Vec<String>,
-    max_words: usize,
-    rng: ThreadRng,
-    already_generated: HashSet<String>,
-}
-
-impl RandomStringGenerator {
-    fn new(vocab_size: usize, max_words: usize) -> RandomStringGenerator {
-        let mut rng = thread_rng();
-        let unique_strings = (0..vocab_size)
-            .map(|_| generate_random_string(&mut rng))
-            .collect();
-        RandomStringGenerator {
-            vocabulary: unique_strings,
-            max_words,
-            rng,
-            already_generated: HashSet::new(),
-        }
-    }
-}
-
-impl Iterator for RandomStringGenerator {
-    type Item = String;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        loop {
-            let n_words = self.rng.gen_range(1, self.max_words);
-            let generated_value = self
-                .vocabulary
-                .iter()
-                .choose_multiple(&mut self.rng, n_words)
-                .iter()
-                .map(|sample_string| sample_string.to_string())
-                .collect::<Vec<_>>()
-                .join(" ");
-            if !self.already_generated.contains(&generated_value) {
-                self.already_generated.insert(generated_value.clone());
-                break Some(generated_value);
-            }
-        }
-    }
-}
-
-fn generate_random_gazetteer(
-    vocab_size: usize,
-    nb_entity_values: usize,
-    max_words: usize,
-) -> (Gazetteer, RandomStringGenerator) {
-    let rsg = RandomStringGenerator::new(vocab_size, max_words);
-    let entity_values = rsg
-        .clone()
-        .take(nb_entity_values)
-        .map(|string| EntityValue {
-            resolved_value: string.to_lowercase(),
-            raw_value: string,
-        })
-        .collect();
-    let gazetteer = Gazetteer {
-        data: entity_values,
-    };
-    (gazetteer, rsg)
-}
-
-fn generate_random_parser(
-    vocab_size: usize,
-    nb_entity_values: usize,
-    max_words: usize,
-    minimum_tokens_ratio: f32,
-    n_stop_words: usize,
-) -> (Parser, RandomStringGenerator) {
-    let (gazetteer, rsg) = generate_random_gazetteer(vocab_size, nb_entity_values, max_words);
-    let parser = ParserBuilder::default()
-        .gazetteer(gazetteer)
-        .minimum_tokens_ratio(minimum_tokens_ratio)
-        .n_stop_words(n_stop_words)
-        .build()
-        .unwrap();
-    (parser, rsg)
-}
-
-fn get_low_redundancy_parser() -> (Parser, RandomStringGenerator) {
-    generate_random_parser(10000, 100000, 10, 0.5, 50)
-}
-
-fn get_high_redundancy_parser() -> (Parser, RandomStringGenerator) {
-    generate_random_parser(100, 100000, 5, 0.5, 50)
-}
-
-fn parsing_low_redundancy(c: &mut Criterion) {
-    let (parser, mut rsg) = get_low_redundancy_parser();
-    c.bench_function("Parse random value - low redundancy", move |b| {
-        b.iter(|| parser.run(&rsg.next().unwrap(), 10))
-    });
-}
-
-fn parsing_high_redundancy(c: &mut Criterion) {
-    let (parser, mut rsg) = get_high_redundancy_parser();
-    c.bench_function("Parse random value - high redundancy", move |b| {
-        b.iter(|| parser.run(&rsg.next().unwrap(), 10))
-    });
-}
-
-fn loading(c: &mut Criterion) {
-    let (gazetteer, _) = generate_random_gazetteer(100, 1000, 5);
-    let parser_directory = test_data_path().join("benches").join("parser");
-    if !parser_directory.exists() {
-        let parser = ParserBuilder::default()
-            .gazetteer(gazetteer)
-            .minimum_tokens_ratio(0.5)
-            .n_stop_words(50)
-            .build()
-            .unwrap();
-
-        parser.dump(&parser_directory).unwrap();
-    }
-    c.bench_function(
-        "Loading random gazetteer parser with low redundancy",
-        move |b| b.iter(|| Parser::from_folder(parser_directory.clone()).unwrap()),
-    );
-}
-
-criterion_group!(
-    benches,
-    parsing_low_redundancy,
-    parsing_high_redundancy,
-    loading
-);
-criterion_main!(benches);
diff --git a/data/benches/.gitignore b/data/benches/.gitignore
deleted file mode 100644
index 763d456..0000000
--- a/data/benches/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-parser
diff --git a/examples/entity_parsing_from_scratch.rs b/examples/entity_parsing_from_scratch.rs
deleted file mode 100644
index 23d8df0..0000000
--- a/examples/entity_parsing_from_scratch.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-extern crate gazetteer_entity_parser;
-
-use gazetteer_entity_parser::*;
-
-fn main() {
-    let gazetteer = gazetteer!(
-        ("king of pop", "Michael Jackson"),
-        ("the rolling stones", "The Rolling Stones"),
-        ("the crying stones", "The Crying Stones"),
-        ("the fab four", "The Beatles"),
-        ("queen of soul", "Aretha Franklin"),
-    );
-    let parser = ParserBuilder::default()
-        .gazetteer(gazetteer)
-        .minimum_tokens_ratio(2. / 3.)
-        .build()
-        .unwrap();
-
-    let sentence = "My favourite artists are the stones and fab four";
-    let extracted_entities = parser.run(sentence, 5);
-    assert_eq!(
-        extracted_entities,
-        vec![
-            ParsedValue {
-                matched_value: "the stones".to_string(),
-                resolved_value: ResolvedValue {
-                    resolved: "The Rolling Stones".to_string(),
-                    raw_value: "the rolling stones".to_string(),
-                },
-                alternatives: vec![ResolvedValue {
-                    resolved: "The Crying Stones".to_string(),
-                    raw_value: "the crying stones".to_string(),
-                }],
-                range: 25..35,
-            },
-            ParsedValue {
-                matched_value: "fab four".to_string(),
-                resolved_value: ResolvedValue {
-                    resolved: "The Beatles".to_string(),
-                    raw_value: "the fab four".to_string(),
-                },
-                alternatives: vec![],
-                range: 40..48,
-            }
-        ]
-    );
-}
diff --git a/examples/interactive_parsing_cli.rs b/examples/interactive_parsing_cli.rs
deleted file mode 100644
index 075b48d..0000000
--- a/examples/interactive_parsing_cli.rs
+++ /dev/null
@@ -1,104 +0,0 @@
-extern crate clap;
-extern crate gazetteer_entity_parser;
-extern crate serde_json;
-
-use std::io::Write;
-use std::{fs, io};
-
-use clap::{App, Arg};
-
-use gazetteer_entity_parser::{Gazetteer, Parser, ParserBuilder};
-
-fn main() {
-    let mut app = App::new("gazetteer-entity-parser-demo")
-        .about("Interactive CLI for parsing gazetteer entities")
-        .arg(
-            Arg::with_name("parser")
-                .short("p")
-                .long("--parser")
-                .takes_value(true)
-                .help("path to the parser directory"),
-        )
-        .arg(
-            Arg::with_name("gazetteer")
-                .short("g")
-                .long("--gazetteer")
-                .takes_value(true)
-                .help("path to the json gazetteer file"),
-        )
-        .arg(
-            Arg::with_name("opt_nb_stop_words")
-                .short("n")
-                .long("--nb-stop-words")
-                .takes_value(true)
-                .help("number of stop words to use"),
-        )
-        .arg(
-            Arg::with_name("opt_tokens_ratio")
-                .short("r")
-                .long("--ratio")
-                .takes_value(true)
-                .help("minimum tokens ratio for the parser"),
-        )
-        .arg(
-            Arg::with_name("opt_max_alternatives")
-                .short("a")
-                .long("--alternatives")
-                .takes_value(true)
-                .help("maximum number of alternative resolved values"),
-        );
-    let matches = app.clone().get_matches();
-
-    let opt_nb_stop_words = matches
-        .value_of("opt_nb_stop_words")
-        .map(|nb_str| nb_str.to_string().parse::<usize>().unwrap());
-
-    let opt_tokens_ratio = matches
-        .value_of("opt_tokens_ratio")
-        .map(|ratio_str| ratio_str.to_string().parse::<f32>().unwrap());
-
-    let max_alternatives = matches
-        .value_of("opt_max_alternatives")
-        .map(|max_str| max_str.to_string().parse::<usize>().unwrap())
-        .unwrap_or(5);
-
-    if let Some(parser) = matches
-        .value_of("parser")
-        .map(|parser_dir| {
-            println!("\nLoading the parser...");
-            let mut parser = Parser::from_folder(parser_dir).unwrap();
-            if let Some(ratio) = opt_tokens_ratio {
-                parser.set_threshold(ratio);
-            };
-            if let Some(nb_stop_words) = opt_nb_stop_words {
-                parser.set_stop_words(nb_stop_words, None);
-            };
-            parser
-        })
-        .or_else(|| {
-            matches.value_of("gazetteer").map(|gazetteer_path| {
-                println!("\nLoading the gazetteer...");
-                let gazetteer_file = fs::File::open(&gazetteer_path).unwrap();
-                let gazetteer: Gazetteer = serde_json::from_reader(gazetteer_file).unwrap();
-
-                println!("\nBuilding the parser...");
-                ParserBuilder::default()
-                    .gazetteer(gazetteer)
-                    .n_stop_words(opt_nb_stop_words.unwrap_or(0))
-                    .minimum_tokens_ratio(opt_tokens_ratio.unwrap_or(1.0))
-                    .build()
-                    .unwrap()
-            })
-        })
-    {
-        loop {
-            print!("> ");
-            io::stdout().flush().unwrap();
-            let mut query = String::new();
-            io::stdin().read_line(&mut query).unwrap();
-            let result = parser.run(query.trim(), max_alternatives);
-            println!("{:?}", result);
-        }
-    } else {
-        app.print_long_help().unwrap();
-    }
-}
diff --git a/src/utils.rs b/src/utils.rs
index 5efde2e..ff8eeca 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -79,10 +79,7 @@ mod tests {
 
         let mut tokenizer = whitespace_tokenizer("дра \t नमस्ते");
         assert_eq!(tokenizer.next(), Some((0..3, "дра".to_string())));
-        assert_eq!(
-            tokenizer.next(),
-            Some((6..12, "नमस्ते".to_string()))
-        );
+        assert_eq!(tokenizer.next(), Some((6..12, "नमस्ते".to_string())));
 
         let mut tokenizer = whitespace_tokenizer("je veux écouter les rolling stones");
         assert_eq!(tokenizer.next(), Some((0..2, "je".to_string())));

From 60fc9b304d9099f69409ae339757d7a3b1decd73 Mon Sep 17 00:00:00 2001
From: Adrien Ball
Date: Mon, 29 Apr 2024 15:48:10 +0200
Subject: [PATCH 2/3] Update README

---
 README.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index f235310..d4e74f6 100644
--- a/README.rst
+++ b/README.rst
@@ -1,8 +1,7 @@
 Gazetteer Entity Parser
 =======================
 
-.. image:: https://travis-ci.org/snipsco/gazetteer-entity-parser.svg?branch=master
-    :target: https://travis-ci.org/snipsco/gazetteer-entity-parser
+**THIS LIBRARY IS NOT ACTIVELY MAINTAINED ANYMORE**
 
 This Rust library allows parsing and resolving entity values based on a gazetteer, in the context of an `Information Extraction <https://en.wikipedia.org/wiki/Information_extraction>`_ task.
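
For reference, the entry point removed in the first patch demonstrated the public API end to end. A minimal sketch in the same spirit — adapted from the deleted examples/entity_parsing_from_scratch.rs above; the gazetteer! macro, ParserBuilder, and Parser::run calls are taken from that example, everything else is illustrative:

use gazetteer_entity_parser::*;

fn main() {
    // Each entry maps a raw value (as it may appear in text) to its resolved value.
    let gazetteer = gazetteer!(
        ("the rolling stones", "The Rolling Stones"),
        ("the fab four", "The Beatles"),
    );
    // Accept a match when at least 2/3 of an entity's tokens are present.
    let parser = ParserBuilder::default()
        .gazetteer(gazetteer)
        .minimum_tokens_ratio(2. / 3.)
        .build()
        .unwrap();
    // The second argument bounds the number of alternative resolved values
    // attached to each match, mirroring the CLI's --alternatives flag.
    let parsed = parser.run("My favourite artists are the stones and fab four", 5);
    println!("{:?}", parsed);
}
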
From 56656fc2ffd2ca7295e8bdcb801f4c72d41d1b1d Mon Sep 17 00:00:00 2001
From: Adrien Ball
Date: Mon, 29 Apr 2024 16:06:47 +0200
Subject: [PATCH 3/3] Fix clippy issues

---
 src/data.rs            |   6 +-
 src/parser.rs          | 103 +++++++++++++++++++++--------------------
 src/parser_registry.rs |  12 +++--
 src/symbol_table.rs    |   5 +-
 4 files changed, 66 insertions(+), 60 deletions(-)

diff --git a/src/data.rs b/src/data.rs
index e9570ab..9e88837 100644
--- a/src/data.rs
+++ b/src/data.rs
@@ -15,8 +15,7 @@ impl EntityValue {
     pub fn into_tokenized(self) -> TokenizedEntityValue {
         TokenizedEntityValue {
             resolved_value: self.resolved_value,
-            tokens: whitespace_tokenizer(&*self.raw_value)
-                .into_iter()
+            tokens: whitespace_tokenizer(&self.raw_value)
                 .map(|(_, token)| token)
                 .collect(),
         }
@@ -128,7 +127,7 @@ impl Gazetteer {
 
     /// Extend the Gazetteer with the values of another Gazetteer
     pub fn extend(&mut self, gazetteer: Self) {
-        self.data.extend(gazetteer.data.into_iter())
+        self.data.extend(gazetteer.data)
     }
 }
 
@@ -163,6 +162,7 @@ impl Ord for ParsedValue {
 }
 
 impl PartialOrd for ParsedValue {
+    #[allow(clippy::non_canonical_partial_ord_impl)]
     fn partial_cmp(&self, other: &ParsedValue) -> Option<Ordering> {
         if self.range.end <= other.range.start {
             Some(Ordering::Less)
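
A note on the #[allow(clippy::non_canonical_partial_ord_impl)] added above: clippy raises this lint whenever a type implements both Ord and PartialOrd but partial_cmp does not simply delegate to cmp. ParsedValue appears to keep a genuinely partial comparison over character ranges, so the patch silences the lint rather than rewriting the impl. The canonical shape the lint expects looks like this — an illustrative standalone type, not the crate's code:

use std::cmp::Ordering;

#[derive(PartialEq, Eq, Ord)]
struct Example(u32);

impl PartialOrd for Example {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Delegate to Ord so the two orderings can never disagree.
        Some(self.cmp(other))
    }
}
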
diff --git a/src/parser.rs b/src/parser.rs
index 65a8a78..af67013 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,14 +1,3 @@
-use crate::constants::*;
-use crate::data::EntityValue;
-use crate::errors::*;
-use crate::parser_registry::ParserRegistry;
-use crate::utils::{check_threshold, whitespace_tokenizer};
-use crate::ParsedValue;
-use failure::{format_err, ResultExt};
-use fnv::{FnvHashMap, FnvHashSet};
-use rmp_serde::{from_read, Serializer};
-use serde::{Deserialize, Serialize};
-use serde_json;
 use std::cmp::Ordering;
 use std::collections::hash_map::Entry;
 use std::collections::{BinaryHeap, HashSet};
@@ -16,6 +5,18 @@ use std::fs;
 use std::ops::Range;
 use std::path::Path;
 
+use failure::{format_err, ResultExt};
+use fnv::{FnvHashMap, FnvHashSet};
+use rmp_serde::{from_read, Serializer};
+use serde::{Deserialize, Serialize};
+
+use crate::constants::*;
+use crate::data::EntityValue;
+use crate::errors::*;
+use crate::parser_registry::ParserRegistry;
+use crate::utils::{check_threshold, whitespace_tokenizer};
+use crate::ParsedValue;
+
 /// Struct representing the parser. The Parser will match the longest possible contiguous
 /// substrings of a query that match partial entity values. The order in which the values are
 /// added to the parser matters: In case of ambiguity between two parsings, the Parser will output
 ///
@@ -73,19 +74,18 @@ impl PossibleMatch {
 }
 
 impl Ord for PossibleMatch {
+    #[allow(clippy::if_same_then_else)]
     fn cmp(&self, other: &PossibleMatch) -> Ordering {
         if self.n_consumed_tokens < other.n_consumed_tokens {
             Ordering::Less
         } else if self.n_consumed_tokens > other.n_consumed_tokens {
             Ordering::Greater
+        } else if self.raw_value_length < other.raw_value_length {
+            Ordering::Greater
+        } else if self.raw_value_length > other.raw_value_length {
+            Ordering::Less
         } else {
-            if self.raw_value_length < other.raw_value_length {
-                Ordering::Greater
-            } else if self.raw_value_length > other.raw_value_length {
-                Ordering::Less
-            } else {
-                other.rank.cmp(&self.rank)
-            }
+            other.rank.cmp(&self.rank)
         }
     }
 }
@@ -183,7 +183,7 @@ impl Parser {
             .with_context(|_| format_err!("Error when serializing the parser's metadata"))?;
 
         let parser_path = folder_name.as_ref().join(config.parser_filename);
-        let mut writer = fs::File::create(&parser_path)
+        let mut writer = fs::File::create(parser_path)
             .with_context(|_| format_err!("Error when creating the parser file"))?;
 
         self.serialize(&mut Serializer::new(&mut writer))
@@ -201,14 +201,14 @@ impl Parser {
     /// Load a parser from a folder
     pub fn from_folder<P: AsRef<Path>>(folder_name: P) -> Result<Parser> {
         let metadata_path = folder_name.as_ref().join(METADATA_FILENAME);
-        let metadata_file = fs::File::open(&metadata_path)
+        let metadata_file = fs::File::open(metadata_path)
             .with_context(|_| format_err!("Error when opening the metadata file"))?;
 
         let config: ParserConfig = serde_json::from_reader(metadata_file)
            .with_context(|_| format_err!("Error when deserializing the metadata"))?;
 
         let parser_path = folder_name.as_ref().join(config.parser_filename);
-        let reader = fs::File::open(&parser_path)
+        let reader = fs::File::open(parser_path)
             .with_context(|_| format_err!("Error when opening the parser file"))?;
 
         Ok(from_read(reader)
@@ -270,14 +270,14 @@ impl Parser {
                 // Iterate over current possible matches containing the stop word and
                 // try to grow them (but do not initiate a new possible match)
-                for (res_val, mut possible_match) in &mut partial_matches {
+                for (res_val, possible_match) in &mut partial_matches {
                     if !res_vals_from_token.contains(res_val)
                         || self.registry.is_edge_case(*res_val)
                     {
                         continue;
                     }
                     self.update_previous_match(
-                        &mut possible_match,
+                        possible_match,
                        token_idx,
                        *value,
                        range.clone(),
@@ -308,6 +308,7 @@ impl Parser {
         group_matches(final_matches, max_alternatives)
     }
 
+    #[allow(clippy::too_many_arguments)]
     fn update_or_insert_possible_match(
         &self,
         value: u32,
         res_val: u32,
         token_idx: usize,
         range: Range<usize>,
         partial_matches: &mut FnvHashMap<u32, PossibleMatch>,
-        mut final_matches: &mut Vec<PossibleMatch>,
+        final_matches: &mut Vec<PossibleMatch>,
         skipped_tokens: &mut FnvHashMap<usize, (Range<usize>, u32)>,
         threshold: f32,
     ) {
         match partial_matches.entry(res_val) {
             Entry::Occupied(mut entry) => {
                 self.update_previous_match(
                     entry.get_mut(),
                     token_idx,
                     value,
                     range,
                     threshold,
-                    &mut final_matches,
+                    final_matches,
                 );
             }
             Entry::Vacant(entry) => {
-                self.insert_new_possible_match(
+                if let Some(new_possible_match) = self.insert_new_possible_match(
                     res_val,
                     value,
                     range,
                     token_idx,
                     threshold,
-                    &skipped_tokens,
-                )
-                .map(|new_possible_match| {
+                    skipped_tokens,
+                ) {
                     entry.insert(new_possible_match);
-                });
+                }
             }
         }
     }
@@ -353,7 +353,7 @@
         value: u32,
         range: Range<usize>,
         threshold: f32,
-        ref mut final_matches: &mut Vec<PossibleMatch>,
+        final_matches: &mut Vec<PossibleMatch>,
     ) {
         let (rank, otokens) = self.registry.get_tokens(possible_match.resolved_value);
 
         if possible_match.n_consumed_tokens < otokens.len() as u32 {
             // Grow the last Possible Match
             // Find the next token in the resolved value that matches the
             // input token
-            for otoken_idx in (possible_match.last_token_in_resolution + 1)..otokens.len() {
-                let otok = otokens[otoken_idx];
-                if value == otok {
+            for (otoken_idx, otoken) in otokens
+                .iter()
+                .enumerate()
+                .skip(possible_match.last_token_in_resolution + 1)
+            {
+                if value == *otoken {
                     possible_match.range.end = range.end;
                     possible_match.n_consumed_tokens += 1;
                     possible_match.last_token_in_input = token_idx;
@@ -382,11 +385,10 @@
             final_matches.push(possible_match.clone());
         }
         // Then we initialize a new PossibleMatch with the same res val
-        let last_token_in_resolution = otokens.iter().position(|e| *e == value).expect(&*format!(
-            "Missing token {} from list {:?}",
-            value,
-            otokens.clone()
-        ));
+        let last_token_in_resolution = otokens
+            .iter()
+            .position(|e| *e == value)
+            .unwrap_or_else(|| panic!("Missing token {} from list {:?}", value, otokens.clone()));
 
         *possible_match = PossibleMatch {
             resolved_value: possible_match.resolved_value,
@@ -414,11 +416,10 @@ impl Parser {
         skipped_tokens: &FnvHashMap<usize, (Range<usize>, u32)>,
     ) -> Option<PossibleMatch> {
         let (rank, otokens) = self.registry.get_tokens(res_val);
-        let last_token_in_resolution = otokens.iter().position(|e| *e == value).expect(&*format!(
-            "Missing token {} from list {:?}",
-            value,
-            otokens.clone()
-        ));
+        let last_token_in_resolution = otokens
+            .iter()
+            .position(|e| *e == value)
+            .unwrap_or_else(|| panic!("Missing token {} from list {:?}", value, otokens.clone()));
         let mut possible_match = PossibleMatch {
             resolved_value: res_val,
             range,
@@ -523,7 +524,7 @@ impl Parser {
                 possible_match.tokens_range.start <= **idx
                     && possible_match.tokens_range.end > **idx
             })
-            .map(|idx| *idx)
+            .copied()
            .collect();
 
        if !overlapping_tokens.is_empty() {
@@ -601,8 +602,8 @@ fn group_matches(
             grouped_matches
         },
     )
-    .into_iter()
-    .map(|(_, mut matches)| {
+    .into_values()
+    .map(|mut matches| {
         let mut best_match = matches.pop().unwrap().clone();
         while !matches.is_empty()
             && best_match.alternative_resolved_values.len() < max_alternatives
@@ -623,12 +624,14 @@
 
 #[cfg(test)]
 mod tests {
-    use super::*;
+    use failure::ResultExt;
+    use tempfile::tempdir;
+
     use crate::data::*;
     use crate::gazetteer;
     use crate::parser_builder::ParserBuilder;
-    use failure::ResultExt;
-    use tempfile::tempdir;
+
+    use super::*;
 
     fn get_license_info() -> LicenseInfo {
         let license_content = "Some content here".to_string();
diff --git a/src/parser_registry.rs b/src/parser_registry.rs
index 68392d4..1f5e61c 100644
--- a/src/parser_registry.rs
+++ b/src/parser_registry.rs
@@ -1,7 +1,9 @@
+use std::collections::{BTreeSet, HashSet};
+
+use serde::{Deserialize, Serialize};
+
 use crate::data::{RegisteredEntityValue, ResolvedValue, TokenizedEntityValue};
 use crate::symbol_table::{ResolvedSymbolTable, TokenSymbolTable};
-use serde::{Deserialize, Serialize};
-use std::collections::{BTreeSet, HashSet};
 
 type Rank = u32;
 
@@ -59,7 +61,7 @@ impl ParserRegistry {
                 .push(token_idx);
         }
-        return Some(res_value_idx);
+        Some(res_value_idx)
     }
 
     /// Prepends a list of entity values to the parser and updates the ranks accordingly.
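
Some context for that doc comment: ranks record insertion order, and the PossibleMatch::cmp tiebreak earlier in this patch (other.rank.cmp(&self.rank)) prefers lower ranks, which is how "the order in which the values are added matters" plays out at parse time. Updating the ranks on a prepend therefore presumably means shifting existing values back so the newly prepended ones take priority — a hypothetical sketch of that invariant only, since the hunk does not show the actual implementation:

// Hypothetical illustration: prepending n_new values shifts every existing
// rank by n_new, so the prepended values occupy ranks 0..n_new and keep the
// highest priority in case of ambiguous parses.
fn shift_ranks_for_prepend(existing_ranks: &mut [u32], n_new: u32) {
    for rank in existing_ranks.iter_mut() {
        *rank += n_new;
    }
}
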
@@ -131,7 +133,7 @@ impl ParserRegistry {
                     })
                     .collect()
             })
-            .unwrap_or_else(|| vec![]);
+            .unwrap_or_else(Vec::new);
 
         self.set_top_stop_words(n_stop_words);
     }
@@ -151,7 +153,7 @@ impl ParserRegistry {
             .into_iter()
             .take(nb_stop_words)
             .map(|(idx, _)| idx)
-            .chain(self.additional_stop_words.clone().into_iter())
+            .chain(self.additional_stop_words.clone())
             .collect();
 
         // Update the set of edge_cases. i.e. resolved values that only contain stop words
diff --git a/src/symbol_table.rs b/src/symbol_table.rs
index 2056b15..54ed851 100644
--- a/src/symbol_table.rs
+++ b/src/symbol_table.rs
@@ -1,6 +1,7 @@
-use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
 
+use serde::{Deserialize, Serialize};
+
 /// Implementation of a symbol table that
 /// - always maps a given index to a single string
 /// - allows mapping a string to several indices
@@ -16,7 +17,7 @@ impl TokenSymbolTable {
     pub fn add_symbol(&mut self, symbol: String) -> u32 {
         self.string_to_index
             .get(&symbol)
-            .map(|idx| *idx)
+            .copied()
             .unwrap_or_else(|| {
                 let symbol_index = self.available_index;
                 self.available_index += 1;
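
One property worth noting in that final add_symbol hunk: the method consults string_to_index before allocating, so adding an already-known string returns the existing index rather than minting a new one. A small usage sketch — assuming TokenSymbolTable derives or implements Default, which this excerpt does not show:

// Assumed constructor; only `add_symbol` appears in the patch itself.
let mut table = TokenSymbolTable::default();
let first = table.add_symbol("stones".to_string());
let second = table.add_symbol("stones".to_string());
// `add_symbol` looked up `string_to_index` first, so the index is reused:
assert_eq!(first, second);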