diff --git a/README.md b/README.md index d48ef45f..8ae75d4f 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ Learn more about using SEGUL API in the [documentation](https://www.segul.app/do | Feature | Quick Link | | ------------------------------ | ---------------------------------------------------------------------------------------------------------------------- | -| Alignment concatenation | [CLI](https://www.segul.app/docs/cli-usage/concat) / [GUI](https://www.segul.app/docs/gui-usage/align-concat) | +| Alignment concatenation | [CLI](https://www.segul.app/docs/cli-usage/alignments) / [GUI](https://www.segul.app/docs/gui-usage/align-concat) | | Alignment conversion | [CLI](https://www.segul.app/docs/cli-usage/convert) / [GUI](https://www.segul.app/docs/gui-usage/align-convert) | | Alignment filtering | [CLI](https://www.segul.app/docs/cli-usage/filter) / [GUI](https://www.segul.app/docs/gui-usage/align-filter) | | Alignment splitting | [CLI](https://www.segul.app/docs/cli-usage/split) / [GUI](https://www.segul.app/docs/gui-usage/align-split) | @@ -101,7 +101,7 @@ Learn more about using SEGUL API in the [documentation](https://www.segul.app/do | Alignment summary statistics | [CLI](https://www.segul.app/docs/cli-usage/summary) / [GUI](https://www.segul.app/docs/gui-usage/align-summary) | | Genomic summary statistics | [CLI](https://www.segul.app/docs/cli-usage/genomic) / [GUI](https://www.segul.app/docs/gui-usage/genomic) | | Sequence extraction | [CLI](https://www.segul.app/docs/cli-usage/extract) / [GUI](https://www.segul.app/docs/gui-usage/sequence-extract) | -| Sequence filtering | [CLI](https://www.segul.app/docs/cli-usage/seq_filter) / GUI feature in development | +| Sequence filtering | [CLI](https://www.segul.app/docs/cli-usage/seq_filter) / GUI feature in development | | Sequence ID extraction | [CLI](https://www.segul.app/docs/cli-usage/id) / [GUI](https://www.segul.app/docs/gui-usage/sequence-id) | | Sequence ID mapping | 
[CLI](https://www.segul.app/docs/cli-usage/map) / [GUI](https://www.segul.app/docs/gui-usage/sequence-id-map) | | Sequence ID renaming | [CLI](https://www.segul.app/docs/cli-usage/rename) / [GUI](https://www.segul.app/docs/gui-usage/sequence-rename) | diff --git a/src/core/align/unalign.rs b/src/core/align/unalign.rs index f2ad6754..423e4bb9 100644 --- a/src/core/align/unalign.rs +++ b/src/core/align/unalign.rs @@ -76,7 +76,7 @@ impl<'a> UnalignAlignment<'a> { /// /// let input_fmt = InputFmt::Nexus; /// let datatype = DataType::Dna; - /// let input_dir = Path::new("tests/files/concat"); + /// let input_dir = Path::new("tests/files/alignments"); /// // Find matching alignment files in the input directory /// let files = SeqFileFinder::new(Path::new(input_dir)).find(&input_fmt); /// // Replace the temp directory with your own directory. @@ -93,10 +93,6 @@ impl<'a> UnalignAlignment<'a> { /// ); /// handle.unalign(); pub fn unalign(&self) { - if self.output_fmt != &OutputFmt::Fasta || self.output_fmt != &OutputFmt::FastaInt { - log::warn!("Unalign feature only supports fasta or fasta-int output format"); - return; - } let spin = utils::set_spinner(); spin.set_message("Converting un-aligned sequence files..."); self.input_files.par_iter().for_each(|file| { diff --git a/src/core/sequence/filter.rs b/src/core/sequence/filter.rs index 99238f0b..aadb134f 100644 --- a/src/core/sequence/filter.rs +++ b/src/core/sequence/filter.rs @@ -97,7 +97,7 @@ impl<'a> SequenceFiltering<'a> { /// /// let input_fmt = InputFmt::Nexus; /// let datatype = DataType::Dna; - /// let input_dir = Path::new("tests/files/concat"); + /// let input_dir = Path::new("tests/files/alignments"); /// let files = SeqFileFinder::new(Path::new(input_dir)).find(&input_fmt); /// // Replace the temp directory with your own directory. 
/// let output = TempDir::new("tempt").unwrap(); @@ -224,7 +224,7 @@ mod tests { #[test] fn test_filter_sequences_by_length() { - let dir = Path::new("tests/files/concat"); + let dir = Path::new("tests/files/alignments"); let params = SeqFilteringParameters::MinSequenceLength(7); setup!(dir, handle, params, output); handle.filter(); diff --git a/src/core/sequence/id.rs b/src/core/sequence/id.rs index eb1b799e..d7806799 100644 --- a/src/core/sequence/id.rs +++ b/src/core/sequence/id.rs @@ -59,8 +59,8 @@ impl<'a> SequenceID<'a> { /// use segul::helper::types::{DataType, InputFmt}; /// use tempdir::TempDir; /// - /// let alignment_2 = PathBuf::from("tests/files/concat/gene_2.nex"); - /// let alignment_1 = PathBuf::from("tests/files/concat/gene_1.nex"); + /// let alignment_2 = PathBuf::from("tests/files/alignments/gene_2.nex"); + /// let alignment_1 = PathBuf::from("tests/files/alignments/gene_1.nex"); /// let files = vec![alignment_1, alignment_2]; /// let output = TempDir::new("tempt").unwrap(); /// let handle = SequenceID::new(&files, &InputFmt::Auto, &DataType::Dna, Path::new(output.path()), None); diff --git a/src/helper/concat.rs b/src/helper/concat.rs index 88391bfb..930a80e4 100644 --- a/src/helper/concat.rs +++ b/src/helper/concat.rs @@ -42,8 +42,8 @@ impl ConcatParams { /// use segul::helper::types::{DataType, InputFmt}; /// /// let mut files = vec![ -/// PathBuf::from("tests/files/concat/gene_1.nex"), -/// PathBuf::from("tests/files/concat/gene_2.nex"), +/// PathBuf::from("tests/files/alignments/gene_1.nex"), +/// PathBuf::from("tests/files/alignments/gene_2.nex"), /// ]; /// let spinner = segul::helper::utils::set_spinner(); /// let input_fmt = InputFmt::Nexus; @@ -188,7 +188,7 @@ mod test { #[test] fn test_concat_nexus() { - let path = Path::new("tests/files/concat/"); + let path = Path::new("tests/files/alignments/"); let mut files = SeqFileFinder::new(path).find(&InputFmt::Nexus); let mut concat = Concat::new(&mut files, &InputFmt::Nexus, &DNA); let spin 
= utils::set_spinner(); @@ -199,7 +199,7 @@ mod test { #[test] #[should_panic] fn test_get_alignment_panic() { - let path = Path::new("tests/files/concat/"); + let path = Path::new("tests/files/alignments/"); let mut files = SeqFileFinder::new(path).find(&InputFmt::Nexus); let concat = Concat::new(&mut files, &InputFmt::Nexus, &DNA); concat.get_alignment(Path::new(".")); @@ -207,7 +207,7 @@ mod test { #[test] fn test_concat_check_result() { - let path = Path::new("tests/files/concat/"); + let path = Path::new("tests/files/alignments/"); let mut files = SeqFileFinder::new(path).find(&InputFmt::Nexus); let mut concat = Concat::new(&mut files, &InputFmt::Nexus, &DNA); let spin = utils::set_spinner(); @@ -219,7 +219,7 @@ mod test { #[test] fn test_concat_partition() { - let path = Path::new("tests/files/concat/"); + let path = Path::new("tests/files/alignments/"); let mut files = SeqFileFinder::new(path).find(&InputFmt::Nexus); let mut concat = Concat::new(&mut files, &InputFmt::Nexus, &DNA); let spin = utils::set_spinner(); @@ -246,7 +246,7 @@ mod test { #[test] fn test_header_datatype() { - let path = Path::new("tests/files/concat/"); + let path = Path::new("tests/files/alignments/"); let mut files = SeqFileFinder::new(path).find(&InputFmt::Nexus); let mut concat = Concat::new(&mut files, &InputFmt::Nexus, &DataType::Aa); concat.match_header_datatype(); diff --git a/src/helper/finder.rs b/src/helper/finder.rs index 96b6a057..2595486b 100644 --- a/src/helper/finder.rs +++ b/src/helper/finder.rs @@ -227,7 +227,7 @@ impl<'a> SeqFileFinder<'a> { /// use segul::helper::types::InputFmt; /// use segul::helper::finder::SeqFileFinder; /// - /// let dir = Path::new("tests/files/concat"); + /// let dir = Path::new("tests/files/alignments"); /// let input_fmt = InputFmt::Nexus; /// let files = SeqFileFinder::new(&dir).find(&input_fmt); /// assert_eq!(files.len(), 4); @@ -250,10 +250,9 @@ impl<'a> SeqFileFinder<'a> { /// # Example /// ``` /// use std::path::Path; - /// use 
segul::helper::types::InputFmt; /// use segul::helper::finder::SeqFileFinder; /// - /// let dir = Path::new("tests/files/concat"); + /// let dir = Path::new("tests/files/alignments"); /// let files = SeqFileFinder::new(&dir).find_recursive(); /// assert_eq!(files.len(), 4); /// ``` @@ -261,6 +260,29 @@ impl<'a> SeqFileFinder<'a> { walk_dir!(self, re_match_sequence_lazy) } + /// Find input files for sequence and alignment, recursively. + /// Limit search to only the input format. + /// + /// # Example + /// ``` + /// use std::path::Path; + /// use segul::helper::types::InputFmt; + /// use segul::helper::finder::SeqFileFinder; + /// + /// let dir = Path::new("tests/files/alignments"); + /// let files = SeqFileFinder::new(&dir).find_recursive_only(&InputFmt::Nexus); + /// assert_eq!(files.len(), 4); + pub fn find_recursive_only(&self, input_fmt: &'a InputFmt) -> Vec<PathBuf> { + let files = match input_fmt { + InputFmt::Fasta => walk_dir!(self, re_matches_fasta_lazy), + InputFmt::Nexus => walk_dir!(self, re_match_nexus_lazy), + InputFmt::Phylip => walk_dir!(self, re_match_phylip_lazy), + _ => unreachable!(), + }; + + files + } + fn check_results(&self, files: &[PathBuf]) { if files.is_empty() { panic!( @@ -305,6 +327,22 @@ fn re_match_sequence_lazy(fname: &str) -> bool { RE.is_match(fname) } +fn re_match_nexus_lazy(fname: &str) -> bool { + lazy_static! { + static ref RE: Regex = Regex::new(r"(?i)(.nex*|.nxs)(?:.*)").unwrap(); + } + + RE.is_match(fname) +} + +fn re_match_phylip_lazy(fname: &str) -> bool { + lazy_static! { + static ref RE: Regex = Regex::new(r"(?i)(.phy*|.fna|.fa*)(?:.*)").unwrap(); + } + + RE.is_match(fname) +} + /// Parse IDs from input sequence files. 
/// # Example /// ``` @@ -314,8 +352,8 @@ fn re_match_sequence_lazy(fname: &str) -> bool { /// use indexmap::IndexSet; /// /// let files = vec![ -/// PathBuf::from("tests/files/concat/gene_1.nex"), -/// PathBuf::from("tests/files/concat/gene_2.nex"), +/// PathBuf::from("tests/files/alignments/gene_1.nex"), +/// PathBuf::from("tests/files/alignments/gene_2.nex"), /// ]; /// /// let input_fmt = InputFmt::Nexus; @@ -404,7 +442,7 @@ mod test { macro_rules! input { ($files: ident) => { - let path = Path::new("tests/files/concat"); + let path = Path::new("tests/files/alignments"); let mut $files = SeqFileFinder::new(path); }; @@ -451,7 +489,7 @@ mod test { input!(files); let fmt = InputFmt::Nexus; files.pattern(&fmt); - assert_eq!("tests/files/concat/*.nex*", files.pattern); + assert_eq!("tests/files/alignments/*.nex*", files.pattern); } #[test] diff --git a/tests/concat.rs b/tests/concat.rs index d67713a2..c3030136 100644 --- a/tests/concat.rs +++ b/tests/concat.rs @@ -9,7 +9,7 @@ use segul::helper::types::InputFmt; #[test] fn test_concat() { - initiate_cmd!(cmd, "align", "concat", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "align", "concat", "tests/files/alignments/", tmp_dir); cmd.arg("--partition-format") .arg("raxml") .assert() @@ -19,7 +19,7 @@ fn test_concat() { #[test] fn test_concat_nexus_part() { - initiate_cmd!(cmd, "align", "concat", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "align", "concat", "tests/files/alignments/", tmp_dir); cmd.arg("--partition-format") .arg("nexus") .assert() diff --git a/tests/convert.rs b/tests/convert.rs index ed5fd4c7..b62e07c5 100644 --- a/tests/convert.rs +++ b/tests/convert.rs @@ -9,7 +9,7 @@ use segul::helper::types::InputFmt; #[test] fn test_convert() { - initiate_cmd!(cmd, "align", "convert", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "align", "convert", "tests/files/alignments/", tmp_dir); cmd.arg("-F").arg("phylip").assert().success(); test_results!(4, tmp_dir, "Align-Convert", Phylip); } 
diff --git a/tests/extract.rs b/tests/extract.rs index 7f03c106..d1d56528 100644 --- a/tests/extract.rs +++ b/tests/extract.rs @@ -9,7 +9,7 @@ use segul::helper::types::InputFmt; #[test] fn test_extract() { - initiate_cmd!(cmd, "sequence", "extract", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "sequence", "extract", "tests/files/alignments/", tmp_dir); cmd.arg("--id=ABCD").assert().success(); test_results!(4, tmp_dir, "Sequence-Extract", Nexus); } @@ -17,7 +17,7 @@ fn test_extract() { #[test] #[should_panic] fn test_conflicting_extract_cmd() { - initiate_cmd!(cmd, "sequence", "extract", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "sequence", "extract", "tests/files/alignments/", tmp_dir); cmd.arg("--id") .arg("ABCD") .arg("--re=^AB") @@ -28,18 +28,18 @@ fn test_conflicting_extract_cmd() { #[test] #[should_panic] fn test_no_extract_cmd() { - initiate_cmd!(cmd, "sequence", "extract", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "sequence", "extract", "tests/files/alignments/", tmp_dir); cmd.arg("--id") .arg("ABCD") .arg("--file") - .arg("tests/files/concat/concat.nex") + .arg("tests/files/alignments/alignments.nex") .assert() .success(); } #[test] fn test_extract_re() { - initiate_cmd!(cmd, "sequence", "extract", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "sequence", "extract", "tests/files/alignments/", tmp_dir); cmd.arg("--re=^AB").assert().success(); test_results!(4, tmp_dir, "Sequence-Extract", Nexus); } diff --git a/tests/files/concat/gene_1.nex b/tests/files/alignments/gene_1.nex similarity index 100% rename from tests/files/concat/gene_1.nex rename to tests/files/alignments/gene_1.nex diff --git a/tests/files/concat/gene_2.nex b/tests/files/alignments/gene_2.nex similarity index 100% rename from tests/files/concat/gene_2.nex rename to tests/files/alignments/gene_2.nex diff --git a/tests/files/concat/gene_3.nex b/tests/files/alignments/gene_3.nex similarity index 100% rename from tests/files/concat/gene_3.nex rename to 
tests/files/alignments/gene_3.nex diff --git a/tests/files/concat/gene_4.nexus b/tests/files/alignments/gene_4.nexus similarity index 100% rename from tests/files/concat/gene_4.nexus rename to tests/files/alignments/gene_4.nexus diff --git a/tests/id.rs b/tests/id.rs index df134b70..471805bc 100644 --- a/tests/id.rs +++ b/tests/id.rs @@ -8,7 +8,7 @@ use segul::parser::txt; #[test] fn test_id_success() { - initiate_cmd!(cmd, "sequence", "id", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "sequence", "id", "tests/files/alignments/", tmp_dir); cmd.assert().success(); let pred = predicates::path::is_file(); let res_path = tmp_dir.path().join("SEGUL-ID").join("id.txt"); diff --git a/tests/remove.rs b/tests/remove.rs index 96ff77ad..4ba71abf 100644 --- a/tests/remove.rs +++ b/tests/remove.rs @@ -9,14 +9,14 @@ use segul::helper::types::InputFmt; #[test] fn test_remove_id() { - initiate_cmd!(cmd, "sequence", "remove", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "sequence", "remove", "tests/files/alignments/", tmp_dir); cmd.arg("--id=ABCD").assert().success(); test_results!(3, tmp_dir, "Sequence-Remove", Nexus); } #[test] fn test_remove_re() { - initiate_cmd!(cmd, "sequence", "remove", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "sequence", "remove", "tests/files/alignments/", tmp_dir); cmd.arg("--re=E$").assert().success(); test_results!(4, tmp_dir, "Sequence-Remove", Nexus); } diff --git a/tests/rename.rs b/tests/rename.rs index 10464237..0d10f7ce 100644 --- a/tests/rename.rs +++ b/tests/rename.rs @@ -9,7 +9,7 @@ use segul::helper::types::InputFmt; #[test] fn test_rename() { - initiate_cmd!(cmd, "sequence", "rename", "tests/files/concat/", tmp_dir); + initiate_cmd!(cmd, "sequence", "rename", "tests/files/alignments/", tmp_dir); cmd.arg("--remove=D").assert().success(); test_results!(4, tmp_dir, "Sequence-Rename", Nexus); } diff --git a/tests/translate.rs b/tests/translate.rs index ee0f9369..d5e68a06 100644 --- a/tests/translate.rs +++ 
b/tests/translate.rs @@ -9,7 +9,13 @@ use segul::helper::types::InputFmt; #[test] fn test_dna_translation() { - initiate_cmd!(cmd, "sequence", "translate", "tests/files/concat/", tmp_dir); + initiate_cmd!( + cmd, + "sequence", + "translate", + "tests/files/alignments/", + tmp_dir + ); cmd.assert().success(); test_results!(4, tmp_dir, "Sequence-Translate", Nexus); }