Skip to content

Commit

Permalink
style: update authors in pyproject.toml and LICENSE
Browse files (browse the repository at this point in the history)
  • Loading branch information
cauliyang committed Nov 4, 2024
1 parent e5bfd72 commit d53c716
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 17 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright 2024 Yangyang & Ting-you Wang

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
10 changes: 8 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,23 @@ requires = ["maturin>=1.2.1,<2"]
build-backend = "maturin"

[tool.poetry]
authors = ["Yangyang Li <[email protected]>"]
authors = [
"Yangyang Li <[email protected]>",
"Ting-you Wang <[email protected]>",
]
license = "Apache-2.0"
name = "deepchopper"
version = "1.2.5"
description = "A Genomic Language Model for Chimera Artifact Detection in Nanopore Direct RNA Sequencing"
readme = "README.md"

[project]
authors = [
{ name = "Yangyang Li", email = "<[email protected]>" },
{ name = "Ting-you Wang", email = "<[email protected]>" },
]
name = "deepchopper"
description = "A Genomic Language Model for Chimera Artifact Detection in Nanopore Direct RNA Sequencing"
authors = [{ name = "Yangyang Li", email = "<[email protected]>" }]
readme = "README.md"
requires-python = ">=3.10"
license = { file = "LICENSE" }
Expand Down
71 changes: 57 additions & 14 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use rayon::prelude::*;
use std::{fs::File, io::BufReader, ops::Range, path::Path};

use crate::smooth::majority_voting;
use log::info;

pub fn summary_predict_generic<D: PartialEq + Send + Sync + Copy>(
predictions: &[Vec<D>],
Expand Down Expand Up @@ -80,6 +81,11 @@ pub fn collect_and_split_dataset<P: AsRef<Path>>(
let val_count = total_reads * val_ratio;
let test_count = total_reads * test_ratio;

info!(
"train: {}, val: {}, test: {}",
train_count, val_count, test_count
);

let train_positive_count = train_count * positive_ratio;
let val_positive_count = val_count * positive_ratio;
let test_positive_count = test_count * positive_ratio;
Expand All @@ -88,6 +94,10 @@ pub fn collect_and_split_dataset<P: AsRef<Path>>(
let val_negative_count = (val_count * negative_ratio) as usize;
let test_negative_count = (test_count * negative_ratio) as usize;

info!("train positive: {}, train negative: {}", train_positive_count, train_negative_count);
info!("val positive: {}, val negative: {}", val_positive_count, val_negative_count);
info!("test positive: {}, test negative: {}", test_positive_count, test_negative_count);

let train_internal_adapter_count = (train_positive_count * internal_adapter_ratio) as usize;
let train_terminal_adapter_count = (train_positive_count * terminal_adapter_ratio) as usize;

Expand All @@ -97,6 +107,10 @@ pub fn collect_and_split_dataset<P: AsRef<Path>>(
let test_internal_adapter_count = (test_positive_count * internal_adapter_ratio) as usize;
let test_terminal_adapter_count = (test_positive_count * terminal_adapter_ratio) as usize;

info!("train internal adapter: {}, train terminal adapter: {}", train_internal_adapter_count, train_terminal_adapter_count);
info!("val internal adapter: {}, val terminal adapter: {}", val_internal_adapter_count, val_terminal_adapter_count);
info!("test internal adapter: {}, test terminal adapter: {}", test_internal_adapter_count, test_terminal_adapter_count);

let mut internal_fq_reader = File::open(internal_fq_path.as_ref())
.map(BufReader::new)
.map(fastq::Reader::new)?;
Expand Down Expand Up @@ -211,6 +225,11 @@ pub fn collect_and_split_dataset_with_natural_terminal_adapters<P: AsRef<Path>>(
let val_count = total_reads * val_ratio;
let test_count = total_reads * test_ratio;

info!(
"train: {}, val: {}, test: {}",
train_count, val_count, test_count
);

let train_positive_count = train_count * positive_ratio;
let val_positive_count = val_count * positive_ratio;
let test_positive_count = test_count * positive_ratio;
Expand All @@ -219,28 +238,37 @@ pub fn collect_and_split_dataset_with_natural_terminal_adapters<P: AsRef<Path>>(
let val_negative_count = (val_count * negative_ratio) as usize;
let test_negative_count = (test_count * negative_ratio) as usize;

info!("train positive: {}, train negative: {}", train_positive_count, train_negative_count);
info!("val positive: {}, val negative: {}", val_positive_count, val_negative_count);
info!("test positive: {}, test negative: {}", test_positive_count, test_negative_count);

let train_internal_adapter_count = (train_positive_count * iternal_adapter_ratio) as usize;
let train_terminal_adapter_count = train_positive_count * terminal_adapter_ratio;
let train_natural_terminal_adapter_count =
(train_terminal_adapter_count * natural_terminal_adapter_ratio) as usize;
let train_simulated_terminal_adapter_count =
(train_terminal_adapter_count * (1.0 - natural_terminal_adapter_ratio)) as usize;

let val_internal_adapter_count = (val_positive_count * iternal_adapter_ratio) as usize;
info!("train internal adapter: {}, train terminal adapter: {}", train_internal_adapter_count, train_terminal_adapter_count);

let val_internal_adapter_count = (val_positive_count * iternal_adapter_ratio) as usize;
let val_terminal_adapter_count = val_positive_count * terminal_adapter_ratio;
let val_natural_terminal_adapter_count =
(val_terminal_adapter_count * natural_terminal_adapter_ratio) as usize;
let val_simulated_terminal_adapter_count =
(val_terminal_adapter_count * (1.0 - natural_terminal_adapter_ratio)) as usize;

info!("val internal adapter: {}, val terminal adapter: {}", val_internal_adapter_count, val_terminal_adapter_count);

let test_internal_adapter_count = (test_positive_count * iternal_adapter_ratio) as usize;
let test_terminal_adapter_count = test_positive_count * terminal_adapter_ratio;
let test_natural_terminal_adapter_count =
(test_terminal_adapter_count * natural_terminal_adapter_ratio) as usize;
let test_simulated_terminal_adapter_count =
(test_terminal_adapter_count * (1.0 - natural_terminal_adapter_ratio)) as usize;

info!("test internal adapter: {}, test terminal adapter: {}", test_internal_adapter_count, test_terminal_adapter_count);

let mut internal_fq_reader = File::open(internal_fq_path.as_ref())
.map(BufReader::new)
.map(fastq::Reader::new)?;
Expand Down Expand Up @@ -367,7 +395,7 @@ pub fn collect_and_split_dataset_with_natural_terminal_adapters_and_both_adapter
test_ratio: f32, // 0.1
internal_adapter_ratio: f32, // 0.4
terminal_adapter_ratio: f32, // 0.4
both_terminal_adapter_ratio: f32, // 0.2
both_adapter_ratio: f32, // 0.2
natural_terminal_adapter_ratio: f32, // 0.5
positive_ratio: f32, // 0.9
prefix: Option<&str>,
Expand All @@ -378,7 +406,7 @@ pub fn collect_and_split_dataset_with_natural_terminal_adapters_and_both_adapter
));
}

if internal_adapter_ratio + terminal_adapter_ratio + both_terminal_adapter_ratio != 1.0 {
if internal_adapter_ratio + terminal_adapter_ratio + both_adapter_ratio != 1.0 {
return Err(anyhow::anyhow!(
"internal_adapter_ratio + terminal_adapter_ratio + both_terminal_adapter_ratio must be equal to 1.0"
));
Expand All @@ -391,6 +419,11 @@ pub fn collect_and_split_dataset_with_natural_terminal_adapters_and_both_adapter
let val_count = total_reads * val_ratio;
let test_count = total_reads * test_ratio;

info!(
"train: {}, val: {}, test: {}",
train_count, val_count, test_count
);

let train_positive_count = train_count * positive_ratio;
let val_positive_count = val_count * positive_ratio;
let test_positive_count = test_count * positive_ratio;
Expand All @@ -399,32 +432,42 @@ pub fn collect_and_split_dataset_with_natural_terminal_adapters_and_both_adapter
let val_negative_count = (val_count * negative_ratio) as usize;
let test_negative_count = (test_count * negative_ratio) as usize;

info!("train positive: {}, train negative: {}", train_positive_count, train_negative_count);
info!("val positive: {}, val negative: {}", val_positive_count, val_negative_count);
info!("test positive: {}, test negative: {}", test_positive_count, test_negative_count);

let train_internal_adapter_count = (train_positive_count * internal_adapter_ratio) as usize;
let train_terminal_adapter_count = train_positive_count * terminal_adapter_ratio;
let train_natural_terminal_adapter_count =
(train_terminal_adapter_count * natural_terminal_adapter_ratio) as usize;
let train_simulated_terminal_adapter_count =
(train_terminal_adapter_count * (1.0 - natural_terminal_adapter_ratio)) as usize;
let train_both_terminal_adapter_count =
(train_positive_count * both_terminal_adapter_ratio) as usize;
let train_both_adapter_count =
(train_positive_count * both_adapter_ratio) as usize;

info!("train internal adapter: {}, train terminal adapter: {}, train both: {}", train_internal_adapter_count, train_terminal_adapter_count, train_both_adapter_count);

let val_internal_adapter_count = (val_positive_count * internal_adapter_ratio) as usize;
let val_terminal_adapter_count = val_positive_count * terminal_adapter_ratio;
let val_natural_terminal_adapter_count =
(val_terminal_adapter_count * natural_terminal_adapter_ratio) as usize;
let val_simulated_terminal_adapter_count =
(val_terminal_adapter_count * (1.0 - natural_terminal_adapter_ratio)) as usize;
let val_both_terminal_adapter_count =
(val_positive_count * both_terminal_adapter_ratio) as usize;
let val_both_adapter_count =
(val_positive_count * both_adapter_ratio) as usize;

info!("val internal adapter: {}, val terminal adapter: {}, val both: {}", val_internal_adapter_count, val_terminal_adapter_count, val_both_adapter_count);

let test_internal_adapter_count = (test_positive_count * internal_adapter_ratio) as usize;
let test_terminal_adapter_count = test_positive_count * terminal_adapter_ratio;
let test_natural_terminal_adapter_count =
(test_terminal_adapter_count * natural_terminal_adapter_ratio) as usize;
let test_simulated_terminal_adapter_count =
(test_terminal_adapter_count * (1.0 - natural_terminal_adapter_ratio)) as usize;
let test_both_terminal_adapter_count =
(test_positive_count * both_terminal_adapter_ratio) as usize;
let test_both_adapter_count =
(test_positive_count * both_adapter_ratio) as usize;

info!("test internal adapter: {}, test terminal adapter: {}, test both: {}", test_internal_adapter_count, test_terminal_adapter_count, test_both_adapter_count);

let mut internal_fq_reader = File::open(internal_fq_path.as_ref())
.map(BufReader::new)
Expand Down Expand Up @@ -471,7 +514,7 @@ pub fn collect_and_split_dataset_with_natural_terminal_adapters_and_both_adapter
});
both_fq_reader
.records()
.take(train_both_terminal_adapter_count)
.take(train_both_adapter_count)
.for_each(|record| {
train_writer.write_record(&record.unwrap()).unwrap();
});
Expand Down Expand Up @@ -506,8 +549,8 @@ pub fn collect_and_split_dataset_with_natural_terminal_adapters_and_both_adapter
});
both_fq_reader
.records()
.skip(train_both_terminal_adapter_count)
.take(val_both_terminal_adapter_count)
.skip(train_both_adapter_count)
.take(val_both_adapter_count)
.for_each(|record| {
val_writer.write_record(&record.unwrap()).unwrap();
});
Expand Down Expand Up @@ -544,8 +587,8 @@ pub fn collect_and_split_dataset_with_natural_terminal_adapters_and_both_adapter

both_fq_reader
.records()
.skip(train_both_terminal_adapter_count + val_both_terminal_adapter_count)
.take(test_both_terminal_adapter_count)
.skip(train_both_adapter_count + val_both_adapter_count)
.take(test_both_adapter_count)
.for_each(|record| {
test_writer.write_record(&record.unwrap()).unwrap();
});
Expand Down

0 comments on commit d53c716

Please sign in to comment.