Skip to content

Commit

Permalink
Fixed minor bug with rarefaction depth
Browse files Browse the repository at this point in the history
  • Loading branch information
Khi Pin Chua committed Dec 31, 2024
1 parent 99f216c commit 01ebb33
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 11 deletions.
16 changes: 6 additions & 10 deletions modules/dada2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -235,31 +235,27 @@ process dada2_qc {
ninety=0.8
if [ \${number} -le 2 ];
then
rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -k2 -nr | \
tail -n1 | cut -f2`
int_rarefaction_d=\${rarefaction_d%%.*}
echo \${int_rarefaction_d} > rarefaction_depth_suggested.txt
alpha_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
tail -n1 | cut -f2`
int_alpha_d=\${alpha_d%%.*}
int_alpha_d=\${int_rarefaction_d}
elif [ \${number} -gt 2 ] && [ \${number} -lt 5 ];
then
result=`awk -v n="\$number" -v p=0.8 'BEGIN{print int(n * p)}'`
rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -k2 -nr | \
head -n \${result} | tail -n1 | cut -f2`
int_rarefaction_d=\${rarefaction_d%%.*}
echo \${int_rarefaction_d} > rarefaction_depth_suggested.txt
alpha_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
tail -n1 | cut -f2`
int_alpha_d=\${alpha_d%%.*}
int_alpha_d=\${int_rarefaction_d}
else
result=`awk -v n="\$number" -v p=0.8 'BEGIN{print int(n * p)}'`
rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -k2 -nr | \
head -n \${result} | tail -n1 | cut -f2`
int_rarefaction_d=\${rarefaction_d%%.*}
echo \${int_rarefaction_d} > rarefaction_depth_suggested.txt
alpha_res=`awk -v n="\$number" 'BEGIN{print int(n * 0.2)}'`
alpha_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
alpha_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -k2 -nr | \
head -n \${alpha_res} | tail -n1 | cut -f2`
int_alpha_d=\${alpha_d%%.*}
fi
Expand Down
12 changes: 12 additions & 0 deletions modules/qc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ process prepare_qiime2_manifest {
for i in \$(ls sample_ind_*); do sample=\$(cat \${i} | tr '\\n' '\\t' | cut -f1); fastq=\$(basename \$(cat \${i} | tr '\\n' '\\t' | cut -f2)); echo -e "\${sample}\t\${fastq}"; done >> samplefile.txt
rm -f sample_ind_*
# If pool column exists, split sample files
# First make sure metadata file is not empty
# Check if metadata file exists and is not empty
if [ ! -s "$metadata" ]; then
echo "Error: Metadata file is empty or does not exist"
exit 1
fi
poolyes=\$(csvtk headers -t $metadata | grep pool | wc -l)
if [[ \$poolyes -eq 1 ]]
then
Expand Down Expand Up @@ -190,6 +196,12 @@ process prepare_qiime2_manifest_skip_cutadapt {
for i in \$(ls sample_ind_*); do sample=\$(cat \${i} | tr '\\n' '\\t' | cut -f1); fastq=\$(basename \$(cat \${i} | tr '\\n' '\\t' | cut -f2)); echo -e "\${sample}\t\${fastq}"; done >> samplefile.txt
rm -f sample_ind_*
# If pool column exists, split sample files
# First make sure metadata file is not empty
# Check if metadata file exists and is not empty
if [ ! -s "$metadata" ]; then
echo "Error: Metadata file is empty or does not exist"
exit 1
fi
poolyes=\$(csvtk headers -t $metadata | grep pool | wc -l)
if [[ \$poolyes -eq 1 ]]
then
Expand Down
2 changes: 1 addition & 1 deletion scripts/visualize_biom.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ dada2_qc <- dada2_qc %>%

```{r results="asis", echo=FALSE}
if(params$merged_tax_tab_file != ""){
cat("### Classification using Naive Bayes classifier with SILVA, GTDB and RefSeq + RDP \n")
cat("### Classification using Naive Bayes classifier with Greengenes2, GTDB and Silva \n")
cat(paste0("* ASVs classified at Species level: ", nrow(class_spec), " (", round(nrow(class_spec)/total_asv, 4)*100, "%) \n"))
cat(paste0("* Percentage reads belong to ASV classified at Species level: ", round(sum(otu_table(physeq_list[["besttax"]])[class_spec_uncultured$OTU,])/sum(otu_table(physeq_list[["besttax"]])), 2)*100, "% \n"))
cat(paste0("* ASVs classified at Genus level: ", nrow(class_gen), " (", round(nrow(class_gen)/total_asv, 4)*100, "%) \n"))
Expand Down

0 comments on commit 01ebb33

Please sign in to comment.