Fixed minor bug with rarefaction depth

PacificBiosciences · Dec 31, 2024 · 01ebb33 · 01ebb33
1 parent 99f216c
commit 01ebb33
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 11 deletions.
diff --git a/modules/dada2.nf b/modules/dada2.nf
@@ -235,31 +235,27 @@ process dada2_qc {
     ninety=0.8
     if [ \${number} -le 2 ];
     then
-        rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
+        rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -k2 -nr | \
             tail -n1 | cut -f2`
         int_rarefaction_d=\${rarefaction_d%%.*}
         echo \${int_rarefaction_d} > rarefaction_depth_suggested.txt
-        alpha_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
-            tail -n1 | cut -f2`
-        int_alpha_d=\${alpha_d%%.*}
+        int_alpha_d=\${int_rarefaction_d}
     elif [ \${number} -gt 2 ] && [ \${number} -lt 5 ];
     then
         result=`awk -v n="\$number" -v p=0.8 'BEGIN{print int(n * p)}'`
-        rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
+        rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -k2 -nr | \
             head -n \${result} | tail -n1 | cut -f2`
         int_rarefaction_d=\${rarefaction_d%%.*}
         echo \${int_rarefaction_d} > rarefaction_depth_suggested.txt
-        alpha_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
-            tail -n1 | cut -f2`
-        int_alpha_d=\${alpha_d%%.*}
+        int_alpha_d=\${int_rarefaction_d}
     else
         result=`awk -v n="\$number" -v p=0.8 'BEGIN{print int(n * p)}'`
-        rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
+        rarefaction_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -k2 -nr | \
             head -n \${result} | tail -n1 | cut -f2`
         int_rarefaction_d=\${rarefaction_d%%.*}
         echo \${int_rarefaction_d} > rarefaction_depth_suggested.txt
         alpha_res=`awk -v n="\$number" 'BEGIN{print int(n * 0.2)}'`
-        alpha_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -t, -k2 -nr | \
+        alpha_d=`cat dada2_table_summary/sample-frequency-detail.tsv | sort -k2 -nr | \
             head -n \${alpha_res} | tail -n1 | cut -f2`
         int_alpha_d=\${alpha_d%%.*}
     fi

diff --git a/modules/qc.nf b/modules/qc.nf
@@ -161,6 +161,12 @@ process prepare_qiime2_manifest {
     for i in \$(ls sample_ind_*); do sample=\$(cat \${i} | tr '\\n' '\\t' | cut -f1); fastq=\$(basename \$(cat \${i} | tr '\\n' '\\t' | cut -f2)); echo -e "\${sample}\t\${fastq}"; done >> samplefile.txt
     rm -f sample_ind_*
     # If pool column exists, split sample files
+    # First make sure metadata file is not empty
+    # Check if metadata file exists and is not empty
+    if [ ! -s "$metadata" ]; then
+        echo "Error: Metadata file is empty or does not exist"
+        exit 1
+    fi
     poolyes=\$(csvtk headers -t $metadata | grep pool | wc -l)
     if [[ \$poolyes -eq 1 ]]
     then
@@ -190,6 +196,12 @@ process prepare_qiime2_manifest_skip_cutadapt {
     for i in \$(ls sample_ind_*); do sample=\$(cat \${i} | tr '\\n' '\\t' | cut -f1); fastq=\$(basename \$(cat \${i} | tr '\\n' '\\t' | cut -f2)); echo -e "\${sample}\t\${fastq}"; done >> samplefile.txt
     rm -f sample_ind_*
     # If pool column exists, split sample files
+    # First make sure metadata file is not empty
+    # Check if metadata file exists and is not empty
+    if [ ! -s "$metadata" ]; then
+        echo "Error: Metadata file is empty or does not exist"
+        exit 1
+    fi
     poolyes=\$(csvtk headers -t $metadata | grep pool | wc -l)
     if [[ \$poolyes -eq 1 ]]
     then

diff --git a/scripts/visualize_biom.Rmd b/scripts/visualize_biom.Rmd
@@ -237,7 +237,7 @@ dada2_qc <- dada2_qc %>%
 
 ```{r results="asis", echo=FALSE}
 if(params$merged_tax_tab_file != ""){
-  cat("### Classification using Naive Bayes classifier with SILVA, GTDB and RefSeq + RDP  \n")
+  cat("### Classification using Naive Bayes classifier with Greengenes2, GTDB and Silva \n")
   cat(paste0("* ASVs classified at Species level: ", nrow(class_spec), " (", round(nrow(class_spec)/total_asv, 4)*100, "%)  \n"))
   cat(paste0("* Percentage reads belong to ASV classified at Species level: ", round(sum(otu_table(physeq_list[["besttax"]])[class_spec_uncultured$OTU,])/sum(otu_table(physeq_list[["besttax"]])), 2)*100, "%  \n"))
   cat(paste0("* ASVs classified at Genus level: ", nrow(class_gen), " (", round(nrow(class_gen)/total_asv, 4)*100, "%)  \n"))