Fix multiple components (#162)

* output index when only_build_index is true
* fix threads option
* fix argument type and add log
* fix output handling
* fix output arguments and update docker image
* fix log2stderr argument and remove discard reads option
* remove echo
* update bbsplit build index test
* add workdir
* apply suggestions from code review
* accept more than two reference files
* update changelog
* update changelog
* remove indentation
* minor fixes
* fix descriptions

---------

Co-authored-by: Robrecht Cannoodt <[email protected]>
viash-hub · Nov 8, 2024 · b3fcd52 · b3fcd52
1 parent 06005a7
commit b3fcd52
Show file tree

Hide file tree

Showing 15 changed files with 65 additions and 73 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,18 @@
 
 * `cutadapt`: Fix the non-functional `action` parameter (PR #161).
 
+* `bbmap_bbsplit`: Change argument type of `build` to `file` and add output argument `index` (PR #162).
+
+* `kallisto/kallisto_index`: Fix command script to use `--threads` option (PR #162).
+
+* `kallisto/kallisto_quant`: Change type of argument `output_dir` to `file` and add output argument `log` (PR #162).
+
+* `rsem/rsem_calculate_expression`: Fix output handling (PR #162).
+
+* `sortmerna`: Change type of argument `aligned` to `file`; update docker image; accept more than two reference files (PR #162).
+
+* `umi_tools/umi_tools_extract`: Remove `umi_discard_reads` option and change `log2stderr` to input argument (PR #162).
+
 ## MINOR CHANGES
 
 * `agat_convert_bed2gff`: change type of argument `inflate_off` from `boolean_false` to `boolean_true` (PR #160).
@@ -38,6 +50,8 @@
 
 * Upgrade to Viash 0.9.0.
 
+* `bbmap_bbsplit`: Move to namespace `bbmap` (PR #162).
+
 # biobox 0.2.0
 
 ## BREAKING CHANGES

diff --git a/src/bbmap_bbsplit/config.vsh.yaml → src/bbmap/bbmap_bbsplit/config.vsh.yaml b/src/bbmap_bbsplit/config.vsh.yaml → src/bbmap/bbmap_bbsplit/config.vsh.yaml
@@ -30,12 +30,9 @@ argument_groups:
     type: boolean_true
     description: If set, only builds the index. Otherwise, mapping is performed.
   - name: "--build"
-    type: string
+    type: file
     description: |
-      Designate index to use. Corresponds to the number specified when building the index.
-      If building the index, this will be the build's id. If multiple references are indexed
-      in the same directory, each needs a unique build ID. Default: 1.
-    example: "1"
+      Index to be used for mapping. 
   - name: "--qin"
     type: string
     description: |
@@ -95,6 +92,12 @@ argument_groups:
 
 - name: "Output"
   arguments:
+  - name: "--index"
+    type: file
+    description: |
+      Location to write the index.
+    direction: output
+    example: BBSplit_index
   - name: "--fastq_1"
     type: file
     description: |

diff --git a/src/bbmap_bbsplit/help.txt → src/bbmap/bbmap_bbsplit/help.txt b/src/bbmap_bbsplit/help.txt → src/bbmap/bbmap_bbsplit/help.txt
diff --git a/src/bbmap_bbsplit/script.sh → src/bbmap/bbmap_bbsplit/script.sh b/src/bbmap_bbsplit/script.sh → src/bbmap/bbmap_bbsplit/script.sh
@@ -30,17 +30,17 @@ if [ ! -d "$par_build" ]; then
 fi
 
 if $par_only_build_index; then
-    if [ ${#refs[@]} -gt 1 ]; then
+    if [ "${#refs[@]}" -gt 1 ]; then
         bbsplit.sh \
             --ref_primary="$primary_ref" \
             "${refs[@]}" \
-            path=$par_build
+            path=$par_index
     else
         echo "ERROR: Please specify at least two reference fasta files."
     fi
 else
     IFS=";" read -ra input <<< "$par_input"
-    tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")
+    tmpdir=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXXXX")
     index_files=''
     if [ -d "$par_build" ]; then
         index_files="path=$par_build"
@@ -51,7 +51,7 @@ else
     fi
 
     extra_args=""
-    if [ -n "$par_refstats" ]; then extra_args+=" --refstats $par_refstats"; fi
+    if [ -f "$par_refstats" ]; then extra_args+=" --refstats $par_refstats"; fi
     if [ -n "$par_ambiguous" ]; then extra_args+=" --ambiguous $par_ambiguous"; fi
     if [ -n "$par_ambiguous2" ]; then extra_args+=" --ambiguous2 $par_ambiguous2"; fi
     if [ -n "$par_minratio" ]; then extra_args+=" --minratio $par_minratio"; fi

diff --git a/src/bbmap_bbsplit/test.sh → src/bbmap/bbmap_bbsplit/test.sh b/src/bbmap_bbsplit/test.sh → src/bbmap/bbmap_bbsplit/test.sh
@@ -55,7 +55,7 @@ echo ">>> Building BBSplit index"
 "${meta_executable}" \
   --ref "genome.fasta;human.fa;sarscov2.fa" \
   --only_build_index \
-  --build "BBSplit_index" 
+  --index "BBSplit_index" 
 
 echo ">>> Check whether output exists"
 [ ! -d "BBSplit_index" ] && echo "BBSplit index does not exist!" && exit 1

diff --git a/src/kallisto/kallisto_index/Kallisto b/src/kallisto/kallisto_index/Kallisto
diff --git a/src/kallisto/kallisto_index/script.sh b/src/kallisto/kallisto_index/script.sh
@@ -28,7 +28,7 @@ kallisto index \
     ${par_min_size:+--min-size "${par_min_size}"} \
     ${par_ec_max_size:+--ec-max-size "${par_ec_max_size}"} \
     ${par_d_list:+--d-list "${par_d_list}"} \
-    ${meta_cpus:+--cpu "${meta_cpus}"} \
+    ${meta_cpus:+--threads "${meta_cpus}"} \
     ${par_tmp:+--tmp "${par_tmp}"} \
     "${par_input}"
 
diff --git a/src/kallisto/kallisto_quant/config.vsh.yaml b/src/kallisto/kallisto_quant/config.vsh.yaml
@@ -32,9 +32,15 @@ argument_groups:
   arguments:
   - name: "--output_dir"
     alternatives: ["-o"]
-    type: string
+    type: file
     description: Directory to write output to.
     required: true
+    direction: output
+  - name: "--log"
+    type: file
+    description: File containing log information from running kallisto quant
+    direction: output
+
 
 - name: "Options"
   arguments:

diff --git a/src/kallisto/kallisto_quant/script.sh b/src/kallisto/kallisto_quant/script.sh
@@ -41,6 +41,4 @@ kallisto quant \
     ${par_sd:+--sd "${par_sd}"} \
     ${par_seed:+--seed "${par_seed}"} \
     -o $par_output_dir \
-    ${input[*]}
-
-
+    ${input[*]} 2> >(tee -a $par_log >&2)
diff --git a/src/rsem/rsem_calculate_expression/script.sh b/src/rsem/rsem_calculate_expression/script.sh
@@ -5,13 +5,6 @@
 
 set -eo pipefail
 
-function clean_up {
-    rm -rf "$tmpdir"
-}
-trap clean_up EXIT
-
-tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")
-
 if [ "$par_strandedness" == 'forward' ]; then
     strandedness='--strandedness forward'
 elif [ "$par_strandedness" == 'reverse' ]; then
@@ -22,14 +15,14 @@ fi
 
 IFS=";" read -ra input <<< $par_input
 
-INDEX=$(find -L $meta_resources_dir/$par_index -name "*.grp" | sed 's/\.grp$//')
+INDEX=$(find -L $par_index -name "*.grp" | sed 's/\.grp$//')
 
 unset_if_false=( par_paired par_quiet par_no_bam_output par_sampling_for_bam par_no_qualities 
                  par_alignments par_bowtie2 par_star par_hisat2_hca par_append_names 
                  par_single_cell_prior par_calc_pme par_calc_ci par_phred64_quals 
                  par_solexa_quals par_star_gzipped_read_file par_star_bzipped_read_file 
                  par_star_output_genome_bam par_estimate_rspd par_keep_intermediate_files 
-                 par_time par_run_pRSEM par_cap_stacked_chipseq_reads par_sort_bam_by_read_name )
+                 par_time par_run_pRSEM par_cap_stacked_chipseq_reads par_sort_bam_by_read_name par_sort_bam_by_coordinate )
 
 for par in ${unset_if_false[@]}; do
     test_val="${!par}"
@@ -60,12 +53,7 @@ rsem-calculate-expression \
     ${par_run_pRSEM:+--run-pRSEM} \
     ${par_cap_stacked_chipseq_reads:+--cap-stacked-chipseq-reads} \
     ${par_sort_bam_by_read_name:+--sort-bam-by-read-name} \
-    ${par_counts_gene:+--counts-gene "$par_counts_gene"} \
-    ${par_counts_transcripts:+--counts-transcripts "$par_counts_transcripts"} \
-    ${par_stat:+--stat "$par_stat"} \
-    ${par_bam_star:+--bam-star "$par_bam_star"} \
-    ${par_bam_genome:+--bam-genome "$par_bam_genome"} \
-    ${par_bam_transcript:+--bam-transcript "$par_bam_transcript"} \
+    ${par_sort_bam_by_coordinate:+--sort-bam-by-coordinate} \
     ${par_fai:+--fai "$par_fai"} \
     ${par_seed:+--seed "$par_seed"} \
     ${par_seed_length:+--seed-length "$par_seed_length"} \
@@ -101,3 +89,10 @@ rsem-calculate-expression \
     $INDEX \
     $par_id
 
+[[ -f "${par_id}.genes.results" ]] && mv "${par_id}.genes.results" $par_counts_gene
+[[ -f "${par_id}.isoforms.results" ]] && mv "${par_id}.isoforms.results" $par_counts_transcripts
+[[ -d "${par_id}.stat" ]] && mv "${par_id}.stat" $par_stat
+[[ -f "${par_id}.log" ]] && mv "${par_id}.log" $par_logs
+[[ -f "${par_id}.STAR.genome.bam" ]] && mv "${par_id}.STAR.genome.bam" $par_bam_star
+[[ -f "${par_id}.genome.bam" ]] && mv "${par_id}.genome.bam" $par_bam_genome
+[[ -f "${par_id}.transcript.bam" ]] && mv "${par_id}.transcript.bam" $par_bam_transcript
diff --git a/src/sortmerna/config.vsh.yaml b/src/sortmerna/config.vsh.yaml
@@ -42,15 +42,17 @@ argument_groups:
     description: Sortmerna log file.
   - name: "--output"
     alternatives: ["--aligned"]
-    type: string
+    type: file
     description: |
       Directory and file prefix for aligned output. The appropriate extension: 
       (fasta|fastq|blast|sam|etc) is automatically added.
       If 'dir' is not specified, the output is created in the WORKDIR/out/.
       If 'pfx' is not specified, the prefix 'aligned' is used.
+    direction: output
   - name: "--other"
-    type: string
-    description: Create Non-aligned reads output file with this path/prefix. Must be used with fastx. 
+    type: file
+    description: Create Non-aligned reads output file with this path/prefix. Must be used with fastx.
+    direction: output
 
 - name: "Options"
   arguments:
@@ -91,7 +93,7 @@ argument_groups:
     type: integer
     description: |
       search all alignments having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is
-      computed using seeds’ positions to expand hits into longer matches prior to Smith-Waterman alignment. Default: '2'.
+      computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. Default: '2'.
     example: 2
   - name: "--print_all_reads"
     type: boolean_true
@@ -152,7 +154,7 @@ argument_groups:
   - name: "--N"
     type: integer
     description: |
-      Smith-Waterman penalty for ambiguous letters (N’s) scored as --mismatch. Default: '-1'.\
+      Smith-Waterman penalty for ambiguous letters (N's) scored as --mismatch. Default: '-1'.
     example: -1
   - name: "--a"
     type: integer
@@ -207,7 +209,7 @@ argument_groups:
     - name: "--otu_map"
       type: boolean_true
       description: |
-        Output OTU map (input to QIIME’s make_otu_table.py).
+        Output OTU map (input to QIIME's make_otu_table.py).
 
 - name: "Advanced options"
   arguments:
@@ -226,7 +228,7 @@ argument_groups:
     description: |
       The number (or percentage if followed by %) of nucleotides to add to each edge of the alignment region on the
       reference sequence before performing Smith-Waterman alignment. Default: '4'.
-    example: 4
+    example: "4"
   - name: "--full_search"
     type: boolean_true
     description: |
@@ -263,8 +265,6 @@ argument_groups:
       Maximum number of positions to store for each unique L-mer. Set to 0 to store all positions. Default: '1000'
     example: 1000
 
-
-
 resources:
   - type: bash_script
     path: script.sh
@@ -276,15 +276,12 @@ test_resources:
 
 engines:
 - type: docker
-  image: ubuntu:22.04
+  image: quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0
   setup: 
     - type: docker
       run: |
-        apt-get update && \
-        apt-get install -y --no-install-recommends gzip cmake g++ wget && \
-        apt-get clean && \
-        wget --no-check-certificate https://github.com/sortmerna/sortmerna/releases/download/v4.3.6/sortmerna-4.3.6-Linux.sh && \
-        bash sortmerna-4.3.6-Linux.sh --skip-license
+        echo SortMeRNA: `sortmerna --version | sed -n 's/.*version \([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'`
+
 runners: 
 - type: executable
-- type: nextflow 
+- type: nextflow
diff --git a/src/sortmerna/script.sh b/src/sortmerna/script.sh
@@ -37,16 +37,11 @@ if [[ ! -z "$par_ribo_database_manifest" ]]; then
 
 elif [[ ! -z "$par_ref" ]]; then
     IFS=";" read -ra ref <<< "$par_ref"
-    # check if length is 2 and par_paired is set to true
-    if [[ "${#ref[@]}" -eq 2 && "$par_paired" == "true" ]]; then
-        refs="--ref ${ref[0]} --ref ${ref[1]}"
-    # check if length is 1 and par_paired is set to false
-    elif [[ "${#ref[@]}" -eq 1 && "$par_paired" == "false" ]]; then
-            refs="--ref $par_ref"      
-    else # if one reference provided but paired is set to true:
-        echo "Two reference fasta files are required for paired-end reads"
-            exit 1
-    fi
+    for i in "${ref[@]}"
+    do
+        refs+="-ref $i "
+    done
+
 else 
     echo "No reference fasta file(s) provided"
     exit 1

diff --git a/src/sortmerna/test.sh b/src/sortmerna/test.sh
@@ -31,7 +31,7 @@ rm -f rRNA_reads_fwd.fq.gz rRNA_reads_rev.fq.gz non_rRNA_reads_fwd.fq.gz non_rRN
 rm -rf kvdb/
 
 ################################################################################
-echo ">>> Testing for paired-end reads and --ref and --paired_out argumens"
+echo ">>> Testing for paired-end reads and --ref and --paired_out arguments"
 "$meta_executable" \
     --output "rRNA_reads" \
     --other "non_rRNA_reads" \

diff --git a/src/umi_tools/umi_tools_extract/config.vsh.yaml b/src/umi_tools/umi_tools_extract/config.vsh.yaml
@@ -128,12 +128,6 @@ argument_groups:
         Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying
         the reads with the same mapping position, but treat similar yet nonidentical UMIs differently. Default: `directional`
       example: "directional"
-    - name: --umi_discard_read
-      type: integer
-      choices: [0, 1, 2]
-      description: |
-        After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively. Default: `0`
-      example: 0
 
   - name: Common Options
     arguments:
@@ -144,7 +138,6 @@ argument_groups:
     - name: --log2stderr
       type: boolean_true
       description: Send logging information to stderr.
-      direction: output
     - name: --verbose
       type: integer
       description: Log level. The higher, the more output.

diff --git a/src/umi_tools/umi_tools_extract/script.sh b/src/umi_tools/umi_tools_extract/script.sh
@@ -82,12 +82,3 @@ umi_tools extract \
     ${par_log2stderr:+--log2stderr} \
     ${par_verbose:+--verbose "$par_verbose"} \
     ${par_error:+--error "$par_error"}
-
-
-if [ "$par_umi_discard_read" == 1 ]; then
-    # discard read 1
-    rm "$par_read1_out"
-elif [ "$par_umi_discard_read" == 2 ]; then
-    # discard read 2 (-f to bypass file existence check)
-    rm -f "$par_read2_out"
-fi