added

wtsi-hgi · Apr 12, 2024 · 877e103 · 877e103
1 parent 258a127
commit 877e103
Show file tree

Hide file tree

Showing 4 changed files with 55 additions and 8 deletions.
diff --git a/assets/deploy_scripts/sanger_module_files/1.6 b/assets/deploy_scripts/sanger_module_files/1.6
@@ -0,0 +1,41 @@
+#%Module1.0
+# Static metadata for this modulefile. Note that `version` is assigned again
+# a few lines below, so the literal here only acts as an initial value.
+set version 1.6
+set program Yascp
+
+# Overwrite `version` with the last path component of what module-info
+# reports for this module's name (presumably the file name, e.g. "1.6" - confirm).
+set version [file tail [module-info version [module-info name]]]
+
+# ModulesHelp - text printed on stderr for `module help` on this modulefile.
+# Gives a short description of the pipeline, the improvements in this release,
+# and one line per supported `yascp` sub-command.
+proc ModulesHelp { } {
+    global version
+    puts stderr "YASCP (Yet Another Single Cell (scRNA) Pipeline: https://github.com/wtsi-hgi/yascp) is a nextflow pipeline that QCs the scRNA Cellranger data by removing ambient RNA, deconvoluting donors, assigning celltypes, analysing concordances vs expected genotypes. IMPROVEMENTS: 1) Added support for running cellbender with different parameters. 2) Citeseq, VDJ data integrations using Seurat integration, 3) You can now run only the cellbender step 4) Have added additional 5 doublet detection methods 5) Can provide your own Azimuth and Celltypist references 6) You can now run only the doublet detection step"
+    puts stderr ""
+    puts stderr "Yascp module has been set to run in multiple modes:"
+    puts stderr "\t*yascp -v or yascp -h :will describe the checkout tag used."
+    puts stderr "\t*yascp celltypes -c inputs.nf - run just a celltype assignment based on the input h5ad files."
+    puts stderr "\t*yascp doublets -c input.nf : Run just doublet assessments on your h5ad/matrix files."
+    # The next two lines previously repeated the `celltypes` description verbatim.
+    puts stderr "\t*yascp cluster -c inputs.nf - run just the clustering step on your input files."
+    puts stderr "\t*yascp cellbender -c inputs.nf - run just the cellbender (ambient RNA removal) step."
+    puts stderr "\t*yascp test :will run a OneK1k test dataset."
+    puts stderr "\t*yascp sample_input :will create a directory sample_input in your current working directory where you will find sample input files."
+    puts stderr "\t*yascp fetch :(you need an input.tsv file (as per this: https://github.com/wtsi-hgi/yascp/blob/main/sample_input/input.tsv) in the project directory where you would like to fetch the cellranger data. This file should contain sanger_sample_id as a header and all the unique sanger sample ids in the following rows) will fetch the cellranger data and prepare the basic yascp inputs."
+    puts stderr "\t*yascp clean -c inputs.nf: will rsync results and extract symlinks from the folder defined as an 'outdir' and then remove work directory to avoid running out of quota (this will remove nextflow cache and pipeline will need to be rerun from beginning if needed)"
+    puts stderr "\t*yascp -c input.nf :will run your samples as defined in input file. If you are unsure how to fill in the info file please refer to this documentation: https://github.com/wtsi-hgi/yascp/blob/main/docs/usage.md"
+    puts stderr ""
+    puts stderr "For the data interpretation please follow this documentation: https://github.com/wtsi-hgi/yascp/tree/main/docs"
+    puts stderr ""
+}
+
+# One-line summaries reported by `module whatis`.
+module-whatis   "Yascp Version: $version"
+module-whatis   "Yascp version $version is a single cell (scRNA) processing pipeline that takes care of donor deconvolution, ambient rna removal, celltype assignment, integration, clustering and cluster assessments and data qc: yascp (https://github.com/wtsi-hgi/yascp)"
+
+
+# Root of this release's checkout; the pipeline-specific PATH entries below
+# are derived from it so the location is written in exactly one place.
+set install /software/hgi/pipelines/yascp_versions/yascp_v1.6
+
+# Modules loaded together with the pipeline.
+module load ISG/singularity/3.11.4
+module load HGI/common/nextflow/22.04.4
+module load HGI/common/baton
+module load ISG/experimental/irods/4.2.7
+
+# Each prepend-path places its entry in front of PATH, so the entry listed
+# last ends up being searched first.
+prepend-path PATH "/software/hgi/envs/conda/team151/mo11/mo11/bin"
+prepend-path PATH "/software/hgi/containers/yascp/modules/full_yascp"
+prepend-path PATH "$install/bin"
+
+# NOTE(review): "module_exacutables" is presumably the directory's real name on
+# disk - do not correct the spelling here without renaming the directory.
+prepend-path PATH "$install/assets/deploy_scripts/module_exacutables"
diff --git a/conf/base.conf b/conf/base.conf
@@ -26,7 +26,7 @@ params{
     hard_filters_file = "no_file__file_sample_qc" //# This may point to the sample_qc.yml input which will apply hard filters to the merged cells.
     hard_filters_drop = false //#This indicates whether we want to drop the cells that fail hard filters of just flag them
     add_snps_to_pile_up_based_on_genotypes_provided = false // #whether we want to add informative snp to pile up in bam to enhance deconvolutions.
-
+    bcf_viewfilters = "" //# Extra arguments inserted into the `bcftools view` call in JOIN_CHROMOSOMES before +fixref; the empty default adds none.
     encrypt = false
     write_h5 = true
     remove_work_dir = false

diff --git a/modules/nf-core/modules/cellsnp/main.nf b/modules/nf-core/modules/cellsnp/main.nf
@@ -39,11 +39,15 @@ process DYNAMIC_DONOR_EXCLUSIVE_SNP_SELECTION{
       tuple val(samplename), path("cellsnp_panel_${samplename}.vcf.gz"),emit:cellsnp_pool_panel
       tuple val(samplename), path("set2_informative_sites_${samplename}.tsv"), path("set1_uninformative_sites_${samplename}.tsv"),path("variants_description.tsv"),emit:informative_uninformative_sites 
     script:       
-      if (params.add_dynamic_sites_or_not_to_panel){
-        cmd1="ln -s ${vcf_file} dynamic_snps.vcf.gz"
-      }else{
-        cmd1="bcftools view -R ${cellsnp_primary_file} ${vcf_file} -Oz -o  dynamic_snps.vcf.gz"
+      // cmd2 is interpolated into the script below: it appends the selected variants
+      // to the panel VCF when dynamic sites are requested, and is empty otherwise.
+      // NOTE(review): the removed code read params.add_dynamic_sites_or_not_to_panel;
+      // confirm the bare name is declared as an input of this process.
+      if (add_dynamic_sites_or_not_to_panel){
+        cmd2 = "cat cellsnp_variants.tsv >> cellsnp_panel_${samplename}.vcf"
+      }else{
+        cmd2 = ''
       }
+        cmd1="ln -s ${vcf_file} dynamic_snps.vcf.gz"
+      // }else{
+      //   cmd1="bcftools view -R ${cellsnp_primary_file} ${vcf_file} -Oz -o  dynamic_snps.vcf.gz"
+      // }
 
       """
         echo ${samplename}
@@ -54,7 +58,7 @@ process DYNAMIC_DONOR_EXCLUSIVE_SNP_SELECTION{
         dynamic_donor_exclusive_snp_selection.py -cpus ${task.cpus} -vcf dynamic_snps.vcf.gz -cellsnp ${cellsnp_primary_file}
         echo test > output.csv
         bcftools view -h ${cellsnp_primary_file} > cellsnp_panel_${samplename}.vcf
-        cat cellsnp_variants.tsv >> cellsnp_panel_${samplename}.vcf
+        ${cmd2}
         ln -s set1_uninformative_sites.tsv set1_uninformative_sites_${samplename}.tsv
         ln -s set2_informative_sites.tsv set2_informative_sites_${samplename}.tsv
         bgzip cellsnp_panel_${samplename}.vcf

diff --git a/modules/nf-core/modules/subset_genotype/main.nf b/modules/nf-core/modules/subset_genotype/main.nf
@@ -230,6 +230,7 @@ process JOIN_CHROMOSOMES{
     script:
       s1 = samplename.split('___')[0]
       s2 = samplename.split('___')[1]
+
       """
         vcf_name=\$(python ${projectDir}/bin/random_id.py)
         fofn_input_subset.sh "${study_vcf_files}"
@@ -238,9 +239,10 @@ process JOIN_CHROMOSOMES{
         bcftools view pre_\${vcf_name}.bcf.gz | awk '{gsub(/^chr/,""); print}' | awk '{gsub(/ID=chr/,"ID="); print}' > no_prefix_pre_\${vcf_name}.vcf
         bgzip no_prefix_pre_\${vcf_name}.vcf
         bcftools index no_prefix_pre_\${vcf_name}.vcf.gz
-
+        bcftools view ${params.bcf_viewfilters} no_prefix_pre_\${vcf_name}.vcf.gz -Oz -o no_prefix2_pre_\${vcf_name}.vcf.gz
+        bcftools index no_prefix2_pre_\${vcf_name}.vcf.gz
         #bcftools index pre_\${vcf_name}.bcf.gz
-        bcftools +fixref no_prefix_pre_\${vcf_name}.vcf.gz -Ob -o fix_ref_\${vcf_name}_out.bcf.gz -- -d -f ${genome}/genome.fa -m flip-all
+        bcftools +fixref no_prefix2_pre_\${vcf_name}.vcf.gz -Ob -o fix_ref_\${vcf_name}_out.bcf.gz -- -d -f ${genome}/genome.fa -m flip-all
         #ln -s pre_\${vcf_name}.bcf.gz \${vcf_name}_out.bcf.gz
         #bcftools +fill-tags fix_ref_\${vcf_name}_out.bcf.gz -Ob -o \${vcf_name}_out.bcf.gz
         bcftools annotate -x INFO fix_ref_\${vcf_name}_out.bcf.gz -Ob -o \${vcf_name}_out.bcf.gz