Skip to content

Commit

Permalink
added
Browse files Browse the repository at this point in the history
  • Loading branch information
Matiss Ozols committed Apr 12, 2024
1 parent 258a127 commit 877e103
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 8 deletions.
41 changes: 41 additions & 0 deletions assets/deploy_scripts/sanger_module_files/1.6
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#%Module1.0
# Environment Modules modulefile for the Yascp pipeline.
# The magic cookie above must remain the very first line of the file.
set program Yascp

# The version is derived from the name this modulefile is loaded under
# (presumably "<module>/<version>", so `file tail` yields e.g. "1.6" —
# confirm against the deployed directory layout), so it is not hard-coded
# here; a previous literal `set version 1.6` was immediately overwritten
# by this line and has been dropped.
set version [file tail [module-info version [module-info name]]]

# ModulesHelp --
#   Help procedure for this modulefile (the name is the one Environment
#   Modules looks up for `module help`). Takes no arguments and returns
#   nothing; it only writes the usage text for the `yascp` wrapper's run
#   modes to stderr.
proc ModulesHelp { } {
    # Pipeline overview and list of improvements in this release.
    puts stderr "YASCP (Yet Another Single Cell (scRNA) Pieline: https://github.com/wtsi-hgi/yascp) is a nextflow pipeline that QCs the scRNA Cellranger data by removing ambient RNA, deconvoluting donors, assigning celltypes, analysing concordances vs expected genotypes. IMPROVEMENTS: 1) Added support for running cellbender with different parameters. 2) Citeseq, VDJ data integrations using Surat integration, 3) You can now only run only the cellbender step 4) Have added additional 5 doublet detection methods 5) Can provide your own Azimuth and Celltypist references 6) You can now only run the doublet detection step"
    puts stderr ""

    # One line per supported invocation of the `yascp` wrapper.
    puts stderr "Yascp module has been set to run in multiple modes:"
    puts stderr " *yascp -v or yascp -h :will describe the checkout tag used."
    puts stderr " *yascp celltypes -c inputs.nf - run just a celltype assignment based on the input h5ad files."
    puts stderr " *yascp doublets -c input.nf : Run just doublet assesments on your h5ad/matrix files."
    # The next two descriptions previously repeated the `celltypes` text
    # verbatim; they now describe their own sub-commands.
    puts stderr " *yascp cluster -c inputs.nf - run just the clustering step based on the input h5ad files."
    puts stderr " *yascp cellbender -c inputs.nf - run just the cellbender step on your input files."
    puts stderr " *yascp test :will run a OneK1k test dataset."
    puts stderr " *yascp sample_input :will create a directory sample_input in your current working directory where you will sample input files."
    puts stderr " *yascp fetch :(you need an input.tsv file (ap per this: https://github.com/wtsi-hgi/yascp/blob/main/sample_input/input.tsv) in project directory where you would like to fetch the cellranged data. This file should contain sanger_sample_id as a header and all the uniqie sanger sample ids in the folowin rows) will fetch the cellranger data and prepeare the basic yascp inputs."
    puts stderr " *yascp clean -c inputs.nf: will rsync results and extract symlinks from the folder defined as an 'outdir' and then remove work directory to avoid running out of quota (this will remove nextflow catche and pipeline will need to be rerun from beggining if needed)"
    puts stderr " *yascp -c input.nf :will run your samples as defined in input file. If you are unsure how to fill in the info file please refear to this documentation: https://github.com/wtsi-hgi/yascp/blob/main/docs/usage.md"
    puts stderr ""

    # Pointer to the result-interpretation documentation.
    puts stderr "For the data interpretation please folow this documentation: https://github.com/wtsi-hgi/yascp/tree/main/docs"
    puts stderr ""
}

# One-line summaries registered with module-whatis; $version is set earlier
# in this modulefile.
module-whatis "Yascp Version: $version"
module-whatis "Yascp version $version is a single cell (scRNA) processing pipeline that takes care of donor deconvolution, ambient rna removal, celltype assignment, integration, clustering and cluster assesments and data qc: yascp (https://github.com/wtsi-hgi/yascp)"


# Root of this release's pipeline checkout. Defined once and reused for the
# PATH entries below so a new release only needs this line changed.
set install /software/hgi/pipelines/yascp_versions/yascp_v1.6

# Modules loaded alongside this one (pinned versions where given).
module load ISG/singularity/3.11.4
module load HGI/common/nextflow/22.04.4
module load HGI/common/baton
module load ISG/experimental/irods/4.2.7

# Each prepend-path puts its directory at the front of PATH, so the LAST
# entry below ends up first; the original order is kept on purpose.
prepend-path PATH "/software/hgi/envs/conda/team151/mo11/mo11/bin"
prepend-path PATH "/software/hgi/containers/yascp/modules/full_yascp"
prepend-path PATH "$install/bin"

# Directory name spelling ("module_exacutables") is kept exactly as in the
# original path.
prepend-path PATH "$install/assets/deploy_scripts/module_exacutables"
2 changes: 1 addition & 1 deletion conf/base.conf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ params{
hard_filters_file = "no_file__file_sample_qc" //# This may point to the sample_qc.yml input which will apply hard filters to the merged cells.
hard_filters_drop = false //#This indicates whether we want to drop the cells that fail hard filters or just flag them
add_snps_to_pile_up_based_on_genotypes_provided = false // #whether we want to add informative snp to pile up in bam to enhance deconvolutions.

bcf_viewfilters = ""
encrypt = false
write_h5 = true
remove_work_dir = false
Expand Down
14 changes: 9 additions & 5 deletions modules/nf-core/modules/cellsnp/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@ process DYNAMIC_DONOR_EXCLUSIVE_SNP_SELECTION{
tuple val(samplename), path("cellsnp_panel_${samplename}.vcf.gz"),emit:cellsnp_pool_panel
tuple val(samplename), path("set2_informative_sites_${samplename}.tsv"), path("set1_uninformative_sites_${samplename}.tsv"),path("variants_description.tsv"),emit:informative_uninformative_sites
script:
if (params.add_dynamic_sites_or_not_to_panel){
cmd1="ln -s ${vcf_file} dynamic_snps.vcf.gz"
}else{
cmd1="bcftools view -R ${cellsnp_primary_file} ${vcf_file} -Oz -o dynamic_snps.vcf.gz"
if (add_dynamic_sites_or_not_to_panel){
cmd2 = "cat cellsnp_variants.tsv >> cellsnp_panel_${samplename}.vcf"
}{
cmd2 = ''
}
cmd1="ln -s ${vcf_file} dynamic_snps.vcf.gz"
// }else{
// cmd1="bcftools view -R ${cellsnp_primary_file} ${vcf_file} -Oz -o dynamic_snps.vcf.gz"
// }

"""
echo ${samplename}
Expand All @@ -54,7 +58,7 @@ process DYNAMIC_DONOR_EXCLUSIVE_SNP_SELECTION{
dynamic_donor_exclusive_snp_selection.py -cpus ${task.cpus} -vcf dynamic_snps.vcf.gz -cellsnp ${cellsnp_primary_file}
echo test > output.csv
bcftools view -h ${cellsnp_primary_file} > cellsnp_panel_${samplename}.vcf
cat cellsnp_variants.tsv >> cellsnp_panel_${samplename}.vcf
${cmd2}
ln -s set1_uninformative_sites.tsv set1_uninformative_sites_${samplename}.tsv
ln -s set2_informative_sites.tsv set2_informative_sites_${samplename}.tsv
bgzip cellsnp_panel_${samplename}.vcf
Expand Down
6 changes: 4 additions & 2 deletions modules/nf-core/modules/subset_genotype/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ process JOIN_CHROMOSOMES{
script:
s1 = samplename.split('___')[0]
s2 = samplename.split('___')[1]

"""
vcf_name=\$(python ${projectDir}/bin/random_id.py)
fofn_input_subset.sh "${study_vcf_files}"
Expand All @@ -238,9 +239,10 @@ process JOIN_CHROMOSOMES{
bcftools view pre_\${vcf_name}.bcf.gz | awk '{gsub(/^chr/,""); print}' | awk '{gsub(/ID=chr/,"ID="); print}' > no_prefix_pre_\${vcf_name}.vcf
bgzip no_prefix_pre_\${vcf_name}.vcf
bcftools index no_prefix_pre_\${vcf_name}.vcf.gz
bcftools view ${params.bcf_viewfilters} no_prefix_pre_\${vcf_name}.vcf.gz -Oz -o no_prefix2_pre_\${vcf_name}.vcf.gz
bcftools index no_prefix2_pre_\${vcf_name}.vcf.gz
#bcftools index pre_\${vcf_name}.bcf.gz
bcftools +fixref no_prefix_pre_\${vcf_name}.vcf.gz -Ob -o fix_ref_\${vcf_name}_out.bcf.gz -- -d -f ${genome}/genome.fa -m flip-all
bcftools +fixref no_prefix2_pre_\${vcf_name}.vcf.gz -Ob -o fix_ref_\${vcf_name}_out.bcf.gz -- -d -f ${genome}/genome.fa -m flip-all
#ln -s pre_\${vcf_name}.bcf.gz \${vcf_name}_out.bcf.gz
#bcftools +fill-tags fix_ref_\${vcf_name}_out.bcf.gz -Ob -o \${vcf_name}_out.bcf.gz
bcftools annotate -x INFO fix_ref_\${vcf_name}_out.bcf.gz -Ob -o \${vcf_name}_out.bcf.gz
Expand Down

0 comments on commit 877e103

Please sign in to comment.