diff --git a/.ci_stuff/test_dag.sh b/.ci_stuff/test_dag.sh index 6b50f60d4..b2e408301 100755 --- a/.ci_stuff/test_dag.sh +++ b/.ci_stuff/test_dag.sh @@ -184,47 +184,50 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 333 ]; then exit 1 ; fi # mRNA-seq WC=`mRNA-seq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 882 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 879 ]; then exit 1 ; fi WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 892 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 889 ]; then exit 1 ; fi WC=`mRNA-seq -i PE_input -o output --rMats --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 908 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 905 ]; then exit 1 ; fi WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 602 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 599 ]; then exit 1 ; fi WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 948 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 945 ]; then exit 1 ; fi WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment-free,deepTools_qc" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1005 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1006 ]; then exit 1 ; fi WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --bcExtract --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 916 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 913 ]; then exit 1 ; fi WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --bcExtract --UMIDedup --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 966 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 963 ]; then exit 1 ; fi WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 807 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 804 ]; then exit 1 ; fi WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 526 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 523 ]; then exit 1 ; fi WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 863 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 860 ]; then exit 1 ; fi WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment-free,deepTools_qc" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 920 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 921 ]; then exit 1 ; fi WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --fastqc .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 975 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 972 ]; then exit 1 ; fi WC=`mRNA-seq -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 659 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 657 ]; then exit 1 ; fi #multiple comparison groups WC=`mRNA-seq --mode alignment,alignment-free -i PE_input -o output --rMats --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 867 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 869 ]; then exit 1 ; fi #allelic WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1360 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1357 ]; then exit 1 ; fi +WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i allelic_BAM_input/filtered_bam --fromBAM --bamExt '.filtered.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1118 ]; then exit 1 ; fi WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1367 ]; then exit 1 ; fi WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1353 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1350 ]; then exit 1 ; fi WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1367 ]; then exit 1 ; fi +WC=`mRNA-seq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1803 ]; then exit 1 ; fi -# noncoding-RNA-seq WC=`noncoding-RNA-seq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 717 ]; then exit 1 ; fi WC=`noncoding-RNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l` diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index d0200ad21..b6d93a2ce 100755 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -1,6 +1,6 @@ package: name: snakepipes - version: 2.6.1 + version: 2.7.0 source: path: ../ diff --git a/docs/content/News.rst b/docs/content/News.rst old mode 100755 new mode 100644 index c27fbe4b1..2fbfa886c --- a/docs/content/News.rst +++ b/docs/content/News.rst @@ -1,11 +1,19 @@ snakePipes News =============== + +snakePipes 2.7.0 +---------------- + +* Added the allelic version of Salmon-based transcript quantitation to mRNA-seq workflow. Will be run if *both* 'allelic-mapping' and 'alignment-free' modes are specified. An allelic version of sleuth will be run, if sample sheet is provided, as well as DESeq2 on allelic Salmon counts. + + snakePipes 2.6.1 ---------------- * Capped tabulate version as 0.9.0 breaks snakemake + snakePipes 2.6.0 ---------------- @@ -19,6 +27,7 @@ snakePipes 2.6.0 * Fixed a couple of issues in the ATAC-seq workflow after sofware versions update. * Fixed genome size conversion to string. + snakePipes 2.5.4 ---------------- diff --git a/snakePipes/__init__.py b/snakePipes/__init__.py index 574f4077e..766ce2d06 100755 --- a/snakePipes/__init__.py +++ b/snakePipes/__init__.py @@ -1 +1 @@ -__version__ = '2.6.1' +__version__ = '2.7.0' diff --git a/snakePipes/common_functions.py b/snakePipes/common_functions.py index bcc3f8518..27ee0cbd3 100644 --- a/snakePipes/common_functions.py +++ b/snakePipes/common_functions.py @@ -22,6 +22,7 @@ def set_env_yamls(): return {'CONDA_SHARED_ENV': 'envs/shared.yaml', 'CONDA_CREATE_INDEX_ENV': 'envs/createIndices.yaml', 'CONDA_RNASEQ_ENV': 'envs/rna_seq.yaml', + 'CONDA_SLEUTH_ENV': 'envs/sleuth.yaml', 'CONDA_RMATS_ENV': 'envs/rMats.yaml', 'CONDA_scRNASEQ_ENV': 'envs/sc_rna_seq.yaml', 'CONDA_seurat_ENV': 'envs/sc_rna_seq_seurat.yaml', diff --git a/snakePipes/shared/rscripts/DESeq2.R b/snakePipes/shared/rscripts/DESeq2.R index e905633a5..4f76297b1 100755 --- a/snakePipes/shared/rscripts/DESeq2.R +++ b/snakePipes/shared/rscripts/DESeq2.R @@ -79,7 +79,7 @@ if(isTRUE(tximport)) { tx2gene <- read.delim(tx2gene_file, header = FALSE) tx2gene <- tx2gene[c(1,2)] # check setup table and import - countdata <- checktable(sampleSheet = sampleInfo, salmon_dir = dirname(countFilePath), tx2gene_annot = tx2gene) + countdata <- checktable(sampleSheet = sampleInfo, salmon_dir = dirname(countFilePath), tx2gene_annot = tx2gene, alleleSpecific = allelic_info) } else { sampleInfo$name <- make.names(sampleInfo$name) rownames(sampleInfo)<-sampleInfo$name @@ -93,11 +93,15 @@ if(isTRUE(tximport)) { ## ~~~~~~~ 3. run DESeq wrapper ~~~~~~~~ #in case of the allelic-specific workflow, allow for 1 condition and skip deseq2 basic in this case if(length(unique(sampleInfo$condition))>1){ - seqout <- DESeq_basic(countdata, coldata = sampleInfo, fdr = fdr, alleleSpecific = allelic_info, from_salmon = tximport) + if(tximport & allelic_info){ + message("Detected allelic Salmon counts. Skipping DESeq_basic.") + }else{ + seqout <- DESeq_basic(countdata, coldata = sampleInfo, fdr = fdr, alleleSpecific = allelic_info, from_salmon = tximport) - DESeq_writeOutput(DEseqout = seqout, + DESeq_writeOutput(DEseqout = seqout, fdr = fdr, outprefix = "DEseq_basic", geneNamesFile = geneNamesFilePath) + } } #DESeq_downstream(DEseqout = seqout, countdata, sampleInfo, # fdr = fdr, outprefix = "DEseq_basic", heatmap_topN = topN, @@ -105,7 +109,7 @@ if(length(unique(sampleInfo$condition))>1){ ## Run allele-sepecific DESeq wrapper (if asked for) if (isTRUE(allelic_info)) { - seqout_allelic <- DESeq_allelic(countdata, coldata = sampleInfo, fdr = fdr) + seqout_allelic <- DESeq_allelic(countdata, coldata = sampleInfo, fdr = fdr, from_salmon=tximport) DESeq_writeOutput(DEseqout = seqout_allelic, fdr = fdr, outprefix = "DEseq_allelic", @@ -132,9 +136,10 @@ write.bibtex(bib, file = 'citations.bib') file.copy(rmdTemplate, to = 'DESeq2_report_basic.Rmd') if(length(unique(sampleInfo$condition))>1){ - outprefix = "DEseq_basic" - cite_options(citation_format = "text",style = "html",cite.style = "numeric",hyperlink = TRUE) - render('DESeq2_report_basic.Rmd', + if(!tximport & !allelic_info){ + outprefix = "DEseq_basic" + cite_options(citation_format = "text",style = "html",cite.style = "numeric",hyperlink = TRUE) + render('DESeq2_report_basic.Rmd', output_format = "html_document", clean = TRUE, params = list( @@ -142,9 +147,10 @@ if(length(unique(sampleInfo$condition))>1){ ddr.df = paste0(outprefix, "_DEresults.tsv"), countdata = countFilePath, coldata = sampleInfo, - fdr = 0.05, + fdr = fdr, heatmap_topN = 20, geneNamesFile = geneNamesFilePath)) + } } if (isTRUE(allelic_info)) { @@ -158,7 +164,7 @@ if (isTRUE(allelic_info)) { ddr.df = paste0(outprefix, "_DEresults.tsv"), countdata = countFilePath, coldata = sampleInfo, - fdr = 0.05, + fdr = fdr, heatmap_topN = 20, geneNamesFile = geneNamesFilePath)) } diff --git a/snakePipes/shared/rscripts/DESeq2Report.Rmd b/snakePipes/shared/rscripts/DESeq2Report.Rmd index f54505281..ab647d8b4 100644 --- a/snakePipes/shared/rscripts/DESeq2Report.Rmd +++ b/snakePipes/shared/rscripts/DESeq2Report.Rmd @@ -81,7 +81,7 @@ load(params$DEseqoutRdata) ## dds and ddr ddr.df <- params$ddr.df #_DEresults.tsv file countdata <- params$countdata coldata <- params$coldata -fdr <- params$fdr +fdr <- as.numeric(params$fdr) heatmap_topN <- params$heatmap_topN geneNamesFile <- params$geneNamesFile diff --git a/snakePipes/shared/rscripts/DE_functions.R b/snakePipes/shared/rscripts/DE_functions.R index 4ca2c5446..2c5320f9a 100644 --- a/snakePipes/shared/rscripts/DE_functions.R +++ b/snakePipes/shared/rscripts/DE_functions.R @@ -31,8 +31,13 @@ checktable <- function(countdata = NA, sampleSheet = NA, alleleSpecific = FALSE, ## check files if(!is.na(salmon_dir)) { - # mode = Salmon : check whether salmon output files exist in Salmon dir - files <- paste0(salmon_dir,"/",sampleSheet$name,".quant.sf") + #mode = Salmon : check whether salmon output files exist in Salmon dir + if(alleleSpecific){ + files <- c(paste0(salmon_dir,"/",sampleSheet$name,".genome1.quant.sf"),paste0(salmon_dir,"/",sampleSheet$name,".genome2.quant.sf")) + }else{ + files <- paste0(salmon_dir,"/",sampleSheet$name,".quant.sf") + } + #files<-dir(salmon_dir,pattern=".quant.sf",full.names=TRUE) filecheck <- file.exists(files) if(!(all(filecheck == TRUE))) { cat("Error! The following File(s) don't exist : ") @@ -63,7 +68,7 @@ checktable <- function(countdata = NA, sampleSheet = NA, alleleSpecific = FALSE, } } } - if(all(is.integer(countdata))){ + if(all(is.integer(countdata)) | !is.na(salmon_dir)){ print("All countdata is integer.") }else{ print("Non-integer counts detected. The data will be rounded, as this is well within the expected sampling variation of a technical replicate.") @@ -88,6 +93,7 @@ DESeq_basic <- function(countdata, coldata, fdr, alleleSpecific = FALSE, from_sa cnames.sub<-unique(colnames(coldata)[2:which(colnames(coldata) %in% "condition")]) d<-as.formula(noquote(paste0("~",paste(cnames.sub,collapse="+")))) + # Normal DESeq print("Performing basic DESeq: test vs control") if(isTRUE(from_salmon)) { @@ -95,18 +101,18 @@ DESeq_basic <- function(countdata, coldata, fdr, alleleSpecific = FALSE, from_sa dds <- DESeq2::DESeqDataSetFromTximport(countdata, colData = coldata, design =d) - } else { - print("Using input from count table") - if(isTRUE(alleleSpecific)) { - rnasamp <- dplyr::select(countdata, dplyr::ends_with("_all")) - rownames(coldata)<-colnames(rnasamp) - dds <- DESeq2::DESeqDataSetFromMatrix(countData = rnasamp, - colData = coldata, design =d) } else { - dds <- DESeq2::DESeqDataSetFromMatrix(countData = countdata, + if(isTRUE(alleleSpecific)) { + rnasamp <- dplyr::select(countdata, dplyr::ends_with("_all")) + rownames(coldata)<-colnames(rnasamp) + countdata<-rnasamp + } + + dds <- DESeq2::DESeqDataSetFromMatrix(countData = countdata, colData = coldata, design =d) + } - } + if(length(size_factors) > 1) { print("applying size factors") print(size_factors) @@ -135,33 +141,50 @@ DESeq_basic <- function(countdata, coldata, fdr, alleleSpecific = FALSE, from_sa #' #' -DESeq_allelic <- function(countdata, coldata, fdr) { +DESeq_allelic <- function(countdata, coldata, fdr, from_salmon=FALSE) { # AlleleSpecific DEseq print("Performing Allele-specific DESeq using Interaction design : Genome2 vs Genome1") + if(isTRUE(from_salmon)) { + # create alleleSpecific design matrix + coldata_allelic <- data.frame(name = colnames(as.data.frame(countdata$counts)), + allele = rep(c("genome1", "genome2"), nrow(coldata)), + condition = rep(coldata$condition, each = 2) ) + rownames(coldata_allelic)<-colnames(as.data.frame(countdata$counts)) + coldata_allelic$allele<-factor(coldata_allelic$allele,levels=c("genome1","genome2")) + coldata_allelic$condition<-factor(coldata_allelic$condition,levels=unique(coldata_allelic$condition)) + print("Using input from tximport") + dds <- DESeq2::DESeqDataSetFromTximport(countdata, + colData = coldata_allelic, design =~1) + + } else { + print("Using input from count table") rnasamp <- dplyr::select(countdata, -dplyr::ends_with("_all")) # create alleleSpecific design matrix - design <- data.frame(name = colnames(rnasamp), + coldata_allelic <- data.frame(name = colnames(rnasamp), allele = rep(c("genome1", "genome2"), nrow(coldata)), condition = rep(coldata$condition, each = 2) ) - rownames(design)<-colnames(rnasamp) + rownames(coldata_allelic)<-colnames(rnasamp) + coldata_allelic$allele<-factor(coldata_allelic$allele,levels=c("genome1","genome2")) + coldata_allelic$condition<-factor(coldata_allelic$condition,levels=unique(coldata_allelic$condition)) + dds <- DESeq2::DESeqDataSetFromMatrix(rnasamp, colData = coldata_allelic, + design = ~1) + rownames(dds) <- rownames(rnasamp) + } + # Run DESeq - if(length(unique(design$condition))>1){ - dds <- DESeq2::DESeqDataSetFromMatrix(rnasamp, colData = design, - design = ~allele + condition + allele:condition) - rownames(dds) <- rownames(rnasamp) + if(length(unique(coldata_allelic$condition))>1){ + DESeq2::design(dds) <- formula(~allele + condition + allele:condition) dds <- DESeq2::DESeq(dds,betaPrior = FALSE) ddr <- DESeq2::results(dds, name=paste0("allelegenome2.condition",unique(coldata$condition)[2])) ddr_shrunk <- DESeq2::lfcShrink(dds,coef=paste0("allelegenome2.condition",unique(coldata$condition)[2]),type="apeglm",res=ddr) } else { - dds <- DESeq2::DESeqDataSetFromMatrix(rnasamp, colData = design, - design = ~allele) - rownames(dds) <- rownames(rnasamp) - dds <- DESeq2::DESeq(dds,betaPrior = FALSE) - ddr <- DESeq2::results(dds, name="allele_genome2_vs_genome1") - ddr_shrunk <- DESeq2::lfcShrink(dds,coef="allele_genome2_vs_genome1",type="apeglm",res=ddr) + DESeq2::design(dds) <- formula(~allele) + dds <- DESeq2::DESeq(dds,betaPrior = FALSE) + ddr <- DESeq2::results(dds, name="allele_genome2_vs_genome1") + ddr_shrunk <- DESeq2::lfcShrink(dds,coef="allele_genome2_vs_genome1",type="apeglm",res=ddr) } output <- list(dds = dds, ddr = ddr, ddr_shrunk=ddr_shrunk) return(output) diff --git a/snakePipes/shared/rscripts/sleuth.R b/snakePipes/shared/rscripts/sleuth.R index 3c26b6ad4..23f4c3bfa 100644 --- a/snakePipes/shared/rscripts/sleuth.R +++ b/snakePipes/shared/rscripts/sleuth.R @@ -38,9 +38,7 @@ print(salmon_dirs) s2c = mutate(sample_info, path=salmon_dirs) ## reorder conditions (for Wald test later on: order of comparison important for fold change) -if ( s2c$condition[[1]] != levels(s2c$condition)[[1]] ) { - s2c$condition = relevel(s2c$condition, as.character(s2c$condition[[1]]) ) -} +s2c$condition<-factor(s2c$condition,levels=unique(s2c$condition)) print(s2c) ## get gene names / symbol names @@ -50,10 +48,12 @@ if (exists('t2g')) { colnames(t2g) <- c("target_id","ens_gene","ext_gene") ## add gene names - so <- sleuth_prep(s2c, full_model=d, target_mapping = t2g) + so <- sleuth_prep(s2c, full_model=d, target_mapping = t2g, num_cores=6, + transformation_function=function(x) log2(x + 0.5)) } else { ## construct sleuth object - so = sleuth_prep(s2c, full_model=d) + so = sleuth_prep(s2c, full_model=d, num_cores=6, + transformation_function=function(x) log2(x + 0.5)) } ## model expression responding on condition diff --git a/snakePipes/shared/rscripts/sleuth_allelic.R b/snakePipes/shared/rscripts/sleuth_allelic.R new file mode 100644 index 000000000..c88c5a0c6 --- /dev/null +++ b/snakePipes/shared/rscripts/sleuth_allelic.R @@ -0,0 +1,123 @@ +.libPaths(R.home("library")) + +library("sleuth") +library("dplyr") +#library("biomaRt") + +args = commandArgs(trailingOnly=TRUE) + +## Debug only !!! +## t2g_file = "snakemake_workflows/shared/organisms/dm6.t2g" +## sample_info_file = "sampleSheet.tsv" +## indir = "Salmon" +## outdir = "sleuth" + +sample_info_file = args[[1]] +indir = args[[2]] +outdir = args[[3]] +fdr = args[[4]] +t2g_file = args[[5]] + +setwd(outdir) + +sample_info = read.table(sample_info_file, header=T)#[,1:2] +coldata_allelic <- data.frame(name = paste0(rep(sample_info$name,each=2),c(".genome1",".genome2")), + allele = rep(c("genome1", "genome2"), nrow(sample_info)), + condition = rep(sample_info$condition, each = 2) ) +coldata_allelic$allele<-factor(coldata_allelic$allele,levels=c("genome1","genome2")) +coldata_allelic$condition<-factor(coldata_allelic$condition,levels=unique(coldata_allelic$condition)) +sample_info<-coldata_allelic + +if(length(unique(sample_info$condition))>1){ +message("Fitting full model with allele, condition and allele:condition.") + d<-formula(~allele + condition + allele:condition) +# d0<-formula(~allele + condition) +}else{ +message("Fitting full model with allele only.") + d<-formula(~allele) +# d0<-formula(~1) +} +colnames(sample_info)[colnames(sample_info) %in% "name"] ="sample" +print(sample_info) +sample_info$sample + +sample_id = list.dirs(file.path(indir), recursive=F, full.names=F) +sample_id = sort(sample_id[grep('[^benchmark][^SalmonIndex]', sample_id, invert=F)]) +print(sample_id) +#sample_id = intersect(sample_info$sample, sample_id) # get only those sample that are defined in the sampleInfo! +sample_id<-sample_id[match(sample_info$sample,sample_id)] +print(sample_id) + +salmon_dirs = sapply(sample_id, function(id) file.path(indir, id)) +print(salmon_dirs) + +s2c = mutate(sample_info, path=salmon_dirs) +## reorder conditions (for Wald test later on: order of comparison important for fold change) +s2c$condition<-factor(s2c$condition) +if ( s2c$condition[[1]] != levels(s2c$condition)[[1]] ) { + s2c$condition = relevel(s2c$condition, as.character(s2c$condition[[1]]) ) +} +print(s2c) + +## get gene names / symbol names +tryCatch( { t2g = read.table(t2g_file, header=F) },error = function(e) { print('No t2g file available!') },finally = {}) + +if (exists('t2g')) { + colnames(t2g) <- c("target_id","ens_gene","ext_gene") + + ## add gene names + so <- sleuth_prep(s2c, full_model=d, target_mapping = t2g, num_cores=6, + transformation_function = function(x) log2(x + 0.5)) +} else { + ## construct sleuth object + so = sleuth_prep(s2c, full_model=d, num_cores=6, + transformation_function = function(x) log2(x + 0.5)) +} + +## model expression responding on condition +so = sleuth_fit(so) +#so <- sleuth_fit(so, d, 'full') +#so <- sleuth_fit(so, d0, 'reduced') + + +# ## fit reduced model (not depending on any factor) +# so <- sleuth_fit(so, ~1, 'reduced') +# +# ## likelihood ratio test (LRT) between models to get transcripts affected by condition. +# ## Usually considered more stringent than Wald test. No fold change! +# so <- sleuth_lrt(so, 'reduced', 'full') +# results_table_lrt <- sleuth_results(so, 'reduced:full', test_type = 'lrt') +# head(results_table_lrt) + +## Wald test (to get *fold change*) +if(length(unique(sample_info$condition))>1){ + wald_beta_name = paste0("allelegenome2:condition",unique(coldata_allelic$condition)[2]) +}else{ + wald_beta_name = "allelegenome2" +} +so <- sleuth_wt(so, wald_beta_name, "full") +results_table_wt <- sleuth_results(so, wald_beta_name) +head(results_table_wt) +write.table(results_table_wt, "Wald-test.results.tsv", col.names=T, row.names=F, quote=F, sep="\t") +#so <- sleuth_lrt(so, 'reduced', 'full') +#results_table_lrt <- sleuth_results(so, 'reduced:full', test_type = 'lrt') +#write.table(results_table_lrt, "LRT-test.results.tsv", col.names=T, row.names=F, quote=F, sep="\t") + +## view fitted models and tests +models(so) +tests(so) + +saveRDS(so, file='so.rds') +so = readRDS("so.rds") + +write(c("library(sleuth)", + "so = readRDS('so.rds')", + "sleuth_live(so)"), + file="sleuth_live.R") + +pdf("MA-plot.pdf") +plot_ma(so, wald_beta_name, sig_level = fdr) +dev.off() + + +# sleuth_live(so, "wt") # wt for Wald test diff --git a/snakePipes/shared/rules/DESeq2.multipleComp.snakefile b/snakePipes/shared/rules/DESeq2.multipleComp.snakefile index 4f3030179..f6365f469 100644 --- a/snakePipes/shared/rules/DESeq2.multipleComp.snakefile +++ b/snakePipes/shared/rules/DESeq2.multipleComp.snakefile @@ -54,7 +54,7 @@ rule DESeq2: ## DESeq2 (on Salmon) -rule DESeq2_Salmon: +rule DESeq2_Salmon_basic: input: counts_table = "Salmon/counts.transcripts.tsv", sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output, diff --git a/snakePipes/shared/rules/DESeq2.singleComp.snakefile b/snakePipes/shared/rules/DESeq2.singleComp.snakefile index 7939f7961..6216aefbf 100644 --- a/snakePipes/shared/rules/DESeq2.singleComp.snakefile +++ b/snakePipes/shared/rules/DESeq2.singleComp.snakefile @@ -41,7 +41,7 @@ rule DESeq2: ## DESeq2 (on Salmon) -rule DESeq2_Salmon: +rule DESeq2_Salmon_basic: input: counts_table = "Salmon/counts.transcripts.tsv", sampleSheet = sampleSheet, @@ -75,3 +75,38 @@ rule DESeq2_Salmon: "../{input.tx2gene_file} " # 7 "{params.rmdTemplate} " # 8 " > ../{log.out} 2> ../{log.err}" + +rule DESeq2_Salmon_allelic: + input: + counts_table = "SalmonAllelic/counts.transcripts.tsv", + sampleSheet = sampleSheet, + tx2gene_file = "Annotation/genes.filtered.t2g", + symbol_file = "Annotation/genes.filtered.symbol" #get_symbol_file + output: + "{}/DESeq2.session_info.txt".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet)) + log: + out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet)), + err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet)) + benchmark: + "{}/.benchmark/DESeq2.SalmonAllelic.benchmark".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet)) + params: + script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"), + outdir = get_outdir("DESeq2_SalmonAllelic",sampleSheet), + fdr = 0.05, + importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"), + allele_info = 'TRUE', + tx2gene_file = "Annotation/genes.filtered.t2g", + rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd") + conda: CONDA_RNASEQ_ENV + shell: + "cd {params.outdir} && " + "Rscript {params.script} " + "{input.sampleSheet} " # 1 + "../{input.counts_table} " # 2 + "{params.fdr} " # 3 + "../{input.symbol_file} " # 4 + "{params.importfunc} " # 5 + "{params.allele_info} " # 6 + "../{input.tx2gene_file} " # 7 + "{params.rmdTemplate} " # 8 + " > ../{log.out} 2> ../{log.err}" diff --git a/snakePipes/shared/rules/SNPsplit.snakefile b/snakePipes/shared/rules/SNPsplit.snakefile index d8107d883..37169ab55 100644 --- a/snakePipes/shared/rules/SNPsplit.snakefile +++ b/snakePipes/shared/rules/SNPsplit.snakefile @@ -19,7 +19,7 @@ if aligner == "Bowtie2": "SNPsplit {params.pairedEnd}" " -o {params.outdir} --snp_file {input.snp} {input.bam} 2> {log}" -elif aligner == "STAR": +elif aligner == "STAR" or aligner == "EXTERNAL_BAM": rule snp_split: input: snp = SNPFile, diff --git a/snakePipes/shared/rules/Salmon_allelic.snakefile b/snakePipes/shared/rules/Salmon_allelic.snakefile new file mode 100755 index 000000000..ed4df369b --- /dev/null +++ b/snakePipes/shared/rules/Salmon_allelic.snakefile @@ -0,0 +1,170 @@ +allelic_suffix = ['genome1', 'genome2'] + +## Salmon Index + +rule SalmonIndex: + input: + "Annotation/genes.filtered.fa", + genome_fasta + output: + "Salmon/SalmonIndex/decoys.txt", + temp("Salmon/SalmonIndex/seq.fa"), + "Salmon/SalmonIndex/seq.bin" + benchmark: + "Salmon/.benchmark/Salmon.index.benchmark" + params: + salmonIndexOptions = salmonIndexOptions + log: + out = "Salmon/SalmonIndex/SalmonIndex.out", + err = "Salmon/SalmonIndex/SalmonIndex.err", + threads: lambda wildcards: 16 if 16" {input[1]} | cut -d " " -f 1 | tr -d ">" > {output[0]} + cat {input[0]} {input[1]} > {output[1]} + salmon index -p {threads} -t {output[1]} -d {output[0]} -i Salmon/SalmonIndex {params.salmonIndexOptions} > {log.out} 2> {log.err} + """ + + +def getSalmon_libtype(pairedEnd, libraryType): + """ + Convert from a featureCounts library type to a HISAT2 option string + """ + if pairedEnd: + if libraryType == 1: + return "ISF" + elif libraryType == 2: + return "ISR" + else: + return "IU" + else: + if libraryType == 1: + return "SF" + elif libraryType == 2: + return "SR" + else: + return "U" + +## Salmon quant, the bootstraps are needed in Sleuth +if pairedEnd: + rule getAllelicFQ: + input: + allelic_bam="allelic_bams/{sample}.{allelic_suffix}.sorted.bam", + allelic_bai="allelic_bams/{sample}.{allelic_suffix}.sorted.bam.bai" + output: + r1="allelicFASTQ/{sample}.{allelic_suffix}_R1.fastq.gz", + r2="allelicFASTQ/{sample}.{allelic_suffix}_R2.fastq.gz" + log: "allelicFASTQ/logs/bam2fq.{sample}.{allelic_suffix}.log" + benchmark: "allelicFASTQ/.benchmark/bam2fq.{sample}.{allelic_suffix}.benchmark" + threads: 4 + conda: CONDA_SHARED_ENV + shell: """ + samtools collate -@ {threads} -u -O {input.allelic_bam} | \\ +samtools fastq -1 {output.r1} -2 {output.r2} -0 /dev/null -s /dev/null -n \\ + 2> {log} + """ + rule SalmonQuant: + input: + r1 = "allelicFASTQ/{sample}.{allelic_suffix}_R1.fastq.gz", + r2 = "allelicFASTQ/{sample}.{allelic_suffix}_R2.fastq.gz", + bin = "Salmon/SalmonIndex/seq.bin" + output: + quant = "SalmonAllelic/{sample}.{allelic_suffix}/quant.sf" + log: "SalmonAllelic/logs/{sample}.{allelic_suffix}.quant.log" + benchmark: + "SalmonAllelic/.benchmark/SalmonQuant.{sample}.{allelic_suffix}.benchmark" + params: + outdir = "SalmonAllelic/{sample}.{allelic_suffix}", + gtf = genes_gtf, + lib_type = getSalmon_libtype(pairedEnd, libraryType) + threads: 8 + conda: CONDA_RNASEQ_ENV + shell: """ + salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i Salmon/SalmonIndex -l {params.lib_type} -1 {input.r1} -2 {input.r2} -o {params.outdir} 2> {log} + """ +else: + rule getAllelicFQ: + input: + allelic_bam="allelic_bams/{sample}.{allelic_suffix}.sorted.bam", + allelic_bai="allelic_bams/{sample}.{allelic_suffix}.sorted.bam.bai" + output: + r1="allelicFASTQ/{sample}.{allelic_suffix}_R1.fastq.gz" + log: "allelicFASTQ/logs/bam2fq.{sample}.{allelic_suffix}.log" + benchmark: "allelicFASTQ/.benchmark/bam2fq.{sample}.{allelic_suffix}.benchmark" + threads: 4 + conda: CONDA_SHARED_ENV + shell: """ + samtools collate -@ {threads} -u -O {input.allelic_bam} | \\ +samtools fastq -1 /dev/null -2 /dev/null -0 /dev/null -s {output.r1} -n \\ + 2> {log} + """ + rule SalmonQuant: + input: + fastq = "allelicFASTQ/{sample}.{allelic_suffix}_R1.fastq.gz", + bin = "SalmonAllelic/SalmonIndex/seq.bin", + output: + quant = "SalmonAllelic/{sample}.{allelic_suffix}/quant.sf" + log: "SalmonAllelic/logs/{sample}.{allelic_suffix}.quant.log" + benchmark: + "SalmonAllelic/.benchmark/SalmonQuant.{sample}.{allelic_suffix}.benchmark" + params: + outdir = "SalmonAllelic/{sample}.{allelic_suffix}", + gtf = genes_gtf, + lib_type = getSalmon_libtype(pairedEnd, libraryType) + threads: 8 + conda: CONDA_RNASEQ_ENV + shell: """ + salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i Salmon/SalmonIndex -l {params.lib_type} -r {input.fastq} -o {params.outdir} 2> {log} + """ + + +rule Salmon_symlinks: + input: + quant = "SalmonAllelic/{sample}.{allelic_suffix}/quant.sf" + output: + quant = "SalmonAllelic/{sample}.{allelic_suffix}.quant.sf" + shell: """ + ln -s ../{input.quant} {output.quant} + """ + + +rule Salmon_TPM: + input: + expand("SalmonAllelic/{sample}.{allelic_suffix}.quant.sf", sample=samples,allelic_suffix=allelic_suffix) + output: + "SalmonAllelic/TPM.transcripts.tsv" + benchmark: + "SalmonAllelic/.benchmark/Salmon_TPM.benchmark" + log: + "SalmonAllelic/logs/Salmon_TPM.log" + conda: CONDA_RNASEQ_ENV + shell: + "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_count_tables.R")+" Name TPM {output} {input} " + + +rule Salmon_counts: + input: + expand("SalmonAllelic/{sample}.{allelic_suffix}.quant.sf", sample=samples,allelic_suffix=allelic_suffix) + output: + "SalmonAllelic/counts.transcripts.tsv" + benchmark: + "SalmonAllelic/.benchmark/Salmon_counts.benchmark" + log: + "SalmonAllelic/logs/Salmon_counts.log" + conda: CONDA_RNASEQ_ENV + shell: + "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_count_tables.R")+" Name NumReads {output} {input} " + + +## Prepare Salmon output for Sleuth +rule Salmon_wasabi: + input: + "SalmonAllelic/{sample}.{allelic_suffix}.quant.sf" + output: + "SalmonAllelic/{sample}.{allelic_suffix}/abundance.h5" + log: "SalmonAllelic/logs/{sample}.{allelic_suffix}.wasabi.log" + params: + "SalmonAllelic/{sample}.{allelic_suffix}/" + conda: CONDA_RNASEQ_ENV + shell: + "Rscript "+os.path.join(maindir, "shared", "rscripts", "wasabi.R")+" {params} 2> {log}" diff --git a/snakePipes/shared/rules/envs/rna_seq.yaml b/snakePipes/shared/rules/envs/rna_seq.yaml index 4882f36ed..db4437bb7 100755 --- a/snakePipes/shared/rules/envs/rna_seq.yaml +++ b/snakePipes/shared/rules/envs/rna_seq.yaml @@ -12,7 +12,6 @@ dependencies: - salmon = 1.9.0 - r-base = 4.1.3 - r-wasabi - - r-sleuth - r-dplyr - r-ggplot2 - r-pheatmap diff --git a/snakePipes/shared/rules/envs/sleuth.yaml b/snakePipes/shared/rules/envs/sleuth.yaml new file mode 100755 index 000000000..6c504b2aa --- /dev/null +++ b/snakePipes/shared/rules/envs/sleuth.yaml @@ -0,0 +1,8 @@ +name: snakepipes_sleuth_environment_2.0 +channels: + - conda-forge + - bioconda +dependencies: + - r-base = 4.2.0 + - r-sleuth = 0.30.1 + - r-dplyr diff --git a/snakePipes/shared/rules/multiQC.snakefile b/snakePipes/shared/rules/multiQC.snakefile index cfe1ca21e..96cfeb229 100755 --- a/snakePipes/shared/rules/multiQC.snakefile +++ b/snakePipes/shared/rules/multiQC.snakefile @@ -81,8 +81,12 @@ def multiqc_input_check(return_value): infiles.append( expand("allelic_bams/{sample}.SNPsplit_sort.yaml", sample = samples) ) indir += "allelic_bams" if "alignment-free" in mode: - infiles.append( expand("Salmon/{sample}/quant.sf", sample = samples) ) - indir += " Salmon " + if "allelic-mapping" in mode: + infiles.append( expand("SalmonAllelic/{sample}.{allelic_suffix}/quant.sf", sample = samples,allelic_suffix=allelic_suffix) ) + indir += " SalmonAllelic " + else: + infiles.append( expand("Salmon/{sample}/quant.sf", sample = samples) ) + indir += " Salmon " elif pipeline == "noncoding-rna-seq": infiles.append(expand("deepTools_qc/estimateReadFiltering/{sample}_filtering_estimation.txt",sample=samples)) indir += " STAR deepTools_qc " diff --git a/snakePipes/shared/rules/sleuth.multiComp.snakefile b/snakePipes/shared/rules/sleuth.multiComp.snakefile index 58661d6a9..3f1b9c7cb 100644 --- a/snakePipes/shared/rules/sleuth.multiComp.snakefile +++ b/snakePipes/shared/rules/sleuth.multiComp.snakefile @@ -17,11 +17,12 @@ rule sleuth_Salmon: indir = os.path.join(outdir,"Salmon"), outdir = os.path.join(outdir,"sleuth_Salmon_{}".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}")), sampleSheet = lambda wildcards,input: os.path.join(outdir,str(input.sampleSheet)), - fdr = 0.05, + fdr = 0.05 + threads: 6 log: out = "sleuth_Salmon_{}/logs/sleuth.out".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"), err = "sleuth_Salmon_{}/logs/sleuth.err".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}") - conda: CONDA_RNASEQ_ENV + conda: CONDA_SLEUTH_ENV shell: "Rscript {params.script} " "{params.sampleSheet} " diff --git a/snakePipes/shared/rules/sleuth.singleComp.snakefile b/snakePipes/shared/rules/sleuth.singleComp.snakefile index e14afb944..8b4c53644 100644 --- a/snakePipes/shared/rules/sleuth.singleComp.snakefile +++ b/snakePipes/shared/rules/sleuth.singleComp.snakefile @@ -15,11 +15,39 @@ rule sleuth_Salmon: script=os.path.join(maindir, "shared", "rscripts", "sleuth.R"), indir = os.path.join(outdir,"Salmon"), outdir = os.path.join(outdir,"sleuth_Salmon_{}".format(sample_name)), - fdr = 0.05, + fdr = 0.05 + threads: 6 log: out = "sleuth_Salmon_{}/logs/sleuth.out".format(sample_name), err = "sleuth_Salmon_{}/logs/sleuth.err".format(sample_name) - conda: CONDA_RNASEQ_ENV + conda: CONDA_SLEUTH_ENV + shell: + "Rscript {params.script} " + "{input.sampleSheet} " + "{params.indir} " + "{params.outdir} " + "{params.fdr} " + os.path.join(outdir,"{input.t2g}") + + " >{log.out} 2>{log.err}" + +rule sleuth_SalmonAllelic: + input: + quant_files = expand("SalmonAllelic/{sample}.{allele}/abundance.h5", sample=samples,allele=["genome1","genome2"]), + t2g = "Annotation/genes.filtered.t2g", + sampleSheet = sampleSheet + output: + "sleuth_SalmonAllelic_{}/so.rds".format(sample_name) + benchmark: + "sleuth_SalmonAllelic_{}/.benchmark/sleuth.Salmon.benchmark".format(sample_name) + params: + script=os.path.join(maindir, "shared", "rscripts", "sleuth_allelic.R"), + indir = os.path.join(outdir,"SalmonAllelic"), + outdir = os.path.join(outdir,"sleuth_SalmonAllelic_{}".format(sample_name)), + fdr = 0.05 + threads: 6 + log: + out = "sleuth_SalmonAllelic_{}/logs/sleuth.out".format(sample_name), + err = "sleuth_SalmonAllelic_{}/logs/sleuth.err".format(sample_name) + conda: CONDA_SLEUTH_ENV shell: "Rscript {params.script} " "{input.sampleSheet} " diff --git a/snakePipes/workflows/mRNA-seq/Snakefile b/snakePipes/workflows/mRNA-seq/Snakefile index baf145378..f48af4223 100755 --- a/snakePipes/workflows/mRNA-seq/Snakefile +++ b/snakePipes/workflows/mRNA-seq/Snakefile @@ -71,27 +71,39 @@ if not fromBAM: include: os.path.join(maindir, "shared", "rules", "SNPsplit.snakefile") # deepTools QC include: os.path.join(maindir, "shared", "rules", "deepTools_RNA_allelic.snakefile") + if "alignment-free" in mode: + include: os.path.join(maindir, "shared", "rules", "Salmon_allelic.snakefile") + include: os.path.join(maindir, "shared", "rules", "sleuth.singleComp.snakefile") +# include: os.path.join(maindir, "shared", "rules", "DESeq2.singleComp.snakefile") else: # HISAT2/STAR include: os.path.join(maindir, "shared", "rules", "RNA_mapping.snakefile") - - ## Salmon - if "alignment-free" in mode: - include: os.path.join(maindir, "shared", "rules", "Salmon.snakefile") - ## Sleuth (on Salmon) - if sampleSheet: - if isMultipleComparison: - include: os.path.join(maindir, "shared", "rules", "sleuth.multiComp.snakefile") - else: - include: os.path.join(maindir, "shared", "rules", "sleuth.singleComp.snakefile") + ## Salmon + if "alignment-free" in mode: + include: os.path.join(maindir, "shared", "rules", "Salmon.snakefile") + ## Sleuth (on Salmon) + if sampleSheet: + if isMultipleComparison: + include: os.path.join(maindir, "shared", "rules", "sleuth.multiComp.snakefile") + else: + include: os.path.join(maindir, "shared", "rules", "sleuth.singleComp.snakefile") else: fastqc=False trim=False downsample=None + if "allelic-mapping" in mode: + allele_mode = "from_bam" + star_index_allelic = NMaskedIndex + SNPFile = SNPfile + # SNPsplit + include: os.path.join(maindir, "shared", "rules", "SNPsplit.snakefile") + # deepTools QC + include: os.path.join(maindir, "shared", "rules", "deepTools_RNA_allelic.snakefile") include: os.path.join(maindir, "shared", "rules", "LinkBam.snakefile") + if "allelic-mapping" in mode: ## featureCounts_allelic include: os.path.join(maindir, "shared", "rules", "featureCounts_allelic.snakefile") @@ -107,7 +119,7 @@ include:os.path.join(maindir, "shared", "rules", "RNA-seq_qc_report.snakefile") ## DESeq2 if sampleSheet: - if isMultipleComparison: + if isMultipleComparison and not "allelic-mapping" in mode: include: os.path.join(maindir, "shared", "rules", "DESeq2.multipleComp.snakefile") if rMats: include: os.path.join(maindir, "shared", "rules", "rMats.multipleComp.snakefile") @@ -146,27 +158,39 @@ def run_Trimming(trim, fastqc): def run_alignment_free(): if "alignment-free" in mode: - file_list = [ - expand("Salmon/{sample}.quant.sf", sample=samples), - "Salmon/TPM.transcripts.tsv", - "Salmon/counts.transcripts.tsv", - expand("Salmon/{sample}/abundance.h5", sample=samples) ] - - if sampleSheet: - sample_name = os.path.splitext(os.path.basename(sampleSheet))[0] - if not isMultipleComparison: - file_list.append( ["DESeq2_Salmon_{}/DESeq2.session_info.txt".format(sample_name)]) - if cf.check_replicates(sampleSheet): - file_list.append( ["sleuth_Salmon_{}/so.rds".format(sample_name)] ) - if rMats: - file_list.append( ["rMats_{}/RI.MATS.JCEC.txt".format(sample_name)] ) - file_list.append( ["rMats_{}/b2.csv".format(sample_name)] ) - else: - file_list.append(expand("DESeq2_Salmon_{}".format(sample_name) + ".{compGroup}/DESeq2.session_info.txt",compGroup=cf.returnComparisonGroups(sampleSheet))) - file_list.append(expand("sleuth_Salmon_{}".format(sample_name) + ".{compGroup}/so.rds",compGroup=cf.returnComparisonGroups(sampleSheet))) - if rMats: - file_list.append(expand("rMats_{}".format(sample_name) + ".{compGroup}/RI.MATS.JCEC.txt",compGroup=cf.returnComparisonGroups(sampleSheet))) - file_list.append(expand("rMats_{}".format(sample_name) + ".{compGroup}/b2.csv",compGroup=cf.returnComparisonGroups(sampleSheet))) + if not "allelic-mapping" in mode: + file_list = [ + expand("Salmon/{sample}.quant.sf", sample=samples), + "Salmon/TPM.transcripts.tsv", + "Salmon/counts.transcripts.tsv", + expand("Salmon/{sample}/abundance.h5", sample=samples) ] + + if sampleSheet: + sample_name = os.path.splitext(os.path.basename(sampleSheet))[0] + if not isMultipleComparison: + file_list.append( ["DESeq2_Salmon_{}/DESeq2.session_info.txt".format(sample_name)]) + if cf.check_replicates(sampleSheet): + file_list.append( ["sleuth_Salmon_{}/so.rds".format(sample_name)] ) + if rMats: + file_list.append( ["rMats_{}/RI.MATS.JCEC.txt".format(sample_name)] ) + file_list.append( ["rMats_{}/b2.csv".format(sample_name)] ) + else: + file_list.append(expand("DESeq2_Salmon_{}".format(sample_name) + ".{compGroup}/DESeq2.session_info.txt",compGroup=cf.returnComparisonGroups(sampleSheet))) + file_list.append(expand("sleuth_Salmon_{}".format(sample_name) + ".{compGroup}/so.rds",compGroup=cf.returnComparisonGroups(sampleSheet))) + if rMats: + file_list.append(expand("rMats_{}".format(sample_name) + ".{compGroup}/RI.MATS.JCEC.txt",compGroup=cf.returnComparisonGroups(sampleSheet))) + file_list.append(expand("rMats_{}".format(sample_name) + ".{compGroup}/b2.csv",compGroup=cf.returnComparisonGroups(sampleSheet))) + else: + file_list = [ + expand("SalmonAllelic/{sample}.{allelic_suffix}.quant.sf", sample=samples,allelic_suffix=allelic_suffix), + "SalmonAllelic/TPM.transcripts.tsv", + "SalmonAllelic/counts.transcripts.tsv", + expand("SalmonAllelic/{sample}.{allelic_suffix}/abundance.h5", sample=samples,allelic_suffix=allelic_suffix) ] + if sampleSheet: + sample_name = os.path.splitext(os.path.basename(sampleSheet))[0] + if not isMultipleComparison: + file_list.append( ["DESeq2_SalmonAllelic_{}/DESeq2.session_info.txt".format(sample_name)]) + file_list.append( ["sleuth_SalmonAllelic_{}/so.rds".format(sample_name)] ) return(file_list) else: return([]) @@ -309,8 +333,6 @@ onstart: ################################################################################ if not fromBAM: - localrules: Salmon_wasabi, sleuth_Salmon - rule all: input: @@ -325,11 +347,11 @@ if not fromBAM: "multiQC/multiqc_report.html" else: - localrules: Salmon_wasabi rule all: input: run_alignment(), + run_allelesp_mapping(), run_deepTools_qc(), run_GenomicContamination(), "multiQC/multiqc_report.html" diff --git a/snakePipes/workflows/mRNA-seq/cluster.yaml b/snakePipes/workflows/mRNA-seq/cluster.yaml index bff08a4e8..9b340f000 100644 --- a/snakePipes/workflows/mRNA-seq/cluster.yaml +++ b/snakePipes/workflows/mRNA-seq/cluster.yaml @@ -14,9 +14,13 @@ annotation_bed2fasta: memory: 4G sleuth_Salmon: memory: 4G +sleuth_SalmonAllelic: + memory: 10G DESeq2: memory: 5G -DESeq2_Salmon: +DESeq2_Salmon_basic: + memory: 3G +DESeq2_Salmon_allelic: memory: 3G star_index: memory: 15G diff --git a/snakePipes/workflows/mRNA-seq/mRNA-seq b/snakePipes/workflows/mRNA-seq/mRNA-seq index d3cb61b46..2a62d8bd4 100755 --- a/snakePipes/workflows/mRNA-seq/mRNA-seq +++ b/snakePipes/workflows/mRNA-seq/mRNA-seq @@ -152,8 +152,8 @@ def main(): sys.exit("{} is not a valid mode!\n".format(mode)) if "alignment" not in modeTemp and args.UMIDedup: sys.exit("UMIDedup is only valid for \"alignment\" mode!\n") - if args.fromBAM and ("alignment-free" in modeTemp or "allelic-mapping" in modeTemp): - sys.exit("\n--fromBAM can only be used with modes \'alignment\' or \'deepTools_qc\' - use one of these modes or provide fastq files!\n") + if args.fromBAM and ("alignment-free" in modeTemp ): + sys.exit("\n--fromBAM can only be used with modes \'alignment\' , \'allelic-mapping\' or \'deepTools_qc\' - use one of these modes or provide fastq files!\n") if args.fromBAM: args.aligner = "EXTERNAL_BAM" if args.rMats and not args.sampleSheet: