diff --git a/CHANGELOG.md b/CHANGELOG.md index 66fcc36f..4785e9fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Template update to 2.11.1 ([#105](https://github.com/nf-core/crisprseq/pull/105)) +- Added a csv input option for crisprcleanr ([#105](https://github.com/nf-core/crisprseq/pull/105)) +- Added a contrasts parameter so the pipeline automatically creates design matrices and runs MAGeCK MLE ([#109](https://github.com/nf-core/crisprseq/pull/109)) ## [v2.1.1 - Jamon Salas - patch](https://github.com/nf-core/crisprseq/releases/tag/2.1.1) - [14.12.2023] diff --git a/conf/modules.config b/conf/modules.config index 0c057c08..2c6330b3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -104,7 +104,7 @@ process { withName: MAGECK_MLE { publishDir = [ - path: { "${params.outdir}/mageck/mle/${meta.id}/" }, + path: { "${params.outdir}/mageck/mle/${meta.treatment}_vs_${meta.reference}/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/docs/usage/screening.md b/docs/usage/screening.md index b0c6dcac..ba7fcc47 100644 --- a/docs/usage/screening.md +++ b/docs/usage/screening.md @@ -21,7 +21,7 @@ nextflow run nf-core/crisprseq --analysis screening --input samplesheet.csv --li ``` The following required parameters are here described. -If you wish to input a raw count or normalized table, you can skip the samplesheet parameter as well as the library one and directly input your table using count_table `--count_table your_count_table`. If your count table is normalized, be sure to set the normalization method to none in MAGeCK MLE or MAGeCK RRA using a config file. +If you wish to input a raw count or normalized table, you can skip the samplesheet parameter as well as the library one and directly input your table using count_table `--count_table your_count_table`. 
Your count table should contain the following columns: sgRNA and gene. You can find an example [here](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/count_table.csv). If your count table is normalized, be sure to set the normalization method to none in MAGeCK MLE or MAGeCK RRA using a config file. ### Full samplesheet @@ -58,7 +58,17 @@ We recommend to run MAGeCK MLE and BAGEL2 as these are the most used and most re ### Running CRISPRcleanR -CRISPRcleanR is used for gene count normalization and the removal of biases for genomic segments for which copy numbers are amplified. Currently, the pipeline only supports annotation libraries already present in the R package and which can be found [here](https://github.com/francescojm/CRISPRcleanR/blob/master/Reference_Manual.pdf). To use CRISPRcleanR normalization, use `--crisprcleanr library`, `library` being the exact name as the library in the CRISPRcleanR documentation (e.g: "AVANA_Library"). +CRISPRcleanR is used for gene count normalization and the removal of biases for genomic segments for which copy numbers are amplified. Currently, the pipeline supports annotation libraries already present in the R package or an annotation file the user can provide. +The most used libraries already have an annotation dataset which you can find [here](https://github.com/francescojm/CRISPRcleanR/blob/master/Reference_Manual.pdf). To use CRISPRcleanR normalization, use `--crisprcleanr library`, `library` being the exact name as the library in the CRISPRcleanR documentation (e.g: "AVANA_Library"). 
+Otherwise, if you wish to provide your own file, please provide it in csv form and make sure it follows the format below (the first column, which holds the sgRNA identifiers, has an empty header): + +| | CODE | GENES | EXONE | CHRM | STRAND | STARTpos | ENDpos | +| -------------------- | -------------- | ------- | ---- | ------ | -------- | --------- | --------- | +| AAAAAAAAAAAATGCATTCT | NM_183035.1 | Defb34 | ex2 | 8 | - | 19126349 | 19126369 | +| AAAAAAAAATAAGCTCACCC | NM_001170853.1 | Mndal | ex5 | 1 | + | 173872968 | 173872988 | +| AAAAAAAATCCTGTCGCCCA | NM_001039049.1 | Cox8c | ex1 | 12 | + | 102899487 | 102899507 | +| AAAAAAATCGGCATACCATG | NM_178627.3 | Poldip3 | ex4 | 15 | - | 83135295 | 83135315 | +| AAAAAAATGACATTACTGCA | NM_026602.3 | Bcas2 | ex4 | 3 | + | 103174386 | 103174406 | ### Running MAGeCK MLE and BAGEL2 with a contrast file @@ -73,7 +83,7 @@ A full example can be found [here](https://raw.githubusercontent.com/nf-core/tes ### Running MAGeCK RRA only -MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK rra, you can define the contrasts as previously stated in the last section and also specify `--rra` . +MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK RRA, you can define the contrasts as previously stated in the last section (with a `.txt` extension) and also specify `--rra`. 
### Running MAGeCK MLE only diff --git a/modules/local/bagel2/bf.nf b/modules/local/bagel2/bf.nf index 3ff86603..e2db4e55 100644 --- a/modules/local/bagel2/bf.nf +++ b/modules/local/bagel2/bf.nf @@ -23,7 +23,7 @@ process BAGEL2_BF { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.treatment}" + def prefix = task.ext.prefix ?: "${meta.treatment}_vs_${meta.reference}" """ BAGEL.py bf -i $foldchange -o '${meta.treatment}_vs_${meta.reference}.bf' $args -e $reference_essentials -n $reference_nonessentials -c ${meta.treatment} diff --git a/modules/local/bagel2/pr.nf b/modules/local/bagel2/pr.nf index e2290ede..53a1f2fb 100644 --- a/modules/local/bagel2/pr.nf +++ b/modules/local/bagel2/pr.nf @@ -1,5 +1,5 @@ process BAGEL2_PR { - tag "$meta.treatment" + tag "${meta.treatment}_vs_${meta.reference}" label 'process_single' conda "python=3.11.4 pandas=2.0.3 numpy=1.25.1 scikit-learn=1.3.0 click=8.1.6" diff --git a/modules/local/matricescreation.nf b/modules/local/matricescreation.nf index 43867fa6..2c558d8f 100644 --- a/modules/local/matricescreation.nf +++ b/modules/local/matricescreation.nf @@ -7,10 +7,10 @@ process MATRICESCREATION { 'biocontainers/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' }" input: - path(contrasts) + val(meta) output: - path("*.txt"), emit: design_matrix + tuple val(meta), path("*.txt"), emit: design_matrix when: task.ext.when == null || task.ext.when @@ -22,30 +22,26 @@ process MATRICESCREATION { #### author: Laurence Kuhlburger #### Released under the MIT license. See git repository (https://github.com/nf-core/crisprseq) for full license text. #### - #### Orient a reference sequence according to reads orientation. 
+ #### Create design matrices - data <- read.table("$contrasts", header = TRUE, sep = ";", stringsAsFactors = FALSE) # Loop through each row in the data - for (i in 1:nrow(data)) { - # Extract control and treatment samples for the current row - control_samples <- unlist(strsplit(data\$reference[i], ",")) - treatment_samples <- unlist(strsplit(data\$treatment[i], ",")) - - # Create a vector of all unique samples - all_samples <- unique(c(control_samples, treatment_samples)) - - # Initialize a matrix to store the design matrix - design_matrix <- data.frame(matrix(0, nrow = length(all_samples), ncol = 3, - dimnames = list(all_samples, c("Samples", "baseline", paste0(gsub(',', '_', data\$treatment[i] ),"_vs_", data\$reference[i]))))) - - # Set baseline and treatment values in the design matrix - design_matrix[, "Samples"] <- rownames(design_matrix) - design_matrix\$baseline <- 1 - design_matrix[treatment_samples, paste0(gsub(',', '_', data\$treatment[1] ),"_vs_",gsub(",","_",data\$reference[i]))] <- 1 - - # Print the design matrix to a file - output_file <- paste0(gsub(',', '_', data\$treatment[1] ),"_vs_",gsub(",","_",data\$reference[i]),".txt") - write.table(design_matrix, output_file, sep = "\t", quote = FALSE, row.names=FALSE) - } + control_samples <- unlist(strsplit('${meta.reference}', ",")) + treatment_samples <- unlist(strsplit('$meta.treatment', ",")) + all_samples <- unique(c(control_samples, treatment_samples)) + design_matrix <- data.frame(matrix(0, nrow = length(all_samples), ncol = 3, + dimnames = list(all_samples, + c("Samples", "baseline", + paste0(gsub(',', '_', '$meta.treatment'),"_vs_",gsub(',','_','$meta.reference')))))) + name = paste0(gsub(',', '_', '$meta.treatment' ),"_vs_", gsub(',', '_','$meta.reference')) + # Set baseline and treatment values in the design matrix + design_matrix[, "Samples"] <- rownames(design_matrix) + design_matrix\$baseline <- 1 + design_matrix[treatment_samples, name] <- 1 + design_matrix[treatment_samples, 
paste0(gsub(',', '_', '$meta.treatment'),"_vs_",gsub(",","_",'$meta.reference'))] <- 1 + + # Print the design matrix to a file + output_file <- paste0(gsub(',', '_', '$meta.treatment' ),"_vs_",gsub(",","_",'$meta.reference'),".txt") + write.table(design_matrix, output_file, sep = "\t", quote = FALSE, row.names=FALSE) + """ } diff --git a/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff b/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff index 50dc1a76..daa9446f 100644 --- a/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff +++ b/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff @@ -1,12 +1,14 @@ Changes in module 'nf-core/crisprcleanr/normalize' --- modules/nf-core/crisprcleanr/normalize/main.nf +++ modules/nf-core/crisprcleanr/normalize/main.nf -@@ -8,12 +8,13 @@ +@@ -8,12 +8,15 @@ 'biocontainers/r-crisprcleanr:3.0.0--r42hdfd78af_1' }" input: - tuple val(meta), path(count_file), path(library_file) -+ tuple val(meta), path(count_file), val(library_file) ++ tuple val(meta), path(count_file) ++ val(library_value) ++ path(library_file) val(min_reads) val(min_targeted_genes) @@ -16,42 +18,58 @@ Changes in module 'nf-core/crisprcleanr/normalize' path "versions.yml", emit: versions when: -@@ -26,20 +27,32 @@ +@@ -26,20 +29,48 @@ """ #!/usr/bin/env Rscript library(CRISPRcleanR) - library <- read.delim('${library_file}', header=T,sep="\t") - row.names(library) <- library[["CODE"]] - normANDfcs <- ccr.NormfoldChanges('${count_file}',saveToFig = FALSE,min_reads=${min_reads},EXPname='${meta.id}', libraryAnnotation=library,display=FALSE) -- gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],library) -- correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='${meta.id}') -- correctedCounts <- ccr.correctCounts('${meta.id}', + library(dplyr) -+ data('${library_file}') -+ count_file <- read.delim('${count_file}',header=T,sep = "\t") -+ count_file_to_normalize <- count_file %>% 
dplyr::left_join(get('${library_file}'), by=c("sgRNA"="Target.Context.Sequence"),multiple = "all") + -+ count_file_to_normalize <- count_file_to_normalize %>% -+ dplyr::select(colnames(count_file),CODE,-sgRNA) ++ print('${library_value}') ++ count_file <- read.delim('${count_file}',header=T,sep = "\t") ++ count_file <- count_file[!duplicated(count_file\$sgRNA), ] ++ if('${library_file}' == "") { ++ data('${library_value}') ++ library <- as.data.frame(get('${library_value}')) ++ #colnames(library) ++ #print(head(count_file)) ++ #print(head(library)) ++ count_file_to_normalize <- count_file %>% dplyr::left_join(library, by=c("sgRNA"="seq"),multiple = "all") ++ count_file_to_normalize <- count_file_to_normalize %>% ++ dplyr::select(colnames(count_file),CODE,-sgRNA) + -+ names(count_file_to_normalize)[names(count_file_to_normalize) == 'Gene'] <- 'gene' -+ names(count_file_to_normalize)[names(count_file_to_normalize) == 'CODE'] <- 'sgRNA' -+ count_file_to_normalize <- count_file_to_normalize %>% dplyr::select(sgRNA, gene, everything()) ++ names(count_file_to_normalize)[names(count_file_to_normalize) == 'Gene'] <- 'gene' ++ names(count_file_to_normalize)[names(count_file_to_normalize) == 'CODE'] <- 'sgRNA' ++ count_file_to_normalize <- count_file_to_normalize %>% dplyr::select(sgRNA, gene, everything()) ++ } else { ++ try(library <- read.delim('${library_file}',header=T,sep = ",")) ++ duplicates <- duplicated(library[, 1]) ++ unique_rows <- !duplicates ++ library <- library[unique_rows, , drop = FALSE] ++ rownames(library) = library[,1] ++ library = library[order(rownames(library)),] ++ library = library[,-1] ++ count_file_to_normalize <- count_file ++ } + -+ #crisprcleanr function -+ normANDfcs <- ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=get('${library_file}'),display=FALSE) -+ gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],get('${library_file}')) -+ correctedFCs <- 
ccr.GWclean(gwSortedFCs,display=FALSE,label='${meta}') -+ correctedCounts <- ccr.correctCounts('${meta}', ++ normANDfcs <- ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=library,display=FALSE) + gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],library) +- correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='${meta.id}') +- correctedCounts <- ccr.correctCounts('${meta.id}', ++ correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='crisprcleanr') ++ correctedCounts <- ccr.correctCounts('crisprcleanr', normANDfcs[["norm_counts"]], correctedFCs, -- library, -+ get('${library_file}'), + library, minTargetedGenes=${min_targeted_genes}, OutDir='./') - write.table(correctedCounts, file=paste0("${prefix}","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") - +- write.table(correctedCounts, file=paste0("${prefix}","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") +- ++ write.table(correctedCounts, file=paste0("crisprcleanr","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") ++ + #version version_file_path <- "versions.yml" version_crisprcleanr <- paste(unlist(packageVersion("CRISPRcleanR")), collapse = ".") diff --git a/modules/nf-core/crisprcleanr/normalize/main.nf b/modules/nf-core/crisprcleanr/normalize/main.nf index 33e4ecd1..d8969379 100644 --- a/modules/nf-core/crisprcleanr/normalize/main.nf +++ b/modules/nf-core/crisprcleanr/normalize/main.nf @@ -8,7 +8,9 @@ process CRISPRCLEANR_NORMALIZE { 'biocontainers/r-crisprcleanr:3.0.0--r42hdfd78af_1' }" input: - tuple val(meta), path(count_file), val(library_file) + tuple val(meta), path(count_file) + val(library_value) + path(library_file) val(min_reads) val(min_targeted_genes) @@ -28,30 +30,46 @@ process CRISPRCLEANR_NORMALIZE { #!/usr/bin/env Rscript library(CRISPRcleanR) library(dplyr) - data('${library_file}') - count_file <- read.delim('${count_file}',header=T,sep = "\t") - count_file_to_normalize 
<- count_file %>% dplyr::left_join(get('${library_file}'), by=c("sgRNA"="Target.Context.Sequence"),multiple = "all") - count_file_to_normalize <- count_file_to_normalize %>% - dplyr::select(colnames(count_file),CODE,-sgRNA) + print('${library_value}') + count_file <- read.delim('${count_file}',header=T,sep = "\t") + count_file <- count_file[!duplicated(count_file\$sgRNA), ] + if('${library_file}' == "") { + data('${library_value}') + library <- as.data.frame(get('${library_value}')) + #colnames(library) + #print(head(count_file)) + #print(head(library)) + count_file_to_normalize <- count_file %>% dplyr::left_join(library, by=c("sgRNA"="seq"),multiple = "all") + count_file_to_normalize <- count_file_to_normalize %>% + dplyr::select(colnames(count_file),CODE,-sgRNA) - names(count_file_to_normalize)[names(count_file_to_normalize) == 'Gene'] <- 'gene' - names(count_file_to_normalize)[names(count_file_to_normalize) == 'CODE'] <- 'sgRNA' - count_file_to_normalize <- count_file_to_normalize %>% dplyr::select(sgRNA, gene, everything()) + names(count_file_to_normalize)[names(count_file_to_normalize) == 'Gene'] <- 'gene' + names(count_file_to_normalize)[names(count_file_to_normalize) == 'CODE'] <- 'sgRNA' + count_file_to_normalize <- count_file_to_normalize %>% dplyr::select(sgRNA, gene, everything()) + } else { + try(library <- read.delim('${library_file}',header=T,sep = ",")) + duplicates <- duplicated(library[, 1]) + unique_rows <- !duplicates + library <- library[unique_rows, , drop = FALSE] + rownames(library) = library[,1] + library = library[order(rownames(library)),] + library = library[,-1] + count_file_to_normalize <- count_file + } - #crisprcleanr function - normANDfcs <- ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=get('${library_file}'),display=FALSE) - gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],get('${library_file}')) - correctedFCs <- 
ccr.GWclean(gwSortedFCs,display=FALSE,label='${meta}') - correctedCounts <- ccr.correctCounts('${meta}', + normANDfcs <- ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=library,display=FALSE) + gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],library) + correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='crisprcleanr') + correctedCounts <- ccr.correctCounts('crisprcleanr', normANDfcs[["norm_counts"]], correctedFCs, - get('${library_file}'), + library, minTargetedGenes=${min_targeted_genes}, OutDir='./') - write.table(correctedCounts, file=paste0("${prefix}","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") - + write.table(correctedCounts, file=paste0("crisprcleanr","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") + #version version_file_path <- "versions.yml" version_crisprcleanr <- paste(unlist(packageVersion("CRISPRcleanR")), collapse = ".") diff --git a/modules/nf-core/mageck/mle/mageck-mle.diff b/modules/nf-core/mageck/mle/mageck-mle.diff index f5d5c324..c830b3fe 100644 --- a/modules/nf-core/mageck/mle/mageck-mle.diff +++ b/modules/nf-core/mageck/mle/mageck-mle.diff @@ -3,8 +3,9 @@ Changes in module 'nf-core/mageck/mle' +++ modules/nf-core/mageck/mle/main.nf @@ -1,6 +1,6 @@ process MAGECK_MLE { - tag "$meta.id" +- tag "$meta.id" - label 'process_medium' ++ tag "$prefix" + label 'process_high' conda "${moduleDir}/environment.yml" @@ -15,9 +16,19 @@ Changes in module 'nf-core/mageck/mle' input: - tuple val(meta), path(count_table) - path(design_matrix) -+ tuple val(meta), path(count_table), path(design_matrix) ++ tuple val(meta), path(design_matrix), path(count_table) output: tuple val(meta), path("*.gene_summary.txt") , emit: gene_summary +@@ -21,7 +20,8 @@ + + script: + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ prefix = meta.id ?: "${meta.treatment}_vs_${meta.reference}" ++ + + """ + mageck \\ 
************************************************************ diff --git a/modules/nf-core/mageck/mle/main.nf b/modules/nf-core/mageck/mle/main.nf index 75ff06f2..4dbe4953 100644 --- a/modules/nf-core/mageck/mle/main.nf +++ b/modules/nf-core/mageck/mle/main.nf @@ -1,5 +1,5 @@ process MAGECK_MLE { - tag "$meta.id" + tag "$prefix" label 'process_high' conda "${moduleDir}/environment.yml" @@ -8,7 +8,7 @@ process MAGECK_MLE { 'biocontainers/mageck:0.5.9--py37h6bb024c_0' }" input: - tuple val(meta), path(count_table), path(design_matrix) + tuple val(meta), path(design_matrix), path(count_table) output: tuple val(meta), path("*.gene_summary.txt") , emit: gene_summary @@ -20,7 +20,8 @@ process MAGECK_MLE { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = meta.id ?: "${meta.treatment}_vs_${meta.reference}" + """ mageck \\ diff --git a/workflows/crisprseq_screening.nf b/workflows/crisprseq_screening.nf index fbaa87ea..a4c4a518 100644 --- a/workflows/crisprseq_screening.nf +++ b/workflows/crisprseq_screening.nf @@ -17,7 +17,13 @@ WorkflowCrisprseq.initialise(params, log) // Set screening parameters and channels if (params.library) { ch_library = file(params.library) } -if (params.crisprcleanr) { ch_crisprcleanr = Channel.value(params.crisprcleanr) } +if (params.crisprcleanr) { + if(params.crisprcleanr.endsWith(".csv")) { + ch_crisprcleanr = Channel.fromPath(params.crisprcleanr) + } else { + ch_crisprcleanr = Channel.value(params.crisprcleanr) + } +} if(params.mle_design_matrix) { Channel.fromPath(params.mle_design_matrix) @@ -47,8 +53,16 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + */ +include { BAGEL2_FC } from '../modules/local/bagel2/fc' +include { BAGEL2_BF } from '../modules/local/bagel2/bf' +include { BAGEL2_PR } from '../modules/local/bagel2/pr' +include { BAGEL2_GRAPH } from '../modules/local/bagel2/graph' +include { MATRICESCREATION } from '../modules/local/matricescreation' + // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // @@ -62,20 +76,16 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { CUTADAPT } from '../modules/nf-core/cutadapt/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { MAGECK_COUNT } from '../modules/nf-core/mageck/count/main' -include { MAGECK_MLE } from '../modules/nf-core/mageck/mle/main' -include { MAGECK_TEST } from '../modules/nf-core/mageck/test/main' -include { MAGECK_GRAPHRRA } from '../modules/local/mageck/graphrra' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { CRISPRCLEANR_NORMALIZE } from '../modules/nf-core/crisprcleanr/normalize/main' -include { BAGEL2_FC } from '../modules/local/bagel2/fc' -include { BAGEL2_BF } from '../modules/local/bagel2/bf' -include { BAGEL2_PR } from '../modules/local/bagel2/pr' -include { BAGEL2_GRAPH } from '../modules/local/bagel2/graph' -include { MATRICESCREATION } from '../modules/local/matricescreation' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { CUTADAPT } from '../modules/nf-core/cutadapt/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MAGECK_COUNT } from '../modules/nf-core/mageck/count/main' +include { MAGECK_MLE } from 
'../modules/nf-core/mageck/mle/main' +include { MAGECK_TEST } from '../modules/nf-core/mageck/test/main' +include { MAGECK_GRAPHRRA } from '../modules/local/mageck/graphrra' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { CRISPRCLEANR_NORMALIZE } from '../modules/nf-core/crisprcleanr/normalize/main' +include { MAGECK_MLE as MAGECK_MLE_MATRIX } from '../modules/nf-core/mageck/mle/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -115,7 +125,6 @@ workflow CRISPRSEQ_SCREENING { ) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - ch_input_cutadapt = ch_input.combine(Channel.value([[]])) if(params.cutadapt) { @@ -132,7 +141,6 @@ workflow CRISPRSEQ_SCREENING { } // this is to concatenate everything for mageck count - ch_input .map { meta, fastqs -> if(fastqs.size() == 1){ @@ -174,14 +182,26 @@ workflow CRISPRSEQ_SCREENING { .set { ch_counts } } - if(params.crisprcleanr) { - ch_crispr_normalize = Channel.of([id: "count_table_normalize"]) - CRISPRCLEANR_NORMALIZE( - ch_crispr_normalize.concat(ch_counts,ch_crisprcleanr).collect(), - params.min_reads, - params.min_targeted_genes - ) + ch_crispr_normalize = Channel.of([id: "count_table_normalize"]).concat(ch_counts) + + if(params.crisprcleanr.endsWith(".csv")) { + CRISPRCLEANR_NORMALIZE( + ch_crispr_normalize.collect(), + '', + ch_crisprcleanr, + params.min_reads, + params.min_targeted_genes + ) } else + { + ch_crispr_normalize = Channel.of([id: "count_table_normalize"]).concat(ch_counts) + CRISPRCLEANR_NORMALIZE( + ch_crispr_normalize.collect(), + ch_crisprcleanr, + [], + params.min_reads, + params.min_targeted_genes) + } ch_versions = ch_versions.mix(CRISPRCLEANR_NORMALIZE.out.versions) @@ -212,12 +232,12 @@ workflow CRISPRSEQ_SCREENING { if(params.contrasts) { Channel.fromPath(params.contrasts) .splitCsv(header:true, sep:';' ) - .set { ch_bagel } - counts = ch_bagel.combine(ch_counts) + .set { 
ch_contrasts } + counts = ch_contrasts.combine(ch_counts) //Define non essential and essential genes channels for bagel2 - ch_bagel_reference_essentials= Channel.value(params.bagel_reference_essentials) - ch_bagel_reference_nonessentials= Channel.value(params.bagel_reference_nonessentials) + ch_bagel_reference_essentials= Channel.fromPath(params.bagel_reference_essentials).first() + ch_bagel_reference_nonessentials= Channel.fromPath(params.bagel_reference_nonessentials).first() BAGEL2_FC ( counts @@ -252,23 +272,18 @@ workflow CRISPRSEQ_SCREENING { if((params.mle_design_matrix) || (params.contrasts && !params.rra)) { if(params.mle_design_matrix) { - ch_mle = ch_counts.combine(ch_design) - } + ch_design.map { + it -> [[id: it.getBaseName()], it] + }.set { ch_designed_mle } + ch_mle = ch_designed_mle.combine(ch_counts) + MAGECK_MLE_MATRIX (ch_mle) + } if(params.contrasts) { - MATRICESCREATION(params.contrasts) - ch_mle = ch_counts.combine(MATRICESCREATION.out.design_matrix) + MATRICESCREATION(ch_contrasts) + ch_mle = MATRICESCREATION.out.design_matrix.combine(ch_counts) + MAGECK_MLE (ch_mle) + ch_versions = ch_versions.mix(MAGECK_MLE.out.versions) } - ch_mle.map { - it -> [[id: it[1].getBaseName()], it[0], it[1]] - }.set { ch_designed_mle } - - MAGECK_MLE ( - ch_designed_mle - ) - - ch_versions = ch_versions.mix(MAGECK_MLE.out.versions) - - } CUSTOM_DUMPSOFTWAREVERSIONS (