nf-core · LaurenceKuhl · Feb 15, 2024 · Jan 15, 2024 · Jan 19, 2024 · Jan 22, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - Template update to 2.11.1 ([#105](https://github.com/nf-core/crisprseq/pull/105))
+- Added a csv input option for crisprcleanr ([#105](https://github.com/nf-core/crisprseq/pull/105))
+- Added a contrasts parameter so the pipeline automatically creates the design matrices and runs MAGeCK MLE ([#109](https://github.com/nf-core/crisprseq/pull/109))
+
 
 ## [v2.1.1 - Jamon Salas - patch](https://github.com/nf-core/crisprseq/releases/tag/2.1.1) - [14.12.2023]
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -104,7 +104,7 @@ process {
 
     withName: MAGECK_MLE {
         publishDir       = [
-            path: { "${params.outdir}/mageck/mle/${meta.id}/" },
+            path: { "${params.outdir}/mageck/mle/${meta.treatment}_vs_${meta.reference}/" },
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]

diff --git a/docs/usage/screening.md b/docs/usage/screening.md
@@ -21,7 +21,7 @@ nextflow run nf-core/crisprseq --analysis screening --input samplesheet.csv --li
 ```
 
 The following required parameters are here described.
-If you wish to input a raw count or normalized table, you can skip the samplesheet parameter as well as the library one and directly input your table using count_table `--count_table your_count_table`. If your count table is normalized, be sure to set the normalization method to none in MAGeCK MLE or MAGeCK RRA using a config file.
+If you wish to input a raw count or normalized table, you can skip the samplesheet parameter as well as the library one and directly input your table with `--count_table your_count_table`. Your count table should contain the following columns: sgRNA and gene. You can find an example [here](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/count_table.csv). If your count table is normalized, be sure to set the normalization method to none in MAGeCK MLE or MAGeCK RRA using a config file.
 
 ### Full samplesheet
 
@@ -58,7 +58,17 @@ We recommend to run MAGeCK MLE and BAGEL2 as these are the most used and most re
 
 ### Running CRISPRcleanR
 
-CRISPRcleanR is used for gene count normalization and the removal of biases for genomic segments for which copy numbers are amplified. Currently, the pipeline only supports annotation libraries already present in the R package and which can be found [here](https://github.com/francescojm/CRISPRcleanR/blob/master/Reference_Manual.pdf). To use CRISPRcleanR normalization, use `--crisprcleanr library`, `library` being the exact name as the library in the CRISPRcleanR documentation (e.g: "AVANA_Library").
+CRISPRcleanR is used for gene count normalization and the removal of biases for genomic segments for which copy numbers are amplified. Currently, the pipeline supports annotation libraries already present in the R package or an annotation file provided by the user.
+The most commonly used libraries already have an annotation dataset, which you can find [here](https://github.com/francescojm/CRISPRcleanR/blob/master/Reference_Manual.pdf). To use CRISPRcleanR normalization with one of them, use `--crisprcleanr library`, `library` being the exact name of the library in the CRISPRcleanR documentation (e.g. "AVANA_Library").
+Otherwise, if you wish to provide your own annotation file, please provide it in csv format and make sure it has the following layout (the header of the first column is left empty):
+
+|                      | CODE           | GENES   | EXONE | CHRM | STRAND | STARTpos  | ENDpos    |
+| -------------------- | -------------- | ------- | ----- | ---- | ------ | --------- | --------- |
+| AAAAAAAAAAAATGCATTCT | NM_183035.1    | Defb34  | ex2   | 8    | -      | 19126349  | 19126369  |
+| AAAAAAAAATAAGCTCACCC | NM_001170853.1 | Mndal   | ex5   | 1    | +      | 173872968 | 173872988 |
+| AAAAAAAATCCTGTCGCCCA | NM_001039049.1 | Cox8c   | ex1   | 12   | +      | 102899487 | 102899507 |
+| AAAAAAATCGGCATACCATG | NM_178627.3    | Poldip3 | ex4   | 15   | -      | 83135295  | 83135315  |
+| AAAAAAATGACATTACTGCA | NM_026602.3    | Bcas2   | ex4   | 3    | +      | 103174386 | 103174406 |
 
 ### Running MAGeCK MLE and BAGEL2 with a contrast file
 
@@ -73,7 +83,7 @@ A full example can be found [here](https://raw.githubusercontent.com/nf-core/tes
 
 ### Running MAGeCK RRA only
 
-MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK rra, you can define the contrasts as previously stated in the last section and also specify `--rra` .
+MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK RRA, you can define the contrasts as described in the previous section (with a `.txt` extension) and also specify `--rra`.
 
 ### Running MAGeCK MLE only
 

diff --git a/modules/local/bagel2/bf.nf b/modules/local/bagel2/bf.nf
@@ -23,7 +23,7 @@ process BAGEL2_BF {
 
     script:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.treatment}"
+    def prefix = task.ext.prefix ?: "${meta.treatment}_vs_${meta.reference}"
 
     """
     BAGEL.py bf -i $foldchange -o '${meta.treatment}_vs_${meta.reference}.bf' $args -e $reference_essentials -n $reference_nonessentials -c ${meta.treatment}

diff --git a/modules/local/bagel2/pr.nf b/modules/local/bagel2/pr.nf
@@ -1,5 +1,5 @@
 process BAGEL2_PR {
-    tag "$meta.treatment"
+    tag "${meta.treatment}_vs_${meta.reference}"
     label 'process_single'
 
     conda "python=3.11.4 pandas=2.0.3 numpy=1.25.1 scikit-learn=1.3.0 click=8.1.6"

diff --git a/modules/local/matricescreation.nf b/modules/local/matricescreation.nf
@@ -7,10 +7,10 @@ process MATRICESCREATION {
         'biocontainers/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' }"
 
     input:
-    path(contrasts)
+    val(meta)
 
     output:
-    path("*.txt"), emit: design_matrix
+    tuple val(meta), path("*.txt"), emit: design_matrix
 
     when:
     task.ext.when == null || task.ext.when
@@ -22,30 +22,26 @@ process MATRICESCREATION {
     #### author: Laurence Kuhlburger
     #### Released under the MIT license. See git repository (https://github.com/nf-core/crisprseq) for full license text.
     ####
-    #### Orient a reference sequence according to reads orientation.
+    #### Create design matrices
 
-    data <- read.table("$contrasts", header = TRUE, sep = ";", stringsAsFactors = FALSE)
     # Loop through each row in the data
-    for (i in 1:nrow(data)) {
-        # Extract control and treatment samples for the current row
-        control_samples <- unlist(strsplit(data\$reference[i], ","))
-        treatment_samples <- unlist(strsplit(data\$treatment[i], ","))
-
-        # Create a vector of all unique samples
-        all_samples <- unique(c(control_samples, treatment_samples))
-
-        # Initialize a matrix to store the design matrix
-        design_matrix <- data.frame(matrix(0, nrow = length(all_samples), ncol = 3,
-                        dimnames = list(all_samples, c("Samples", "baseline", paste0(gsub(',', '_', data\$treatment[i] ),"_vs_", data\$reference[i])))))
-
-        # Set baseline and treatment values in the design matrix
-        design_matrix[, "Samples"] <- rownames(design_matrix)
-        design_matrix\$baseline <- 1
-        design_matrix[treatment_samples, paste0(gsub(',', '_', data\$treatment[1] ),"_vs_",gsub(",","_",data\$reference[i]))] <- 1
-
-        # Print the design matrix to a file
-        output_file <- paste0(gsub(',', '_', data\$treatment[1] ),"_vs_",gsub(",","_",data\$reference[i]),".txt")
-        write.table(design_matrix, output_file, sep = "\t", quote = FALSE, row.names=FALSE)
-    }
+    control_samples <- unlist(strsplit('${meta.reference}', ","))
+    treatment_samples <- unlist(strsplit('$meta.treatment', ","))
+    all_samples <- unique(c(control_samples, treatment_samples))
+    design_matrix <- data.frame(matrix(0, nrow = length(all_samples), ncol = 3,
+                                dimnames = list(all_samples,
+                                                c("Samples", "baseline",
+    paste0(gsub(',', '_', '$meta.treatment'),"_vs_",gsub(',','_','$meta.reference'))))))
+    name = paste0(gsub(',', '_', '$meta.treatment' ),"_vs_", gsub(',', '_','$meta.reference'))
+    # Set baseline and treatment values in the design matrix
+    design_matrix[, "Samples"] <- rownames(design_matrix)
+    design_matrix\$baseline <- 1
+    design_matrix[treatment_samples, name] <- 1
+    design_matrix[treatment_samples, paste0(gsub(',', '_', '$meta.treatment'),"_vs_",gsub(",","_",'$meta.reference'))] <- 1
+
+    # Print the design matrix to a file
+    output_file <- paste0(gsub(',', '_', '$meta.treatment' ),"_vs_",gsub(",","_",'$meta.reference'),".txt")
+    write.table(design_matrix, output_file, sep = "\t", quote = FALSE, row.names=FALSE)
+
     """
 }
diff --git a/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff b/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff
diff --git a/modules/nf-core/crisprcleanr/normalize/main.nf b/modules/nf-core/crisprcleanr/normalize/main.nf
diff --git a/modules/nf-core/mageck/mle/mageck-mle.diff b/modules/nf-core/mageck/mle/mageck-mle.diff
diff --git a/modules/nf-core/mageck/mle/main.nf b/modules/nf-core/mageck/mle/main.nf