nextflow_schema.json

{
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://raw.githubusercontent.com/nf-core/crisprseq/master/nextflow_schema.json",
    "title": "nf-core/crisprseq pipeline parameters",
    "description": "Pipeline for the analysis of CRISPR data",
    "type": "object",
    "$defs": {
        "input_output_options": {
            "title": "Input/output options",
            "type": "object",
            "fa_icon": "fas fa-terminal",
            "description": "Define where the pipeline should find input data and save output data.",
            "required": ["outdir", "analysis"],
            "properties": {
                "input": {
                    "type": "string",
                    "format": "file-path",
                    "exists": true,
                    "schema": "assets/schema_input.json",
                    "mimetype": "text/csv",
                    "pattern": "^\\S+\\.csv$",
                    "description": "Path to comma-separated file containing information about the samples in the experiment.",
                    "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/crisprseq/usage#samplesheet-input).",
                    "fa_icon": "fas fa-file-csv"
                },
                "outdir": {
                    "type": "string",
                    "format": "directory-path",
                    "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
                    "fa_icon": "fas fa-folder-open"
                },
                "analysis": {
                    "type": "string",
                    "fa_icon": "fas fa-cog",
                    "description": "Type of analysis to perform: `targeted` for targeted CRISPR experiments or `screening` for CRISPR screening experiments.",
                    "enum": ["screening", "targeted"]
                },
                "email": {
                    "type": "string",
                    "description": "Email address for completion summary.",
                    "fa_icon": "fas fa-envelope",
                    "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
                },
                "multiqc_title": {
                    "type": "string",
                    "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.",
                    "fa_icon": "fas fa-file-signature"
                }
            }
        },
        "targeted_pipeline_steps": {
            "title": "Targeted pipeline steps",
            "type": "object",
            "description": "Alternative pipeline steps to include in the targeted analysis.",
            "default": "",
            "properties": {
                "overrepresented": {
                    "type": "boolean",
                    "fa_icon": "fas fa-sort-numeric-up-alt",
                    "description": "Trim overrepresented sequences from reads (cutadapt)"
                },
                "umi_clustering": {
                    "type": "boolean",
                    "fa_icon": "fas fa-layer-group",
                    "description": "If the sample contains unique molecular identifiers (UMIs), run the UMI extraction, clustering and consensus steps."
                },
                "skip_clonality": {
                    "type": "boolean",
                    "fa_icon": "fas fa-clone",
                    "description": "Skip the classification of samples by clonality.",
                    "help_text": "If the step is not skipped, samples are classified into: homologous WT, homologous NHEJ or heterologous NHEJ."
                }
            },
            "fa_icon": "fas fa-shoe-prints"
        },
        "umi_parameters": {
            "title": "UMI parameters",
            "type": "object",
            "description": "Parameters regarding unique molecular identifiers (UMIs)",
            "default": "",
            "properties": {
                "umi_bin_size": {
                    "type": "integer",
                    "default": 1,
                    "description": "Minimum size of a UMI cluster.",
                    "fa_icon": "fas fa-sort-amount-down"
                },
                "medaka_model": {
                    "type": "string",
                    "default": "https://github.com/nanoporetech/medaka/raw/master/medaka/data/r941_min_high_g303_model.hdf5",
                    "fa_icon": "fas fa-font",
                    "description": "Medaka model (-m) to use according to the basecaller used."
                }
            },
            "fa_icon": "fas fa-layer-group"
        },
        "targeted_parameters": {
            "title": "Targeted parameters",
            "type": "object",
            "description": "Parameters used for alignment processes",
            "default": "",
            "properties": {
                "aligner": {
                    "type": "string",
                    "description": "Aligner program to use.",
                    "default": "minimap2",
                    "fa_icon": "fas fa-align-justify",
                    "enum": ["minimap2", "bwa", "bowtie2"]
                },
                "protospacer": {
                    "type": "string",
                    "pattern": "^[ACGTacgt]+$",
                    "errorMessage": "The protospacer must be a valid DNA sequence.",
                    "fa_icon": "fas fa-grip-lines",
                    "description": "Provide the same protospacer sequence for all samples. Will override protospacer sequences provided by an input samplesheet."
                }
            },
            "fa_icon": "fas fa-align-justify"
        },
        "vsearch_parameters": {
            "title": "Vsearch parameters",
            "type": "object",
            "description": "Parameters to use in Vsearch processes",
            "default": "",
            "properties": {
                "vsearch_minseqlength": {
                    "type": "integer",
                    "default": 55,
                    "fa_icon": "fas fa-minus",
                    "description": "Vsearch minimum sequence length.",
                    "help_text": "Discard sequences shorter than vsearch_minseqlength.",
                    "minimum": 1
                },
                "vsearch_maxseqlength": {
                    "type": "integer",
                    "default": 57,
                    "fa_icon": "fas fa-plus",
                    "description": "Vsearch maximum sequence length.",
                    "help_text": "Discard sequences longer than vsearch_maxseqlength.",
                    "minimum": 1
                },
                "vsearch_id": {
                    "type": "number",
                    "default": 0.99,
                    "fa_icon": "fas fa-equals",
                    "description": "Vsearch pairwise identity threshold.",
                    "help_text": "Do not add the target to the cluster if the pairwise identity with the centroid is lower than id. The pairwise identity is defined as the number of (matching columns) / (alignment length - terminal gaps).",
                    "minimum": 0,
                    "maximum": 1
                }
            },
            "fa_icon": "fas fa-layer-group"
        },
        "screening_parameters": {
            "title": "Screening parameters",
            "type": "object",
            "description": "Parameters used for functional genomic screenings",
            "default": "",
            "properties": {
                "library": {
                    "type": "string",
                    "format": "file-path",
                    "pattern": "^\\S+\\.(tsv|txt)$",
                    "mimetype": "text/tsv",
                    "exists": true,
                    "fa_icon": "far fa-address-book",
                    "description": "sgRNA and targeting genes, tab separated"
                },
                "five_prime_adapter": {
                    "type": "string",
                    "description": "Sequencing adapter sequence to use for trimming on the 5' end"
                },
                "three_prime_adapter": {
                    "type": "string",
                    "description": "Sequencing adapter sequence to use for trimming on the 3' end"
                },
                "fasta": {
                    "type": "string",
                    "description": "Library in fasta file format in case you want to map with bowtie2 and then MAGeCK count",
                    "fa_icon": "fas fa-book-reader"
                },
                "day0_label": {
                    "type": "string",
                    "description": "Specify the label for control sample (usually day 0 or plasmid). For every other sample label, the module will treat it as a treatment condition and compare with control sample for MAGeCK MLE",
                    "fa_icon": "fas fa-cloud-sun"
                },
                "mle_design_matrix": {
                    "type": "string",
                    "format": "file-path",
                    "exists": true,
                    "description": "Design matrix used for MAGeCK MLE to call essential genes under multiple conditions while considering sgRNA knockout efficiency"
                },
                "mle_control_sgrna": {
                    "type": "string",
                    "description": "Control sgRNA file for MAGeCK MLE"
                },
                "contrasts": {
                    "type": "string",
                    "format": "file-path",
                    "exists": true,
                    "description": "Comma-separated file with the conditions to be compared. The first one will be the reference (control)",
                    "fa_icon": "fas fa-adjust"
                },
                "mle": {
                    "type": "boolean",
                    "description": "Parameter indicating if MAGeCK MLE should be run"
                },
                "rra": {
                    "type": "boolean",
                    "description": "Parameter indicating if MAGeCK RRA should be run instead of MAGeCK MLE."
                },
                "bagel2": {
                    "type": "boolean",
                    "description": "Parameter indicating if BAGEL2 should be run"
                },
                "drugz": {
                    "type": "boolean",
                    "format": "file-path",
                    "description": "Parameter indicating if DrugZ should be run"
                },
                "count_table": {
                    "type": "string",
                    "format": "file-path",
                    "pattern": "^\\S+\\.(tsv|txt)$",
                    "mimetype": "text/tsv",
                    "exists": true,
                    "description": "Please provide your count table if the mageck test should be skipped."
                },
                "crisprcleanr": {
                    "type": "string",
                    "description": "sgRNA library annotation for crisprcleanR"
                },
                "min_reads": {
                    "type": "number",
                    "description": "A filter threshold value for sgRNAs, based on their average counts in the control sample",
                    "default": 30
                },
                "min_targeted_genes": {
                    "type": "number",
                    "description": "Minimal number of different genes targeted by sgRNAs in a biased segment in order for the corresponding counts to be corrected for CRISPRcleanR",
                    "default": 3
                },
                "bagel_reference_essentials": {
                    "type": "string",
                    "description": "Core essential gene set for BAGEL2",
                    "default": "https://raw.githubusercontent.com/hart-lab/bagel/master/CEGv2.txt"
                },
                "bagel_reference_nonessentials": {
                    "type": "string",
                    "description": "Non-essential gene set for BAGEL2",
                    "default": "https://raw.githubusercontent.com/hart-lab/bagel/master/NEGv1.txt"
                },
                "drugz_remove_genes": {
                    "type": "string",
                    "description": "Essential genes to remove from the drugZ modules",
                    "pattern": "\\\\S+"
                },
                "hitselection": {
                    "type": "boolean",
                    "description": "Specify to run the Hitselection algorithm"
                },
                "hit_selection_iteration_nb": {
                    "type": "number",
                    "description": "Number of iterations the hit selection module should provide",
                    "default": 1000
                }
            }
        },
        "reference_genome_options": {
            "title": "Reference genome options",
            "type": "object",
            "fa_icon": "fas fa-dna",
            "description": "Reference genome related files and options required for the workflow.",
            "properties": {
                "genome": {
                    "type": "string",
                    "description": "Name of iGenomes reference.",
                    "fa_icon": "fas fa-book",
                    "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
                },
                "reference_fasta": {
                    "type": "string",
                    "format": "file-path",
                    "exists": true,
                    "mimetype": "text/plain",
                    "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                    "description": "Path to the reference FASTA file. Will override reference sequences provided by an input sample sheet.",
                    "fa_icon": "far fa-file-alt"
                },
                "igenomes_ignore": {
                    "type": "boolean",
                    "description": "Do not load the iGenomes reference config.",
                    "fa_icon": "fas fa-ban",
                    "hidden": true,
                    "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
                },
                "igenomes_base": {
                    "type": "string",
                    "format": "directory-path",
                    "description": "The base path to the igenomes reference files",
                    "fa_icon": "fas fa-ban",
                    "hidden": true,
                    "default": "s3://ngi-igenomes/igenomes/"
                }
            }
        },
        "institutional_config_options": {
            "title": "Institutional config options",
            "type": "object",
            "fa_icon": "fas fa-university",
            "description": "Parameters used to describe centralised config profiles. These should not be edited.",
            "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.",
            "properties": {
                "custom_config_version": {
                    "type": "string",
                    "description": "Git commit id for Institutional configs.",
                    "default": "master",
                    "hidden": true,
                    "fa_icon": "fas fa-users-cog"
                },
                "custom_config_base": {
                    "type": "string",
                    "description": "Base directory for Institutional configs.",
                    "default": "https://raw.githubusercontent.com/nf-core/configs/master",
                    "hidden": true,
                    "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.",
                    "fa_icon": "fas fa-users-cog"
                },
                "config_profile_name": {
                    "type": "string",
                    "description": "Institutional config name.",
                    "hidden": true,
                    "fa_icon": "fas fa-users-cog"
                },
                "config_profile_description": {
                    "type": "string",
                    "description": "Institutional config description.",
                    "hidden": true,
                    "fa_icon": "fas fa-users-cog"
                },
                "config_profile_contact": {
                    "type": "string",
                    "description": "Institutional config contact information.",
                    "hidden": true,
                    "fa_icon": "fas fa-users-cog"
                },
                "config_profile_url": {
                    "type": "string",
                    "description": "Institutional config URL link.",
                    "hidden": true,
                    "fa_icon": "fas fa-users-cog"
                }
            }
        },
        "generic_options": {
            "title": "Generic options",
            "type": "object",
            "fa_icon": "fas fa-file-import",
            "description": "Less common options for the pipeline, typically set in a config file.",
            "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
            "properties": {
                "version": {
                    "type": "boolean",
                    "description": "Display version and exit.",
                    "fa_icon": "fas fa-question-circle",
                    "hidden": true
                },
                "publish_dir_mode": {
                    "type": "string",
                    "default": "copy",
                    "description": "Method used to save pipeline results to output directory.",
                    "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                    "fa_icon": "fas fa-copy",
                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
                    "hidden": true
                },
                "email_on_fail": {
                    "type": "string",
                    "description": "Email address for completion summary, only when pipeline fails.",
                    "fa_icon": "fas fa-exclamation-triangle",
                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
                    "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.",
                    "hidden": true
                },
                "plaintext_email": {
                    "type": "boolean",
                    "description": "Send plain-text email instead of HTML.",
                    "fa_icon": "fas fa-remove-format",
                    "hidden": true
                },
                "max_multiqc_email_size": {
                    "type": "string",
                    "description": "File size limit when attaching MultiQC reports to summary emails.",
                    "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
                    "default": "25.MB",
                    "fa_icon": "fas fa-file-upload",
                    "hidden": true
                },
                "monochrome_logs": {
                    "type": "boolean",
                    "description": "Do not use coloured log outputs.",
                    "fa_icon": "fas fa-palette",
                    "hidden": true
                },
                "hook_url": {
                    "type": "string",
                    "description": "Incoming hook URL for messaging service",
                    "fa_icon": "fas fa-people-group",
                    "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
                    "hidden": true
                },
                "multiqc_config": {
                    "type": "string",
                    "format": "file-path",
                    "exists": true,
                    "description": "Custom config file to supply to MultiQC.",
                    "fa_icon": "fas fa-cog",
                    "hidden": true
                },
                "multiqc_logo": {
                    "type": "string",
                    "format": "file-path",
                    "exists": true,
                    "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
                    "fa_icon": "fas fa-image",
                    "hidden": true
                },
                "multiqc_methods_description": {
                    "type": "string",
                    "format": "file-path",
                    "exists": true,
                    "description": "Custom MultiQC yaml file containing HTML including a methods description.",
                    "fa_icon": "fas fa-cog"
                },
                "validate_params": {
                    "type": "boolean",
                    "description": "Boolean whether to validate parameters against the schema at runtime",
                    "default": true,
                    "fa_icon": "fas fa-check-square",
                    "hidden": true
                },
                "pipelines_testdata_base_path": {
                    "type": "string",
                    "fa_icon": "far fa-check-circle",
                    "description": "Base URL or local path to location of pipeline test dataset files",
                    "default": "https://raw.githubusercontent.com/nf-core/test-datasets/",
                    "hidden": true
                }
            }
        }
    },
    "allOf": [
        {
            "$ref": "#/$defs/input_output_options"
        },
        {
            "$ref": "#/$defs/reference_genome_options"
        },
        {
            "$ref": "#/$defs/targeted_pipeline_steps"
        },
        {
            "$ref": "#/$defs/umi_parameters"
        },
        {
            "$ref": "#/$defs/targeted_parameters"
        },
        {
            "$ref": "#/$defs/vsearch_parameters"
        },
        {
            "$ref": "#/$defs/screening_parameters"
        },
        {
            "$ref": "#/$defs/reference_genome_options"
        },
        {
            "$ref": "#/$defs/institutional_config_options"
        },
        {
            "$ref": "#/$defs/generic_options"
        }
    ]
}