-
Notifications
You must be signed in to change notification settings - Fork 81
/
Copy pathnextflow_schema.json
1569 lines (1569 loc) · 132 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/nf-core/eager/master/nextflow_schema.json",
"title": "nf-core/eager pipeline parameters",
"description": "A fully reproducible and state-of-the-art ancient DNA analysis pipeline",
"type": "object",
"$defs": {
"input_output_options": {
"title": "Input/output options",
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir"],
"properties": {
"input": {
"type": "string",
"format": "file-path",
"exists": true,
"mimetype": "text/csv",
"pattern": "^\\S+\\.(c|t)sv$",
"schema": "assets/schema_input.json",
"description": "Path to tab- or comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a tab- or comma-separated file with 11 columns, and a header row. See [usage docs](https://nf-co.re/eager/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
},
"convert_inputbam": {
"type": "boolean",
"description": "Specify to convert input BAM files back to FASTQ for remapping",
"help_text": "This parameter tells the pipeline to convert the BAM files listed in the `--input` TSV or CSV sheet back to FASTQ format to allow re-preprocessing and mapping.\n\nCan be useful when you want to ensure consistent mapping parameters across all libraries when incorporating public data, however be careful of biases that may come from re-processing again (the BAM files may already be clipped, or only mapped reads with different settings are included so you may not have all reads from the original publication).",
"fa_icon": "fas fa-undo-alt"
},
"outdir": {
"type": "string",
"format": "directory-path",
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
},
"email": {
"type": "string",
"description": "Email address for completion summary.",
"fa_icon": "fas fa-envelope",
"help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
},
"multiqc_title": {
"type": "string",
"description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.",
"fa_icon": "fas fa-file-signature"
}
}
},
"reference_genome_options": {
"title": "Reference genome options",
"type": "object",
"fa_icon": "fas fa-dna",
"description": "Reference genome related files and options required for the workflow.",
"properties": {
"fasta": {
"type": "string",
"format": "file-path",
"exists": true,
"mimetype": "text/plain",
"pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
"errorMessage": "The path to the reference FASTA file must not contain spaces and must have file extensions '.fasta', '.fa', '.fas', '.fna', '.fasta.gz','.fa.gz','.fas.gz' or '.fna.gz'.",
"description": "Path to FASTA file of the reference genome.",
"help_text": "This parameter is *mandatory* if `--genome` or `--fasta_sheet` are not specified. If you don't supply a mapper index (e.g. for BWA), this will be generated for you automatically. Combine with `--save_reference` to save mapper index for future runs.",
"fa_icon": "far fa-file-code"
},
"fasta_fai": {
"type": "string",
"description": "Specify path to samtools FASTA index.",
"help_text": "If you want to use a pre-existing `samtools faidx` index, use this to specify the required FASTA index file for the selected reference genome. This should be generated by samtools faidx and has a file suffix of `.fai`.",
"fa_icon": "fas fa-address-book"
},
"fasta_dict": {
"type": "string",
"description": "Specify path to Picard sequence dictionary file.",
"help_text": "If you want to use a pre-existing `picard CreateSequenceDictionary` dictionary file, use this to specify the required `.dict` file for the selected reference genome.",
"fa_icon": "fas fa-address-book"
},
"fasta_mapperindexdir": {
"type": "string",
"description": "Specify path to directory containing index files of the FASTA for a given mapper.",
"help_text": "For most people this will likely be the same directory that contains the file you provided to `--fasta`.\n\nIf you want to use pre-existing `bwa index` indices, the directory should contain files ending in '.amb' '.ann' '.bwt'. If you want to use pre-existing `bowtie2 build` indices, the directory should contain files ending in'.1.bt2', '.2.bt2', '.rev.1.bt2'.\n\nIn any case do not include the files themselves in the path. nf-core/eager will automagically detect the index files by searching for the FASTA filename with the corresponding `bwa index`/`bowtie2 build` file suffixes. If not supplied, the indices will be generated for you.\n\n",
"fa_icon": "fas fa-folder-open"
},
"fasta_largeref": {
"type": "boolean",
"description": "Specify to generate '.csi' BAM indices instead of '.bai' for larger reference genomes.",
"help_text": "This parameter is required to be set for large reference genomes. If your reference genome is larger than 3.5GB, the `samtools index` calls in the pipeline need to generate `.csi` indices instead of `.bai` indices to compensate for the size of the reference genome (with samtools: `-c`). This parameter is not required for smaller references (including the human reference genomes hg19 or grch37/grch38).",
"fa_icon": "fas fa-address-book"
},
"save_reference": {
"type": "boolean",
"description": "Specify to save any pipeline-generated reference genome indices in the results directory.",
"help_text": "Use this if you do not have pre-made reference FASTA indices for `bwa`, `samtools` and `picard`. If you turn this on, the indices nf-core/eager generates for you and will be saved in the `<your_output_dir>/results/reference_genomes` for you. If not supplied, nf-core/eager generated index references will be deleted.\n\n> Modifies SAMtools index command: `-c`",
"fa_icon": "fas fa-save"
},
"fasta_sheet": {
"type": "string",
"format": "file-path",
"exists": true,
"mimetype": "text/csv",
"pattern": "^\\S+\\.(c|t)sv$",
"schema": "assets/schema_fasta.json",
"description": "Path to a tab-/comma-separated file containing reference-specific files.",
"help_text": "This parameter is *mandatory* if `--genome` or `--fasta` are not specified. If you don't supply a mapper index (e.g. for BWA), this will be generated for you automatically.",
"errorMessage": "The path to the reference sheet must not contain spaces and have file extension '.csv' or '.tsv'.",
"fa_icon": "fas fa-table"
},
"genome": {
"type": "string",
"description": "Name of iGenomes reference.",
"fa_icon": "fas fa-book",
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
"hidden": true
},
"igenomes_base": {
"type": "string",
"format": "directory-path",
"description": "Directory / URL base for iGenomes references.",
"default": "s3://ngi-igenomes/igenomes/",
"fa_icon": "fas fa-cloud-download-alt",
"hidden": true
},
"igenomes_ignore": {
"type": "boolean",
"description": "Do not load the iGenomes reference config.",
"fa_icon": "fas fa-ban",
"hidden": true,
"help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
},
"fasta_circular_target": {
"type": "string",
"description": "Specify the FASTA header of the extended chromosome when using `circularmapper`.",
"help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`",
"fa_icon": "fas fa-bullseye"
},
"fasta_circularmapper_elongationfactor": {
"type": "integer",
"default": 500,
"description": "Specify the number of bases to extend reference by (circularmapper only).",
"help_text": "The number of bases to extend the beginning and end of each reference genome with.",
"fa_icon": "fas fa-external-link-alt"
},
"fasta_circularmapper_elongatedfasta": {
"type": "string",
"description": "Specify an elongated reference FASTA to be used for circularmapper.",
"help_text": "Specify an already elongated FASTA file for circularmapper to avoid regeneration.",
"fa_icon": "fas fa-address-book"
},
"fasta_circularmapper_elongatedindex": {
"type": "string",
"description": "Specify a samtools index for the elongated FASTA file.",
"help_text": "Specify the index for an already elongated FASTA file to avoid regeneration.",
"fa_icon": "fas fa-address-book"
}
}
},
"institutional_config_options": {
"title": "Institutional config options",
"type": "object",
"fa_icon": "fas fa-university",
"description": "Parameters used to describe centralised config profiles. These should not be edited.",
"help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.",
"properties": {
"custom_config_version": {
"type": "string",
"description": "Git commit id for Institutional configs.",
"default": "master",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"custom_config_base": {
"type": "string",
"description": "Base directory for Institutional configs.",
"default": "https://raw.githubusercontent.com/nf-core/configs/master",
"hidden": true,
"help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.",
"fa_icon": "fas fa-users-cog"
},
"config_profile_name": {
"type": "string",
"description": "Institutional config name.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_description": {
"type": "string",
"description": "Institutional config description.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_contact": {
"type": "string",
"description": "Institutional config contact information.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_url": {
"type": "string",
"description": "Institutional config URL link.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
}
}
},
"generic_options": {
"title": "Generic options",
"type": "object",
"fa_icon": "fas fa-file-import",
"description": "Less common options for the pipeline, typically set in a config file.",
"help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
"properties": {
"version": {
"type": "boolean",
"description": "Display version and exit.",
"fa_icon": "fas fa-question-circle",
"hidden": true
},
"publish_dir_mode": {
"type": "string",
"default": "copy",
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"hidden": true
},
"email_on_fail": {
"type": "string",
"description": "Email address for completion summary, only when pipeline fails.",
"fa_icon": "fas fa-exclamation-triangle",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
"help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.",
"hidden": true
},
"plaintext_email": {
"type": "boolean",
"description": "Send plain-text email instead of HTML.",
"fa_icon": "fas fa-remove-format",
"hidden": true
},
"max_multiqc_email_size": {
"type": "string",
"description": "File size limit when attaching MultiQC reports to summary emails.",
"pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
"default": "25.MB",
"fa_icon": "fas fa-file-upload",
"hidden": true
},
"monochrome_logs": {
"type": "boolean",
"description": "Do not use coloured log outputs.",
"fa_icon": "fas fa-palette",
"hidden": true
},
"hook_url": {
"type": "string",
"description": "Incoming hook URL for messaging service",
"fa_icon": "fas fa-people-group",
"help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
"hidden": true
},
"multiqc_config": {
"type": "string",
"format": "file-path",
"description": "Custom config file to supply to MultiQC.",
"fa_icon": "fas fa-cog",
"hidden": true
},
"multiqc_logo": {
"type": "string",
"description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
"fa_icon": "fas fa-image",
"hidden": true
},
"multiqc_methods_description": {
"type": "string",
"description": "Custom MultiQC yaml file containing HTML including a methods description.",
"fa_icon": "fas fa-cog"
},
"validate_params": {
"type": "boolean",
"description": "Boolean whether to validate parameters against the schema at runtime",
"default": true,
"fa_icon": "fas fa-check-square",
"hidden": true
},
"pipelines_testdata_base_path": {
"type": "string",
"fa_icon": "far fa-check-circle",
"description": "Base URL or local path to location of pipeline test dataset files",
"default": "https://raw.githubusercontent.com/nf-core/test-datasets/",
"hidden": true
},
"trace_report_suffix": {
"type": "string",
"fa_icon": "far calendar",
"description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.",
"hidden": true
}
}
},
"preprocessing": {
"title": "Preprocessing",
"type": "object",
"description": "Removal of adapters, paired-end merging, poly-G removal, etc.",
"default": "",
"properties": {
"sequencing_qc_tool": {
"type": "string",
"default": "fastqc",
"description": "Specify which tool to use for sequencing quality control.",
"help_text": "Specify which tool to use for sequencing quality control.\n\nFalco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We recommend using falco with very large datasets (due to reduced memory constraints).",
"enum": ["fastqc", "falco"],
"fa_icon": "fas fa-hammer"
},
"skip_preprocessing": {
"type": "boolean",
"description": "Specify to skip all preprocessing steps (adapter removal, paired-end merging, poly-G trimming, etc).",
"help_text": "Specify to skip all preprocessing steps (adapter removal, paired-end merging, poly-G trimming etc).\n\nThis will also mean you will only get one set of FastQC results (of the input reads).",
"fa_icon": "fas fa-forward"
},
"preprocessing_tool": {
"type": "string",
"default": "fastp",
"description": "Specify which preprocessing tool to use.",
"enum": ["fastp", "adapterremoval"],
"help_text": "Specify which preprocessing tool to use.\n\nAdapterRemoval is commonly used in palaeogenomics, however fastp has similar performance and has many additional functionality (including inbuilt complexity trimming) that can be often useful.",
"fa_icon": "fas fa-hammer"
},
"preprocessing_skippairmerging": {
"type": "boolean",
"description": "Specify to skip read-pair merging.",
"fa_icon": "fas fa-forward",
"help_text": "Turns off the paired-end read merging, and will result in paired-end mapping modes being used during reference of reads again alignment.\n\nThis can be useful in cases where you have long ancient DNA reads, modern DNA or when you want to utilise mate-pair 'spatial' information.\n\n ⚠️ If you run this with --preprocessing_minlength set to a value (as is by default!), you may end up removing single reads from either the pair1 or pair2 file. These reads will be NOT be mapped when aligning with either BWA or bowtie, as both can only accept one (forward) or two (forward and reverse) FASTQs as input in paired-end mode.\n\n> ⚠️ If you run metagenomic screening as well as skipping merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one. This merged file is _not_ saved in results directory.\n\n> Modifies AdapterRemoval parameter: `--collapse`\n> Modifies fastp parameter: `--merge`"
},
"preprocessing_excludeunmerged": {
"type": "boolean",
"description": "Specify to exclude read-pairs that did not overlap sufficiently for merging (i.e., keep merged reads only).",
"fa_icon": "fas fa-trash-alt",
"help_text": "Specify to exclude read-pairs that did not overlap sufficiently for merging (i.e., keep merged reads only). Singletons (i.e. reads missing a pair) or un-merged reads (where there wasn't sufficient overlap) are discarded.\n\nMost ancient DNA molecules are very short, and the majority are expected to merge. Specifying this parameter can sometimes be useful when dealing with ultra-short aDNA reads to reduce the number of longer-reads you may have in your library that are derived from modern contamination. It can also speed up run time of mapping steps.\n\nYou may want to use this if you want ensure only the best quality reads for your analysis, but with the penalty of potentially losing still valid data (even if some reads have slightly lower quality and/or are longer). It is highly recommended when using 'dedup' deduplication tool."
},
"preprocessing_skipadaptertrim": {
"type": "boolean",
"description": "Specify to skip removal of adapters.",
"help_text": "Specify to turn off trimming of adapters from reads.\n\nYou may wish to do this if you are using publicly available data, that _should_ have all library artefacts from reads removed.\n\nThis will override any other adapter parameters provided (i.e, `--preprocessing_adapterlist` and `--preprocessing_adapter{1,2}` will be ignored)!\n\n> Modifies AdapterRemoval parameter: `--adapter1` and `--adapter2` (sets both to an empty string)\n> Applies fastp parameter: `--disable_adapter_trimming`",
"fa_icon": "fas fa-forward"
},
"preprocessing_adapter1": {
"type": "string",
"description": "Specify the nucleotide sequence for the forward read/R1.",
"fa_icon": "fas fa-grip-lines",
"help_text": "Specify a nucleotide sequence for the forward read/R1.\n\nIf not modified by the user, the default for the particular preprocessing tool will be used. Therefore, to turn off adapter trimming use `--preprocessing_skipadaptertrim`.\n\n> Modifies AdapterRemoval parameter: `--adapter1`\n> Modifies fastp parameter: `--adapter_sequence`"
},
"preprocessing_adapter2": {
"type": "string",
"description": "Specify the nucleotide sequence for the reverse read/R2.",
"fa_icon": "fas fa-grip-lines",
"help_text": "Specify a nucleotide sequence for the forward read/R2.\n\nIf not modified by the user, the default for the particular preprocessing tool will be used. To turn off adapter trimming use `--preprocessing_skipadaptertrim`.\n\n> Modifies AdapterRemoval parameter: `--adapter2`\n> Modifies fastp parameter: `--adapter_sequence_r2`"
},
"preprocessing_adapterlist": {
"type": "string",
"description": "Specify a list of all possible adapters to trim.",
"help_text": "Specify a file with a list of adapter (combinations) to remove from all files.\n\nOverrides the `--preprocessing_adapter1`/`--preprocessing_adapter2` parameters.\n\nNote that the two tools have slightly different behaviours.\n\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. Only Adapters in this list will be screened for and removed. See AdapterRemoval documentation for more information.\n\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp will first perform auto-detection and removal of adapters and then _additionally_ remove adapters present in the FASTA file one by one will.\n\n> Modifies AdapterRemoval parameter: `--adapter-list`\n> Modifies fastp parameter: `--adapter_fasta`",
"fa_icon": "fas fa-list"
},
"preprocessing_minlength": {
"type": "integer",
"default": 25,
"description": "Specify the minimum length reads must have to be retained.",
"help_text": "Specify the minimum length reads must have to be retained.\n\nReads smaller than this length after trimming are discarded and not included in downstream analyses. Typically in ancient DNA, users will set this to 30 or for very old samples around 25 bp - reads any shorter that this often are not specific enough to provide useful information.\n\n> Modifies AdapterRemoval parameter: `--minlength`\n> Modifies fastp parameter: `--length_required`",
"fa_icon": "fas fa-ruler-horizontal"
},
"preprocessing_trim5p": {
"type": "integer",
"default": 0,
"description": "Specify number of bases to hard-trim from 5 prime or front of reads.",
"help_text": "Specify number of bases to hard-trim from 5 prime or front of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n ⚠️ When this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior to mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie 2's inbuilt pre-mapping read-end trimming functionality. Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 5p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. Therefore, this is more suitable for hard-removal of damage before mapping (however the Bowtie 2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim5p`\n> Modifies fastp parameters: `--trim_front1` and/or `--trim_front2`",
"fa_icon": "fas fa-cut"
},
"preprocessing_trim3p": {
"type": "integer",
"default": 0,
"description": "Specify number of bases to hard-trim from 3 prime or tail of reads.",
"fa_icon": "fas fa-cut",
"help_text": "Specify number of bases to hard-trim from 3 prime or tail of reads. Exact behaviour varies per tool, see documentation. By default set to `0` to not perform any hard trimming.\n\nThis parameter allows users to 'hard' remove a number of bases from the beginning or end of reads, regardless of quality.\n\n⚠️ When this trimming occurs depends on the tool, i.e., the exact behaviour is not the same between AdapterRemoval and fastp.\n\nFor fastp: 5p/3p trimming occurs _prior_ to any other trimming (quality, poly-G, adapter). Please see the [fastp documentation](https://github.com/OpenGene/fastp#global-trimming) for more information. If you wish to use this to remove damage prior to mapping (to allow more specific mapping), ensure you have manually removed adapters/quality trimmed **prior** to giving the reads to nf-core/eager. Alternatively, you can use Bowtie 2's inbuilt pre-mapping read-end trimming functionality. Note that nf-core/eager only allows this hard trimming equally for both forward and reverse reads (i.e., you cannot provide different values for the 3p end for R1 and R2).\n\nFor AdapterRemoval, this trimming happens _after_ the removal of adapters, however prior to quality trimming. Therefore this is more suitable for hard-removal of damage before mapping (however the Bowtie 2 system will be more reliable).\n\n> Modifies AdapterRemoval parameters: `--trim3p`\n> Modifies fastp parameters: `--trim_tail1` and/or `--trim_tail2`"
},
"preprocessing_savepreprocessedreads": {
"type": "boolean",
"description": "Specify to save the preprocessed reads in the results directory.",
"fa_icon": "fas fa-save",
"help_text": "Specify to save the preprocessed reads in FASTQ format the results directory.\n\nThis can be useful for re-analysing FASTQ files manually, or uploading to public data repositories such as ENA/SRA (provided you don't filter by length or merge paired reads)."
},
"preprocessing_fastp_complexityfilter": {
"type": "boolean",
"description": "Specify to turn on sequence complexity filtering of reads.",
"help_text": "Performs a poly-G tail removal step in the beginning of the pipeline using fastp.\n\nThis can be useful for trimming ploy-G tails from short-fragments sequenced on two-colour Illumina chemistry such as NextSeqs or NovaSeqs (where no-fluorescence is read as a G on two-colour chemistry), which can inflate reported GC content values.\n\n> Modifies fastp parameter: `--trim_poly_g`",
"fa_icon": "fas fa-power-off"
},
"preprocessing_fastp_complexityfilter_threshold": {
"type": "integer",
"default": 10,
"description": "Specify the complexity threshold that must be reached or exceeded to retain reads.",
"help_text": "This option can be used to define the minimum length of a poly-G tail to begin low complexity trimming.\n\n> Modifies fastp parameter: `--poly_g_min_len`",
"fa_icon": "fas fa-filter"
},
"preprocessing_adapterremoval_preserve5p": {
"type": "boolean",
"description": "Skip AdapterRemoval quality and N base trimming at 5 prime end.",
"help_text": "Turns off quality based trimming at the 5p end of reads when any of the AdapterRemoval quality or N trimming options are used. Only 3p end of reads will be removed.\n\nThis also entirely disables quality based trimming of collapsed reads, since both ends of these are informative for PCR duplicate filtering. For more information see the AdapterRemoval [documentation](https://adapterremoval.readthedocs.io/en/stable/manpage.html#cmdoption-adapterremoval-preserve5p).\n\n> Modifies AdapterRemoval parameters: `--preserve5p`",
"fa_icon": "fas fa-shield-alt"
},
"preprocessing_adapterremoval_skipqualitytrimming": {
"type": "boolean",
"description": "Specify to skip AdapterRemoval quality and N trimming at the ends of reads.",
"help_text": "Turns off AdapterRemoval quality trimming from ends of reads.\n\nThis can be useful to reduce runtime when running public data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimqualities` ",
"fa_icon": "fas fa-forward"
},
"preprocessing_adapterremoval_trimbasequalitymin": {
"type": "integer",
"default": 20,
"description": "Specify AdapterRemoval minimum base quality for trimming off bases.",
"help_text": "Defines the minimum read quality per base that is required for a base to be kept by AdapterRemoval. Individual bases at the ends of reads falling below this threshold will be clipped off.\n\n> Modifies AdapterRemoval parameter: `--minquality`",
"fa_icon": "fas fa-filter"
},
"preprocessing_adapterremoval_skipntrimming": {
"type": "boolean",
"description": "Specify to skip AdapterRemoval N trimming (quality trimming only).",
"help_text": "Turns off AdapterRemoval N trimming from ends of reads.\n\nThis can be useful to reduce runtime when running publicly available data that has already been processed.\n\n> Modifies AdapterRemoval parameters: `--trimns` ",
"fa_icon": "fas fa-forward"
},
"preprocessing_adapterremoval_adapteroverlap": {
"type": "integer",
"default": 1,
"description": "Specify the AdapterRemoval minimum adapter overlap required for trimming.",
"fa_icon": "fas fa-filter",
"help_text": "Specifies a minimum number of bases that overlap with the adapter sequence before AdapterRemoval trims adapters sequences from reads.\n\n> Modifies AdapterRemoval parameter: `--minadapteroverlap`"
},
"preprocessing_adapterremoval_qualitymax": {
"type": "integer",
"default": 41,
"description": "Specify the AdapterRemoval maximum Phred score used in input FASTQ files.",
"help_text": "Specify maximum Phred score of the quality field of FASTQ files.\n\nThe quality-score range can vary depending on the machine and version (e.g. see diagram [here](https://en.wikipedia.org/wiki/FASTQ_format#Encoding), and this allows you to increase from the default AdapterRemoval value of 41.\n\nNote that while this can theoretically provide you with more confident and precise base call information, many downstream tools only accept FASTQ files with Phred scores limited to a max of 41, and therefore increasing the default for this parameter may make the resulting preprocessed files incompatible with some downstream tools.\n\n> Modifies AdapterRemoval parameters: `--qualitymax`",
"fa_icon": "fas fa-tachometer-alt"
}
},
"fa_icon": "fas fa-cut"
},
"mapping": {
"title": "Mapping",
"type": "object",
"description": "Options for aligning reads against reference genome(s)",
"default": "",
"properties": {
"run_fastq_sharding": {
"type": "boolean",
"description": "Specify to turn on FASTQ sharding.",
"fa_icon": "fas fa-power-off",
"help_text": "Sharding will split the FASTQs into smaller chunks before mapping. These chunks are then mapped in parallel. This approach can speed up the mapping process for larger FASTQ files."
},
"fastq_shard_size": {
"type": "integer",
"default": 1000000,
"description": "Specify the number of reads in each shard when splitting.",
"fa_icon": "fas fa-arrows-alt-v",
"help_text": "Make sure to choose a value that makes sense for your dataset. Small values can create many files, which can end up negatively affecting the overall speed of the mapping process."
},
"mapping_tool": {
"type": "string",
"default": "bwaaln",
"enum": ["bwaaln", "bwamem", "bowtie2", "circularmapper"],
"description": "Specify which mapper to use.",
"help_text": "Specify which mapping tool to use. Options are BWA aln ('`bwaaln`'), BWA mem ('`bwamem`'), circularmapper ('`circularmapper`'), or Bowtie 2 ('`bowtie2`'). BWA aln is the default and highly suited for short-read ancient DNA. BWA mem can be quite useful for modern DNA, but is rarely used in projects for ancient DNA. CircularMapper enhances the mapping procedure to circular references, using the BWA algorithm but utilizing an extend-remap procedure (see [Peltzer et al 2016](https://doi.org/10.1186/s13059-016-0918-z) for details). Bowtie 2 is similar to BWA aln, and has recently been suggested to provide slightly better results under certain conditions ([Poullet and Orlando 2020](https://doi.org/10.3389/fevo.2020.00105)), as well as providing extra functionality (such as FASTQ trimming).\n\nMore documentation can be seen for each tool under:\n\n- [BWA aln](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [BWA mem](http://bio-bwa.sourceforge.net/bwa.shtml#3)\n- [CircularMapper](https://circularmapper.readthedocs.io/en/latest/contents/userguide.html)\n- [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#command-line)",
"fa_icon": "fas fa-hammer"
},
"mapping_bwaaln_n": {
"type": "number",
"default": 0.01,
"description": "Specify the amount of allowed mismatches in the alignment for mapping with BWA aln.",
"help_text": "Specify how many mismatches are allowed in a read during alignment with BWA aln. Default is set following recommendations from [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who compared alignment to human reference genomes.\n\nIf you're uncertain what value to use, check out this [Shiny App](https://apeltzer.shinyapps.io/bwa-mismatches/) for more information.\n\n> Modifies BWA aln parameter: `-n`",
"fa_icon": "fas fa-sort-numeric-down"
},
"mapping_bwaaln_k": {
"type": "integer",
"default": 2,
"description": "Specify the maximum edit distance allowed in a seed for mapping with BWA aln.",
"help_text": "Specify the maximum edit distance during the seeding phase of the BWA aln mapping algorithm.\n\n> Modifies BWA aln parameter: `-k`",
"fa_icon": "fas fa-people-arrows"
},
"mapping_bwaaln_l": {
"type": "integer",
"default": 1024,
"description": "Specify the length of seeds to be used for BWA aln.",
"help_text": "Specify the length of the seed used in BWA aln. Default is set to be 'turned off' at the recommendation of [Oliva et al. 2021](https://doi.org/10.1093/bib/bbab076) who tested when aligning to human reference genomes. Seeding is 'turned off' by specifying an arbitrarily long number to force the entire read to act as the seed.\n\nNote: Despite being recommended, turning off seeding can result in long runtimes!\n\n> Modifies BWA aln parameter: `-l`",
"fa_icon": "fas fa-ruler-horizontal"
},
"mapping_bwaaln_o": {
"type": "integer",
"default": 2,
"description": "Specify the number of gaps allowed for alignment with BWA aln.",
"help_text": "Specify the number of gaps allowed for mapping with BWA aln. Default is set to BWA default.\n\n> Modifies BWA aln parameter: `-o`",
"fa_icon": "fas fa-people-arrows"
},
"mapping_bwamem_k": {
"type": "integer",
"default": 19,
"description": "Specify the minimum seed length for alignment with BWA mem.",
"help_text": "Configures the minimum seed length used in BWA mem. Default is set to BWA default.\n\n> Modifies BWA mem parameter: `-k`",
"fa_icon": "fas fa-seedling"
},
"mapping_bwamem_r": {
"type": "number",
"default": 1.5,
"description": "Specify the re-seeding threshold for alignment with BWA mem.",
"help_text": "Configures the re-seeding threshold used in BWA mem. Default is set to BWA default.\n\n> Modifies BWA mem parameter: `-r`",
"fa_icon": "fas fa-angle-double-left"
},
"mapping_bowtie2_alignmode": {
"type": "string",
"default": "local",
"description": "Specify the Bowtie 2 alignment mode.",
"help_text": "Specify the type of read alignment to use with Bowtie 2. 'Local' allows only partial alignment of read with ends of reads possibly 'soft-clipped' (i.e. remain unaligned/ignored), if the soft-clipped alignment provides best alignment score. 'End-to-end' requires all nucleotides to be aligned.\nDefault is set following [Cahill et al (2018)](https://doi.org/10.1093/molbev/msy018) and [Poullet and Orlando 2020](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full)\n\n> Modifies Bowtie 2 presets: `--local`, `--end-to-end`",
"fa_icon": "fas fa-toggle-on",
"enum": ["local", "end-to-end"]
},
"mapping_bowtie2_sensitivity": {
"type": "string",
"default": "sensitive",
"description": "Specify the level of sensitivity for the Bowtie 2 alignment mode.",
"help_text": "Specify the Bowtie 2 'preset' to use. These strings apply to both `--mapping_bowtie2_alignmode` options. See the Bowtie 2 manual for actual settings.\nDefault is set following [Poullet and Orlando (2020)](https://www.frontiersin.org/articles/10.3389/fevo.2020.00105/full), when running damaged-data without UDG treatment.\n\n> Modifies the Bowtie 2 parameters: `--fast`, `--very-fast`, `--sensitive`, `--very-sensitive`, `--fast-local`, `--very-fast-local`, `--sensitive-local`, `--very-sensitive-local`",
"fa_icon": "fas fa-microscope",
"enum": ["fast", "very-fast", "sensitive", "very-sensitive"]
},
"mapping_bowtie2_n": {
"type": "integer",
"default": 0,
"description": "Specify the number of mismatches in seed for alignment with Bowtie 2.",
"help_text": "Specify the number of mismatches allowed in the seed during seed-and-extend procedure of Bowtie 2. This will override any values set with `--mapping_bowtie2_sensitivity`. Can either be 0 or 1.\n\n> Modifies Bowtie 2 parameter: `-N`",
"fa_icon": "fas fa-sort-numeric-down"
},
"mapping_bowtie2_l": {
"type": "integer",
"default": 20,
"description": "Specify the length of seed substrings for Bowtie 2.",
"help_text": "Specify the length of the seed sub-string to use during seeding of Bowtie 2. This will override any values set with `--mapping_bowtie2_sensitivity`.\n\n> Modifies Bowtie 2 parameter: `-L`",
"fa_icon": "fas fa-ruler-horizontal"
},
"mapping_bowtie2_trim5": {
"type": "integer",
"default": 0,
"description": "Specify the number of bases to trim off from 5 prime end of read before alignment with Bowtie 2.",
"help_text": "Specify the number of bases to trim at the 5' (left) end of read before alignment with Bowtie 2. This may be useful when left-over sequencing artefacts of in-line barcodes are present.\n\n> Modifies Bowtie 2 parameter: `--trim5`",
"fa_icon": "fas fa-cut"
},
"mapping_bowtie2_trim3": {
"type": "integer",
"default": 0,
"description": "Specify the number of bases to trim off from 3 prime end of read before alignment with Bowtie 2.",
"help_text": "Specify the number of bases to trim at the 3' (right) end of read before alignment with Bowtie 2. This may be useful when left-over sequencing artefacts of in-line barcodes are present.\n\n> Modifies Bowtie 2 parameter: `--trim3`",
"fa_icon": "fas fa-cut"
},
"mapping_bowtie2_maxins": {
"type": "integer",
"default": 500,
"description": "Specify the maximum fragment length for Bowtie2 paired-end mapping mode only.",
"help_text": "The maximum fragment for valid paired-end alignments. Only for paired-end mapping (i.e. unmerged), and therefore typically only useful for modern data.\n\n> Modifies Bowtie2 parameter: `--maxins`",
"fa_icon": "fas fa-exchange-alt"
},
"mapping_circularmapper_circularfilter": {
"type": "boolean",
"fa_icon": "fas fa-filter",
"description": "Turn on to remove reads that did not map to the circularised genome.",
"help_text": "If you want to filter out reads that don't map to elongated/circularised chromosome (and also non-circular chromosome headers) from the resulting BAM file, turn this on.\n\n> Modifies `-f` and `-x` parameters of CircularMapper's RealignSAMFile"
}
},
"fa_icon": "fas fa-layer-group"
},
"bam_filtering": {
"title": "BAM Filtering",
"type": "object",
"description": "Options related to length, quality, and map status filtering of reads.",
"default": "",
"properties": {
"run_bamfiltering": {
"type": "boolean",
"description": "Specify to turn on filtering of reads in BAM files after mapping. By default, only mapped reads retained.",
"fa_icon": "fas fa-power-off",
"help_text": "Turns on the filtering subworkflow for mapped BAM files after the read alignment step. Filtering includes removal of unmapped reads, length filtering, and mapping quality filtering.\n\nWhen turning on BAM filtering, by default only the mapped/unmapped filter is activated, thus only mapped reads are retained for downstream analyses. See `--bamfiltering_retainunmappedgenomicbam` to retain unmapped reads, if filtering only for length and/or quality is preferred.\n\nNote this subworkflow can also be activated if `--run_metagenomics` is supplied."
},
"bamfiltering_minreadlength": {
"type": "integer",
"default": 0,
"description": "Specify the minimum read length mapped reads should have for downstream genomic analysis.",
"help_text": "Specify to remove mapped reads that fall below a certain length threshold after mapping.\n\nThis can be useful to get more realistic 'endogenous DNA' or 'on target read' percentages.\n\nIf used _instead of_ minimum length read filtering at AdapterRemoval, you can get more more realistic endogenous DNA estimates when most of your reads are very short (e.g. in single-stranded libraries or samples with highly degraded DNA). In these cases, the default minimum length filter at earlier adapter clipping/read merging will remove a very large amount of your reads in your library (including valid reads), thus making an artificially small denominator for a typical endogenous DNA calculation.\n\nTherefore by retaining all of your reads until _after_ mapping (i.e., turning off the adapter clipping/read merging filter), you can generate more 'real' endogenous DNA estimates immediately after mapping (with a better denominator). Then after estimating this, filter using this parameter to retain only 'useful' reads (i.e., those long enough to provide higher confidence of their mapped position) for downstream analyses.\n\nBy specifying `0`, no length filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies filter_bam_fragment_length.py parameter: `-l`",
"fa_icon": "fas fa-filter"
},
"bamfiltering_mappingquality": {
"type": "integer",
"default": 0,
"description": "Specify the minimum mapping quality reads should have for downstream genomic analysis.",
"help_text": "Specify a mapping quality threshold for mapped reads to be kept for downstream analysis.\n\nBy default all reads are retained and this option is therefore set to 0 to ensure no quality filtering is performed.\n\nNote that by default the output BAM files of this step are _not_ stored in the results directory (as it is assumed that deduplicated BAM files are preferred). See `--bamfiltering_savefilteredbams` if you wish to save these.\n\n> Modifies samtools view parameter: `-q`",
"fa_icon": "fas fa-filter"
},
"bamfilter_genomicbamfilterflag": {
"type": "integer",
"default": 4,
"fa_icon": "fas fa-flag",
"description": "Specify the SAM format flag of reads to remove during BAM filtering for downstream genomic steps.",
"help_text": "Specify to customise the exact SAM format flag of reads you wish to _remove_ from your BAM file to for downstream _genomic_ analyses.\n\nYou can explore more using a tool from the Broad Institute [here](https://broadinstitute.github.io/picard/explain-flags.html)\n\n> ⚠️ Modify at your own risk, alternative flags are not necessarily supported in downstream steps!\n\n> Modifies samtools parameter: `-F`"
},
"bamfiltering_retainunmappedgenomicbam": {
"type": "boolean",
"description": "Specify to retain unmapped reads in the BAM file used for downstream genomic analyses.",
"help_text": "Specify to retain unmapped reads (optionally also length filtered) in the genomic BAM for downstream analysis. By default, the pipeline only keeps mapped reads for downstream analysis.\n\nThis is also turned on if `--metagenomics_input` is set to `all`.\n\n> ⚠️ This will likely slow down run time of downstream pipeline steps!\n\n> Modifies tool parameter(s):\n> - samtools view: `-f 4` / `-F 4`",
"fa_icon": "fas fa-piggy-bank"
},
"bamfiltering_generateunmappedfastq": {
"type": "boolean",
"description": "Specify to generate FASTQ files containing only unmapped reads from the aligner generated BAM files.",
"help_text": "Specify to turn on the generation and saving of FASTQs of only the unmapped reads from the mapping step in the results directory.\n\nThis can be useful if you wish to do other analysis of the unmapped reads independently of the pipeline.\n\nNote: the reads in these FASTQ files have _not_ undergone length of quality filtering\n\n> Modifies samtools fastq parameter: `-f 4`",
"fa_icon": "fas fa-file-alt"
},
"bamfiltering_generatemappedfastq": {
"type": "boolean",
"description": "Specify to generate FASTQ files containing only mapped reads from the aligner generated BAM files.",
"help_text": "Specify to turn on the generation and saving of FASTQs of only the mapped reads from the mapping step in the results directory.\n\nThis can be useful if you wish to do other analysis of the mapped reads independently of the pipeline, such as remapping with different parameters (whereby only including mapped reads will speed up computation time during the re-mapping due to reduced input data).\n\nNote the reads in these FASTQ files have _not_ undergone length of quality filtering\n\n> Modifies samtools fastq parameter: `-F 4`",
"fa_icon": "far fa-file-alt"
},
"bamfiltering_savefilteredbams": {
"type": "boolean",
"description": "Specify to save the intermediate filtered genomic BAM files in the results directory.",
"help_text": "Specify to save intermediate length- and/or quality-filtered genomic BAM files in the results directory.",
"fa_icon": "fas fa-save"
}
},
"fa_icon": "fas fa-filter"
},
"metagenomics": {
"title": "Metagenomics",
"type": "object",
"description": "Options related to metagenomic screening.",
"default": "",
"properties": {
"run_metagenomics": {
"type": "boolean",
"description": "Specify to turn on metagenomic screening of mapped, unmapped or all reads.",
"fa_icon": "fas fa-power-off",
"help_text": "Specify to turn on the metagenomic screening subworkflow of the pipeline, where reads are screened against large databases. Typically used for pathogen screening or microbial community analysis.\n\nIf supplied, this will also turn on the BAM filtering subworkflow of the pipeline.\n\nRequires subsequent specification of `--metagenomics_profiling_tool` and `--metagenomics_profiling_database`."
},
"metagenomics_input": {
"type": "string",
"default": "unmapped",
"description": "Specify which type of reads to use for metagenomic screening.",
"enum": ["unmapped", "mapped", "all"],
"fa_icon": "fas fa-hand-pointer",
"help_text": "Specify to select which mapped reads will be sent for metagenomic analysis.\n\nThis influences which reads are sent to this step, whether you want unmapped reads (used in most cases, as 'host reads' can often be contaminants in microbial genomes), mapped reads (e.g, when doing competitive against a genomic reference of multiple genomes and which to apply LCA correction) or all reads.\n\n> ⚠️ If you skip paired-end merging, all reads will be screened as independent reads - not as pairs! - as all FASTQ files from BAM filtering are merged into one. This merged file is _not_ saved in results directory.\n\n> Modifies samtools fastq parameters: `-f 4` / `-F 4`"
},
"run_metagenomics_complexityfiltering": {
"type": "boolean",
"fa_icon": "fas fa-power-off",
"help_text": "Specify to turn on a subworkflow of the pipeline that filters the FASTQ files for complexity before the metagenomics profiling.\nUse the `--metagenomics_complexity_tool` parameter to select a method.",
"description": "Specify to run a complexity filter on the metagenomics input files before classification."
},
"metagenomics_complexity_savefastq": {
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Specify to save FASTQ files containing the complexity-filtered reads before metagenomic classification.",
"help_text": "Specify to save the complexity-filtered FASTQ files to the results directory."
},
"metagenomics_complexity_tool": {
"type": "string",
"default": "bbduk",
"description": "Specify which tool to use for trimming, filtering or reformatting of FASTQ reads that go into metagenomics screening.",
"enum": ["bbduk", "prinseq"],
"fa_icon": "fas fa-hammer",
"help_text": "Specify to select which tool is used to generate a final set of reads for the metagenomic classifier after any necessary trimming, filtering or reformatting of the reads.\n\nThis intermediate file is not saved in the results directory unless marked with `--metagenomics_complexity_savefastq`."
},
"metagenomics_complexity_entropy": {
"type": "number",
"fa_icon": "fas fa-sort-numeric-up",
"description": "Specify the entropy threshold under which a sequencing read will be complexity-filtered out.",
"default": 0.3,
"help_text": "Specify the minimum 'entropy' value for complexity filtering for the BBDuk or PRINSEQ++ tools.\n\nThis value will only be used for PRINSEQ++ if `--metagenomics_prinseq_mode` is set to `entropy`.\n\nEntropy here corresponds to the amount of sequence variation existing within the read. Higher values correspond to more variety and thus will likely result in more specific matching to a taxon's reference genome. The trade-off here is fewer reads (or abundance information) available for having a confident identification.\n\n> Modifies parameters:\n> - BBDuk: `entropy=`\n> - PRINSEQ++: `-lc_entropy`"
},
"metagenomics_prinseq_mode": {
"type": "string",
"default": "entropy",
"enum": ["entropy", "dust"],
"fa_icon": "fas fa-toggle-on",
"description": "Specify the complexity filter mode for PRINSEQ++.",
"help_text": "Specify the complexity filter mode for PRINSEQ++.\n\nUse the selected mode together with the correct flag:\n'dust' requires the `--metagenomics_prinseq_dustscore` parameter set\n'entropy' requires the `--metagenomics_complexity_entropy` parameter set\n\n> Modifies parameters:\n> - PRINSEQ++: `-lc_entropy`\n> - PRINSEQ++: `-lc_dust`"
},
"metagenomics_prinseq_dustscore": {
"type": "number",
"default": 0.5,
"fa_icon": "fas fa-head-side-mask",
"description": "Specify the minimum dust score for PRINTSEQ++ complexity filtering",
"help_text": "Specify the minimum dust score below which low-complexity reads will be removed. A DUST score is based on how often different tri-nucleotides occur along a read.\n\n> Modifies tool parameter(s):\n> - PRINSEQ++: `--lc_dust`"
},
"metagenomics_profiling_tool": {
"type": "string",
"description": "Specify which tool to use for metagenomic profiling and screening. Required if `--run_metagenomics` flagged.",
"enum": ["malt", "metaphlan", "kraken2", "krakenuniq"],
"fa_icon": "fas fa-toolbox",
"help_text": "Select which tool to run metagenomics profiling on designated metagenomics_input. These tools behave vastly differently due to performing read profiling using different methods and yield vastly different reuslts.\n\nMALT and MetaPhlAn are alignment based, whereas Kraken2 and KrakenUniq are k-mer based.\n\nMALT has addtional postprocessing available (via `--run_metagenomics_postprocessing`) which can help authenticate alignments to a provided list of taxonomic nodes using established ancientDNA characteristics.\n\nMetaPhlAn performs profiling on the metagenomcis input data. This may be used to characterize the metagenomic community of a sample but care must be taken that you are not just looking at the modern metagenome of an ancient sample (for instance, soil microbes on a bone)\n\n Kraken2 and KrakenUniq are metagenomics classifiers that rely on fast k-mer-matching rather than whole-read alignments and are very memory efficient."
},
"metagenomics_profiling_database": {
"type": "string",
"format": "path",
"description": "Specify a databse directory or .tar.gz file of a database directory to run metagenomics profiling on. Required if `--run_metagenomics` flagged.",
"fa_icon": "fas fa-database",
"help_text": "Specify a metagenomics profiling database to use with the designated metagenomics_profiling_tool on the selected metagenomics_input. Databases can be provided both as a directory, or a tar.gz of a directory. Metagenomic databases are NOT compatible across different tools (ie a MALT database is different from a kraken2 database).\n\nAll databases need to be pre-built/downloaded for use in nf-core/eager. Database construction is often a balancing act between breadth of sequence diversity and size.\n\nModifies tool parameter(s):\n> - krakenuniq: `--db`\n> - kraken2: `--db`\n> - MetaPhlAn: `--bowtie2db` and `--index`\n> - MALT: '-index'"
},
"metagenomics_kraken2_savereads": {
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Turn on saving reads assigned by KrakenUniq or Kraken2",
"help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--classified-out` and `--unclassified-out`"
},
"metagenomics_kraken2_savereadclassifications": {
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Turn on saving of KrakenUniq or Kraken2 per-read taxonomic assignment file",
"help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on specific taxonomic taxonomic assignment that that read recieved.\n\n> Modifies tool parameter(s):\n> - krakenuniq: `--output`"
},
"metagenomics_krakenuniq_ramchunksize": {
"type": "string",
"default": "16G",
"description": "Specify how large to chunk database when loading into memory for KrakenUniq",
"fa_icon": "fas fa-database",
"help_text": "nf-core/eager utilises a 'low memory' option for KrakenUniq that can reduce the amount of RAM the process requires using the `--preloaded` option.\n\nA further extension to this option is that you can specify how large each chunk of the database should be that gets loaded into memory at any one time. You can specify the amount of RAM to chunk the database to with this parameter, and is particularly useful for people with limited computational resources.\n\nMore information about this parameter can be seen [here](https://github.com/fbreitwieser/krakenuniq/blob/master/README.md#new-release-v07).\n\n> Modifies KrakenUniq parameter: --preload-size\n\n"
},
"metagenomics_kraken2_saveminimizers": {
"type": "boolean",
"description": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.",
"fa_icon": "fas fa-save",
"help_text": "Turn on saving minimizer information in the kraken2 report thus increasing to an eight column layout.\n\n Modifies kraken2 parameter: `--report-minimizer-data`."
},
"metagenomics_malt_mode": {
"type": "string",
"default": "BlastN",
"description": "Specify which alignment mode to use for MALT.",
"fa_icon": "fas fa-align-left",
"help_text": "Use this to run the program in 'BlastN', 'BlastP', 'BlastX' modes to align DNA\nand DNA, protein and protein, or DNA reads against protein references respectively. Ensure your database matches the mode. Check the [MALT manual](http://ab.inf.uni-tuebingen.de/data/software/malt/download/manual.pdf) for more details.\n\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MALT: `-m`\n",
"enum": ["BlastN", "BlastP", "BlastX"]
},
"metagenomics_malt_alignmentmode": {
"type": "string",
"default": "SemiGlobal",
"description": "Specify alignment method for MALT.",
"fa_icon": "fas fa-align-center",
"help_text": "Specify what alignment algorithm to use. Options are 'Local' or 'SemiGlobal'. Local is a BLAST like alignment, but is much slower. Semi-global alignment aligns reads end-to-end. Default: `'SemiGlobal'`\n\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MALT: `-at`",
"enum": ["Local", "SemiGlobal"]
},
"metagenomics_malt_minpercentidentity": {
"type": "integer",
"default": 85,
"description": "Percent identity value threshold for MALT.",
"fa_icon": "fas fa-id-card",
"help_text": "Specify the minimum percent identity (or similarity) a sequence must have to the reference for it to be retained.\n\nOnly used when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MALT:`-id`"
},
"metagenomics_malt_toppercent": {
"type": "integer",
"default": 1,
"description": "Specify the percent for LCA algorithm for MALT (see MEGAN6 CE manual).",
"fa_icon": "fas fa-percent",
"help_text": "Specify the top percent value of the LCA algorithm. From the [MALT manual](http://ab.inf.uni-tuebingen.de/data/software/malt/download/manual.pdf): \"For each\nread, only those matches are used for taxonomic placement whose bit disjointScore is within\n10% of the best disjointScore for that read.\".\n\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MALT: `-top`"
},
"metagenomics_malt_minsupportmode": {
"type": "string",
"default": "percent",
"description": "Specify whether to use percent or raw number of reads for minimum support required for taxon to be retained for MALT.",
"fa_icon": "fas fa-drumstick-bite",
"help_text": "Specify whether to use a percentage, or raw number of reads as the value used to decide the minimum support a taxon requires to be retained.\n\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MALT: `-sup` and `-supp`",
"enum": ["percent", "reads"]
},
"metagenomics_malt_minsupportpercent": {
"type": "number",
"default": 0.01,
"description": "Specify the minimum percentage of reads a taxon of sample total is required to have to be retained for MALT.",
"fa_icon": "fas fa-percentage",
"help_text": "Specify the minimum number of reads (as a percentage of all assigned reads) a given taxon is required to have to be retained as a positive 'hit' in the RMA6 file. This only applies when `--malt_min_support_mode` is set to 'percent'.\n\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MALT: `-supp`"
},
"metagenomics_malt_minsupportreads": {
"type": "integer",
"default": 1,
"description": "Specify a minimum number of reads a taxon of sample total is required to have to be retained in malt or kraken. Not compatible with --malt_min_support_mode 'percent'.",
"fa_icon": "fas fa-sort-numeric-up-alt",
"help_text": "For usage in malt: Specify the minimum number of reads a given taxon is required to have to be retained as a positive 'hit'.\nFor malt, this only applies when `--malt_min_support_mode` is set to 'reads'. \n\n> Modifies tool parameter(s):\n> - MALT: `-sup` \n"
},
"metagenomics_malt_maxqueries": {
"type": "integer",
"default": 100,
"description": "Specify the maximum number of queries a read can have for MALT.",
"fa_icon": "fas fa-phone",
"help_text": "Specify the maximum number of alignments a read can have. All further alignments are discarded.\n\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MALT: `-mq`"
},
"metagenomics_malt_memorymode": {
"type": "string",
"default": "load",
"description": "Specify the memory load method. Do not use 'map' with GPFS file systems for MALT as can be very slow.",
"fa_icon": "fas fa-memory",
"help_text": "\nHow to load the database into memory. Options are `'load'`, `'page'` or `'map'`.\n'load' directly loads the entire database into memory prior seed look up, this\nis slow but compatible with all servers/file systems. `'page'` and `'map'`\nperform a sort of 'chunked' database loading, allowing seed look up prior entire\ndatabase loading. Note that Page and Map modes do not work properly not with\nmany remote file-systems such as GPFS.\n\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MALT: `--memoryMode`",
"enum": ["load", "page", "map"]
},
"metagenomics_malt_savereads": {
"type": "boolean",
"description": "Specify to also produce SAM alignment files. Note this includes both aligned and unaligned reads, and are gzipped. Note this will result in very large file sizes.",
"fa_icon": "fas fa-file-alt",
"help_text": "Specify to _also_ produce gzipped SAM files of all alignments and un-aligned reads in addition to RMA6 files. These are **not** soft-clipped or in 'sparse' format. Can be useful for downstream analyses due to more common file format. \n\n:warning: can result in very large run output directories as this is essentially duplication of the RMA6 files.\n\n> Sets tool parameter(s):\n> - MALT: `--alignments`"
},
"metagenomics_malt_group_size": {
"type": "integer",
"default": 0,
"description": "Define how many fastq files should be submitted in the same malt run. Default value of 0 runs all files at once.",
"fa_icon": "fas fa-barcode",
"help_text": "Very many (large) fastq files run through MALT at the same time can lead to excessively long runtimes. This parameter allows for parallelization of MALT runs. Please note, MALT is resource heavy and setting this value N above the default (0) will spawn multiple metagenomics_malt_group_size jobs where N is the number of samples per group. Please only use this if it is necessary to avoid runtime limits on your HPC cluster since the overhead of loading a database is high."
},
"metagenomics_run_postprocessing": {
"type": "boolean",
"description": "Activate post-processing of metagenomics profiling tool selected.",
"help_text": "Activate the corresponding post-processing tool for your metagenomics profiling software. \n\nmalt --> maltextract\nkrakenuniq/kraken2/metaphlan --> taxpasta\n\nNote: Postprocessing is automatically carried out when using `kraken2` and `krakenuniq` ",
"fa_icon": "fab fa-buromobelexperte"
},
"metagenomics_maltextract_taxonlist": {
"type": "string",
"description": "Path to a text file with taxa of interest (one taxon per row, NCBI taxonomy name format)",
"default": null,
"help_text": "Path to a `.txt` file with taxa of interest you wish to assess for aDNA characteristics. In `.txt` file should be one taxon per row, and the taxon should be in a valid [NCBI taxonomy](https://www.ncbi.nlm.nih.gov/taxonomy) name format corresponding to a taxonomic node in your MALT database. An example can be found on the [HOPS github](https://raw.githubusercontent.com/rhuebler/HOPS/external/Resources/default_list.txt).\\n\\nNecessary when `--metagenomics_profiling_tool malt` specified and `--metagenomics_run_postprocessing` flagged.\n\n Modifies tool parameter(s):\n> - MaltExtract: `-t`",
"fa_icon": "fas fa-align-left"
},
"metagenomics_maltextract_ncbidir": {
"type": "string",
"description": "Path to directory containing containing NCBI resource files (ncbi.tre and ncbi.map; available: https://github.com/rhuebler/HOPS/)",
"default": null,
"help_text": "Path to directory containing containing the NCBI resource tree and taxonomy table files (ncbi.tre and ncbi.map; available at the [HOPS repository](https://github.com/rhuebler/HOPS/Resources)).\\n\\nNecessary when `--metagenomics_profiling_tool malt` and `--metagenomics_run_postprocessing` specified.\n\n Modifies tool parameter(s):\n> - MaltExtract: `-r`",
"fa_icon": "fab fa-buffer"
},
"metagenomics_maltextract_filter": {
"type": "string",
"default": "def_anc",
"description": "Specify which MaltExtract filter to use.",
"help_text": "Specify which MaltExtract filter to use. This is used to specify what types of characteristics to scan for. The default will output statistics on all alignments, and then a second set with just reads with one C to T mismatch in the first 5 bases. Further details on other parameters can be seen in the [HOPS documentation](https://github.com/rhuebler/HOPS/#maltextract-parameters).\\n\\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MaltExtract: `-f`",
"fa_icon": "fas fa-filter",
"enum": ["def_anc", "default", "ancient", "scan", "crawl", "srna"]
},
"metagenomics_maltextract_toppercent": {
"type": "number",
"default": 0.01,
"description": "Specify percent of top alignments to use.",
"help_text": "Specify frequency of top alignments for each read to be considered for each node.\\n Note, value should be given in the format of a proportion (where 1 would correspond to 100%, and 0.1 would correspond to 10%).\\n\\n> :warning: this parameter follows the same concept as `--malt_top_percent` but uses a different notation i.e. integer (MALT) versus float (MALTExtract)\\n\\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MaltExtract: `-a`",
"fa_icon": "fas fa-percent"
},
"metagenomics_maltextract_destackingoff": {
"type": "boolean",
"description": "Turn off destacking.",
"help_text": "Turn off destacking. If left on, a read that overlaps with another read will be\\nremoved (leaving a depth coverage of 1).\\n\\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Sets tool parameter(s):\n> - MaltExtract: `--destackingOff`",
"fa_icon": "fab fa-stack-overflow"
},
"metagenomics_maltextract_downsamplingoff": {
"type": "boolean",
"description": "Turn off downsampling.",
"help_text": "Turn off downsampling. By default, downsampling is on and will randomly select 10,000 reads if the number of reads on a node exceeds this number. This is to speed up processing, under the assumption at 10,000 reads the species is a 'true positive'.\\n\\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Sets tool parameter(s):\n> - MaltExtract: `--downSampOff`",
"fa_icon": "fas fa-angle-double-down"
},
"metagenomics_maltextract_duplicateremovaloff": {
"type": "boolean",
"description": "Turn off duplicate removal.",
"help_text": "Turn off duplicate removal. By default, reads that are an exact copy (i.e. same start, stop coordinate and exact sequence match) will be removed as it is considered a PCR duplicate.\\n\\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Sets tool parameter(s):\n> - MaltExtract: `--dupRemOff`",
"fa_icon": "fas fa-copy"
},
"metagenomics_maltextract_matches": {
"type": "boolean",
"description": "Turn on exporting alignments of hits in BLAST format.",
"help_text": "Export alignments of hits for each node in BLAST format.\\n\\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MaltExtract: `--matches`",
"fa_icon": "fas fa-equals"
},
"metagenomics_maltextract_megansummary": {
"type": "boolean",
"description": "Turn on export of MEGAN summary files.",
"help_text": "Export 'minimal' summary files (i.e. without alignments) that can be loaded into [MEGAN6](https://doi.org/10.1371/journal.pcbi.1004957).\\n\\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Sets tool parameter(s):\n> - MaltExtract: `--meganSummary`"
},
"metagenomics_maltextract_minpercentidentity": {
"type": "number",
"default": 85,
"description": "Minimum percent identity alignments are required to have to be reported as candidate reads. Recommended to set same as MALT parameter.",
"help_text": "Minimum percent identity alignments are required to have to be reported. Higher values allows fewer mismatches between read and reference sequence, but therefore will provide greater confidence in the hit. Lower values allow more mismatches, which can account for damage and divergence of a related strain/species to the reference. Recommended to set same as MALT parameter or higher.\\n\\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Modifies tool parameter(s):\n> - MaltExtract: `--minPI`"
},
"metagenomics_maltextract_usetopalignment": {
"type": "boolean",
"description": "Turn on using top alignments per read after filtering.",
"help_text": "Use the best alignment of each read for every statistic, except for those concerning read distribution and coverage. \\n\\nOnly when `--metagenomics_profiling_tool malt` is also supplied.\n\n> Sets tool parameter(s):\n> - MaltExtract: `--useTopAlignment`",
"fa_icon": "fas fa-bahai"
}
},
"fa_icon": "fas fa-search"
},
"deduplication": {
"title": "Deduplication",
"type": "object",
"description": "Options for removal of PCR duplicates",
"default": "",
"properties": {
"skip_deduplication": {
"type": "boolean",
"description": "Specify to skip the removal of PCR duplicates.",
"fa_icon": "fas fa-forward"
},
"deduplication_tool": {
"type": "string",
"default": "markduplicates",
"description": "Specify which tool to use for deduplication.",
"help_text": "Specify which duplicate read removal tool to use. While `markduplicates` is set by default, an ancient DNA specific read deduplication tool `dedup` is offered (see [Peltzer et al. 2016](https://doi.org/10.1186/s13059-016-0918-z) for details). The latter utilises both ends of paired-end data to remove duplicates (i.e. true exact duplicates, as markduplicates will over-zealously deduplicate anything with the same starting position even if the ends are different).\n\n> ⚠️ DeDup can only be used on collapsed (i.e. merged) reads from paired-end sequencing.",
"enum": ["markduplicates", "dedup"],
"fa_icon": "fas fa-hammer"
}
},
"fa_icon": "fas fa-clone"
},
"damage_manipulation": {
"title": "Damage Manipulation",
"type": "object",
"description": "Options for filtering for, trimming or rescaling characteristic ancient DNA damage patterns",
"default": "",
"fa_icon": "fas fa-chart-line",
"properties": {
"run_mapdamage_rescaling": {
"type": "boolean",
"fa_icon": "fas fa-power-off",
"description": "Specify to turn on damage rescaling of BAM files using mapDamage2 to probabilistically remove damage.",
"help_text": "Specify to turn on mapDamage2's BAM rescaling functionality. This probabilistically replaces Ts back to Cs depending on the likelihood this reference-mismatch was originally caused by damage. If the library is specified to be single-stranded, this will automatically use the `--single-stranded` mode.\nThis process will ameliorate the effects of aDNA damage, but also increase reference-bias.\n\n**This functionality does not have any MultiQC output.**\n ⚠️ Rescaled libraries will not be merged with non-scaled libraries of the same sample for downstream genotyping, as the model may be different for each library. If you wish to merge these, please do this manually and re-run nf-core/eager using the merged BAMs as input.\n\n> Modifies mapDamage2 parameter: `--rescale`"
},
"damage_manipulation_rescale_seqlength": {
"type": "integer",
"default": 12,
"description": "Specify the length of read sequence to use from each side for rescaling.",
"help_text": "Specify the length in bp from the end of the read that mapDamage should rescale at both ends. This can be overridden by `--rescalelength*p`.\n\n> Modifies mapDamage2 parameter: `--seq-length`",
"fa_icon": "fas fa-ruler-horizontal"
},
"damage_manipulation_rescale_length_5p": {
"type": "integer",
"default": 0,
"description": "Specify the length of read for mapDamage2 to rescale from 5 prime end.",
"help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. This overrides `--rescale_seqlength`.\n\n> Modifies mapDamage2 parameter: `--rescale-length-5p`",
"fa_icon": "fas fa-balance-scale-right"
},
"damage_manipulation_rescale_length_3p": {
"type": "integer",
"default": 0,
"description": "Specify the length of read for mapDamage2 to rescale from 3 prime end.",
"help_text": "Specify the length in bp from the end of the read that mapDamage should rescale. This overrides `--rescale_seqlength`.\n\n> Modifies mapDamage2 parameter `--rescale-length-3p`",
"fa_icon": "fas fa-balance-scale-left"
},
"run_pmd_filtering": {
"type": "boolean",
"description": "Specify to turn on PMDtools filtering.",
"help_text": "Specify to run PMDtools for damage-based read filtering in sequencing libraries.",
"fa_icon": "fas fa-power-off"
},
"damage_manipulation_pmdtools_threshold": {
"type": "integer",
"default": 3,
"fa_icon": "far fa-chart-bar",
"description": "Specify PMD score threshold for PMDtools.",
"help_text": "Specify the PMDScore threshold to use when filtering BAM files for DNA damage. Only reads which surpass this damage score are considered for downstream analysis.\n\n> Modifies PMDtools parameter: `--threshold`"
},
"damage_manipulation_pmdtools_masked_reference": {
"type": "string",
"fa_icon": "fas fa-mask",
"help_text": "Specify a FASTA file to use as reference for `samtools calmd` prior to PMD filtering.\nSetting the SNPs that are part of the used capture set as `N` can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a SNP to be counted as damage when it is a transition.",
"description": "Specify a masked FASTA file with positions to be used with PMDtools.",
"pattern": "^\\S+\\.fa?(\\sta)$",
"format": "file-path"
},
"damage_manipulation_pmdtools_reference_mask": {
"type": "string",
"fa_icon": "fas fa-mask",
"help_text": "Specify a BED file to activate masking of the reference FASTA at the contained sites prior to running PMDtools. Positions that are in the provided BED file will be replaced by Ns in the reference genome.\nThis can alleviate reference bias when running PMD filtering on capture data, where you might not want the allele of a transition SNP to be counted as damage. Masking of the reference is done using `bedtools maskfasta`.",
"description": "Specify a BED file to be used to mask the reference FASTA prior to running PMDtools.",
"pattern": "^\\S+\\.bed?(\\.gz)$",
"format": "file-path"
},
"run_trim_bam": {
"type": "boolean",
"fa_icon": "fas fa-power-off",
"description": "Specify to turn on BAM trimming for non-UDG or half-UDG libraries.",
"help_text": "Specify to turn on the BAM trimming of [n] bases from reads in the deduplicated BAM file. Damage assessment in PMDtools or DamageProfiler remains untouched, as data is routed through this independently. BAM trimming is typically performed to reduce errors during genotyping that can be caused by aDNA damage.\n\nBAM trimming will only affect libraries with 'damage_treatment' of 'none' or 'half'. Complete UDG treatment ('full') should have removed all damage during library construction, so trimming of 0 bp is performed. The amount of bases that will be trimmed off from each side of the molecule should be set separately for libraries depending on their 'strandedness' and 'damage_treatment'.\n\n> Note: additional artefacts such as barcodes or adapters should be removed prior to mapping and not in this step."
},
"damage_manipulation_bamutils_trim_double_stranded_none_udg_left": {
"type": "integer",
"default": 0,
"fa_icon": "fas fa-cut",
"description": "Specify the number of bases to clip off reads from 'left' (5 prime) end of reads for double-stranded non-UDG libraries.",
"help_text": "Specify the number of bases to clip off reads from 'left' (5 prime) end of reads for double-stranded non-UDG libraries. By default, this is set to 0, and therefore clips off no bases on the left side of reads from double-stranded libraries whose UDG treatment is set to 'none'. Note that reverse reads will automatically be clipped off at the reverse side with this (automatically reverses left and right for the reverse read).\n\n> Modifies bamUtil's trimBam parameter: `-L`"
},
"damage_manipulation_bamutils_trim_double_stranded_none_udg_right": {
"type": "integer",
"default": 0,