-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Truongphikt add module mapping from KTest-VN/mapping@7c4f68d
- Loading branch information
1 parent
97fc256
commit cc095cf
Showing
15 changed files
with
356 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
bin/__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# Mapping module | ||
## 1. Input channels | ||
|
||
|
||
<table class="tg" style="undefined;table-layout: fixed; width: 721px"> | ||
<colgroup> | ||
<col style="width: 142px"> | ||
<col style="width: 579px"> | ||
<col style="width: 579px"> | ||
</colgroup> | ||
<thead> | ||
<tr> | ||
<th class="tg-0pky"><span style="font-weight:bold">Channel</span></th> | ||
<th class="tg-0pky"><span style="font-weight:bold">Value</span></th> | ||
<th class="tg-0pky"><span style="font-weight:bold">Example</span></th> | ||
</tr> | ||
</thead> | ||
<tbody> | ||
<tr> | ||
<td class="tg-lboi">Data channel</td> | ||
<td class="tg-lboi">- <span style="font-style:italic">rg_id</span>: Group ID / Sample ID <br>- <span style="font-style:italic">sample_name</span>: Name of sample<br>- <span style="font-style:italic">library_id</span>: Unique ID of library of sample<br>- <span style="font-style:italic">lane</span>: Lane on sequencer<br>- <span style="font-style:italic">platform</span>: Platform of sequencer (Illumina/MCI)<br>- <span style="font-style:italic">machine</span>: Name of the sequencer, e.g. Hiseq X<br>- <span style="font-style:italic">orient</span>: The orientation, forward or reverse (1 or 2)<br>- <span style="font-style:italic">object</span>: Which object's sequence? e.g. human, shrimp, ...<br>- path: Path of fastq files</td> | ||
<td>[rg_id, sample_name, library_id, lane, platform, machine, orient, object, path]<br/><br/><img src="https://github.com/KTest-VN/mapping/assets/141545014/b01cac19-1bf2-4cc0-884c-b6cbedcf2462" alt="Data channel"></td> | ||
</tr> | ||
<tr> | ||
<td class="tg-0lax">Reference channel</td> | ||
<td class="tg-0lax">- <a href="https://ktest-dattn.atlassian.net/browse/PRS-92">bwa</a>: Folder contains index set of the reference genome (Indexing by BWA)</td> | ||
<td>[bwa]<br/><br/><img src="https://github.com/KTest-VN/mapping/assets/141545014/77567dfd-891d-45df-b593-199ca060a389" alt="Reference channel"></td> | ||
|
||
</tr> | ||
</tbody> | ||
</table> | ||
|
||
|
||
## 2. Output channels | ||
|
||
<table class="tg" style="undefined;table-layout: fixed; width: 792px"> | ||
<colgroup> | ||
<col style="width: 202px"> | ||
<col style="width: 590px"> | ||
<col style="width: 590px"> | ||
</colgroup> | ||
<thead> | ||
<tr> | ||
<th class="tg-0pky"><span style="font-weight:bold">Channel</span></th> | ||
<th class="tg-0pky"><span style="font-weight:bold">Value</span></th> | ||
<th class="tg-0pky"><span style="font-weight:bold">Example</span></th> | ||
</tr> | ||
</thead> | ||
<tbody> | ||
<tr> | ||
<td class="tg-lboi">from_mapping</td> | ||
<td class="tg-lboi"><span style="font-weight:400;font-style:normal">- </span><span style="font-style:italic">object</span>: Which object's sequence? e.g. human, shrimp, ...<br>- <span style="font-style:italic">rg_id</span>: Group ID / Sample ID <br>- library_id: Unique ID of library of sample<br>- <span style="font-style:italic">dedup_bam</span>: Bam file after mark duplicate<br>- <span style="font-style:italic">dedup_bai</span>: Bai file, index of bam file</td> | ||
<td>[val(object), val(rg_id), val(library_id), path(dedup_bam), path(dedup_bai)]<br/><br/><img src="https://github.com/KTest-VN/mapping/assets/141545014/7db4f63d-832c-41f2-b3f5-c8cce812416c" alt="from_mapping channel"> | ||
</td> | ||
</tr> | ||
</tbody> | ||
</table> | ||
|
||
## 3. Processes | ||
|
||
<table class="tg" style="undefined;table-layout: fixed; width: 721px"> | ||
<colgroup> | ||
<col style="width: 142px" /> | ||
<col style="width: 579px" /> | ||
<col style="width: 579px" /> | ||
<col style="width: 579px" /> | ||
</colgroup> | ||
<thead> | ||
<tr> | ||
<th class="tg-0pky"><span style="font-weight: 400;">Process</span></th> | ||
<th class="tg-0pky"><span style="font-weight: 400;">Input Channel</span></th> | ||
<th class="tg-0pky"><span style="font-weight: 400;">Output Channel</span></th> | ||
<th class="tg-0pky"><span style="font-weight: 400;">Description</span></th> | ||
</tr> | ||
</thead> | ||
<tbody> | ||
<tr> | ||
<td class="tg-lboi">CAT_FILE</td> | ||
<td class="tg-lboi">[val(object), val(rg_id), val(library_id), val(platform), val(machine), val(orient), [val(path1), val(path2)]]</td> | ||
<td>[val(object), val(rg_id), val(library_id), val(platform), val(machine), val(orient), path(cat_orient_fastq_gz)]</td> | ||
<td>Concatenate FASTQ files that come from the same sample</td> | ||
</tr> | ||
<tr> | ||
<td class="tg-lboi">FASTQC</td> | ||
<td class="tg-lboi">[val(rg_id), val(library_id), path(fastq_path)]</td> | ||
<td>Coming soon...</td> | ||
<td>Run fastqc</td> | ||
</tr> | ||
<tr> | ||
<td class="tg-lboi">MAP_BAM</td> | ||
<td class="tg-lboi">[val(object), val(rg_id), val(library_id), val(platform), val(machine), path(fastq_path), path(bwa_ref)]</td> | ||
<td>[val(object), val(rg_id), val(library_id), path(pe_sorted_bam)]</td> | ||
<td>Mapping and sorting</td> | ||
</tr> | ||
<tr> | ||
<td class="tg-lboi">BAM_INDEX</td> | ||
<td class="tg-lboi">[val(object), val(rg_id), val(library_id), path(dedup_bam)]</td> | ||
<td>[val(object), val(rg_id), val(library_id), path(dedup_bai)]</td> | ||
<td>Indexing bam file</td> | ||
</tr> | ||
</tbody> | ||
</table> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
// Folder path exposed to the pipeline as `params.cache_sing_folder`.
params.cache_sing_folder = "/home/ktest/pipeline_env/software/truongphi"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
// Sample sheet path; declared here with an empty default.
params.samplesheet = ""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
// Reference folder and sample sheet, both given as absolute paths.
params.ref_folder  = '/home/ktest/project/trucle/LCWGS-4/LCWGS-28/reference'
params.samplesheet = '/home/ktest/project/trucle/LCWGS-8/LCWGS-18/data_test/sample_sheet1.tsv'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
// Sample sheet resolved relative to the pipeline's project directory.
params.samplesheet = "$projectDir/tests/test_sample_sheet.tsv"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/usr/bin/env nextflow | ||
include { MAPPING } from "./mapping.nf" | ||
|
||
/*
 * Entry workflow: reads the sample sheet and the reference files, then hands
 * both channels to the MAPPING sub-workflow.
 */
workflow {
    // INPUT CHANNEL
    // One item per sample-sheet row (tab-separated, first row skipped):
    // [rg_id, sample_name, library_id, lane, platform, machine, orient, object, path]
    // checkIfExists makes a wrong/empty `params.samplesheet` fail immediately
    // instead of producing an empty run.
    input_channel = Channel
        .fromPath("$params.samplesheet", checkIfExists: true)
        .splitCsv(skip: 1, sep: '\t')

    // Glob matching the reference FASTA together with its index files.
    params.ref_pattern = "$params.ref_folder/*.{fa,fa.amb,fa.ann,fa.bwt,fa.fai,fa.pac,fa.sa}"

    // All matching files are collected into one list, wrapped so that the list
    // travels as a single tuple element: [bwa_ref].
    // The list is sorted by file name so its order no longer depends on the
    // file system; `<name>.fa` sorts before `<name>.fa.*`, so a consumer that
    // takes the first element (e.g. `bwa_ref[0]`) gets the FASTA itself.
    reference_channel = Channel
        .fromPath(params.ref_pattern, checkIfExists: true)
        .collect()
        .map { refs -> [ refs.sort(false) { it.name } ] }   // [bwa_ref]

    // MAPPING
    MAPPING(
        input_channel,
        reference_channel
    )

    // EMIT
    // MAPPING.out.from_mapping // [val(object), val(rg_id), val(library_id), path(dedup_bam), path(dedup_bai)]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
include { FASTQC } from "./modules/fastqc.nf" | ||
include { CAT_FILE } from "./modules/cat_file.nf" | ||
include { MAP_BAM } from "./modules/map_bam.nf" | ||
include { MARKDUPLICATES } from "./modules/markduplicates.nf" | ||
include { BAM_INDEX } from "./modules/bam_index.nf" | ||
|
||
/*
 * MAPPING sub-workflow.
 *
 * take:
 *   input_channel     - [rg_id, sample_name, library_id, lane, platform, machine, orient, object, path]
 *   reference_channel - [bwa_ref]
 * emit:
 *   from_mapping      - [val(object), val(rg_id), val(library_id), path(dedup_bam), path(dedup_bai)]
 */
workflow MAPPING {
    take:
    input_channel       // [rg_id, sample_name, library_id, lane, platform, machine, orient, object, path]
    reference_channel   // [bwa_ref]

    main:

    // Cat fastq file
    // Re-order the sample-sheet columns, then gather every path that shares the
    // same object/rg_id/library_id/platform/machine/orient key.
    input_channel
        .map { [it[7], it[0], it[2], it[4], it[5], it[6], it[-1]] }   // [object, rg_id, library_id, platform, machine, orient, path]
        .groupTuple(by: [0,1,2,3,4,5], sort: true)                    // [object, rg_id, library_id, platform, machine, orient, [path, ...]]
        .branch {
            // More than one file for the key: needs concatenation.
            cat: it[-1].size() > 1      // [object, rg_id, library_id, platform, machine, orient, [path1, path2]]
            // Everything else: unwrap the one-element path list again.
            non_cat: true
                return it.flatten()     // [object, rg_id, library_id, platform, machine, orient, path]
        }
        .set { cat_filter }

    CAT_FILE(
        cat_filter.cat
    )

    // Combine raw input
    // Every item starts with [object, rg_id, library_id, platform, machine]
    // and ends with the FASTQ path; the steps below rely only on those positions.
    raw_input = cat_filter.non_cat
        .concat(CAT_FILE.out)

    // Fastqc
    FASTQC(
        raw_input.map { [it[1], it[2], it[-1]] }    // [val(rg_id), val(library_id), path(fastq_path)]
    )

    // Mapping: group the FASTQ files of one object/rg_id/library_id/platform/machine
    // and attach the reference file list to every group.
    MAP_BAM(
        raw_input
            .groupTuple(by: [0,1,2,3,4], sort: true)    // [object, rg_id, library_id, platform, machine, ..., [path1, path2]]
            .map { it[0..4] + [it[-1]] }                // [object, rg_id, library_id, platform, machine, [path1, path2]]
            .combine(reference_channel)                 // [object, rg_id, library_id, platform, machine, [path1, path2], bwa_ref]
    )

    // Processes are invoked with parentheses, like the calls above.
    MARKDUPLICATES(
        MAP_BAM.out.sorted              // [val(object), val(rg_id), val(library_id), path("${rg_id}_${library_id}.pe.sorted.bam")]
    )

    BAM_INDEX(
        MARKDUPLICATES.out.dedup_bam    // [val(object), val(rg_id), val(library_id), path("${rg_id}_${library_id}.dedup.bam")]
    )

    emit:
    // Pair each deduplicated BAM with its index on (object, rg_id, library_id).
    from_mapping = MARKDUPLICATES.out.dedup_bam
        .combine(
            BAM_INDEX.out.dedup_bai, by: [0,1,2]
        ) // [val(object), val(rg_id), val(library_id), path(dedup_bam), path(dedup_bai)]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/*
 * BAM_INDEX: build a BAM index with Picard BuildBamIndex.
 *
 * input : [object, rg_id, library_id, dedup_bam]
 * output: [object, rg_id, library_id, "<rg_id>_<library_id>.dedup.bai"] (emit: dedup_bai)
 */
process BAM_INDEX {
    tag "$rg_id"

    container "quay.io/biocontainers/picard:3.1.1--hdfd78af_0"
    memory { 30.GB * task.attempt }
    cpus { 5 * task.attempt }

    input:
    tuple val(object), val(rg_id), val(library_id), path(dedup_bam)

    output:
    tuple val(object), val(rg_id), val(library_id), path("${rg_id}_${library_id}.dedup.bai"), emit: dedup_bai

    script:
    // OUTPUT is passed explicitly so the produced file always matches the
    // declared output name, whatever the staged input file is called.
    """
    picard BuildBamIndex \
        INPUT=$dedup_bam \
        OUTPUT=${rg_id}_${library_id}.dedup.bai
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
/*
 * CAT_FILE: concatenate several gzipped FASTQ files into one file and print
 * a read count for every .gz file in the task directory.
 *
 * input : [object, rg_id, library_id, platform, machine, orient, fastq_files]
 * output: [object, rg_id, library_id, platform, machine, orient,
 *          "<library_id>_<rg_id>_cat_<orient>.fastq.gz"]
 *
 * `orient` is passed through in the output so the tuple has the same layout
 * as the input.
 */
process CAT_FILE {
    tag "$rg_id"

    container "ubuntu:rolling"
    memory { 20.GB * task.attempt }
    cpus { 4 * task.attempt }

    input:
    tuple val(object),
          val(rg_id),
          val(library_id),
          val(platform),
          val(machine),
          val(orient),
          path(fastq_files)

    output:
    tuple val(object),
          val(rg_id),
          val(library_id),
          val(platform),
          val(machine),
          val(orient),
          path("${library_id}_${rg_id}_cat_${orient}.fastq.gz")

    script:
    // The shell glob replaces parsing of `ls` output, and file names are quoted.
    // Read count = decompressed line count / 4.
    """
    cat ${fastq_files} > ${library_id}_${rg_id}_cat_${orient}.fastq.gz
    for filename in *.gz; do
        read_num=\$(( \$(zcat "\$filename" | wc -l) / 4 ))
        echo "\$filename has: \$read_num reads"
    done
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
/*
 * FASTQC: run fastqc on the given FASTQ file(s).
 *
 * input : [rg_id, library_id, fastq_path]
 * output: none declared
 */
process FASTQC {
    tag "$rg_id"

    container "phinguyen2000/fastqc_v0.12.1:v0.1.0"
    memory { 30.GB * task.attempt }
    cpus { 16 * task.attempt }

    input:
    tuple val(rg_id), val(library_id), path(fastq_path)

    script:
    // The thread count follows the `cpus` directive rather than a fixed number,
    // so the tool never uses more CPUs than the task requested.
    """
    fastqc --threads ${task.cpus} $fastq_path
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
/*
 * MAP_BAM: align reads with `bwa mem` and coordinate-sort the result with
 * `samtools sort`.
 *
 * input : [object, rg_id, library_id, platform, machine, fastq_path, bwa_ref]
 *         bwa_ref holds the reference FASTA together with its index files.
 * output: [object, rg_id, library_id, "<rg_id>_<library_id>.pe.sorted.bam"] (emit: sorted)
 */
process MAP_BAM {
    tag "$object:$rg_id"

    container "phinguyen2000/mapping:v0.1.0"
    memory { 30.GB * task.attempt }
    cpus { 8 * task.attempt }

    input:
    tuple val(object), val(rg_id), val(library_id), val(platform), val(machine), path(fastq_path), path(bwa_ref)

    output:
    tuple val(object), val(rg_id), val(library_id), path("${rg_id}_${library_id}.pe.sorted.bam"), emit: sorted

    script:
    // Select the reference by name, not by position: the order of the staged
    // list is not guaranteed, and bwa needs the `.fa` prefix, not an index file.
    // Falls back to the first element, as before, if no `.fa` file is found.
    def refs = bwa_ref instanceof Collection ? bwa_ref : [bwa_ref]
    def ref_fasta = refs.find { it.name.endsWith('.fa') } ?: refs[0]
    // Thread count follows the `cpus` directive instead of a fixed value.
    """
    threads=${task.cpus}
    bwa mem -t \$threads \
        -R "@RG\\tID:${rg_id}\\tLB:${library_id}\\tPL:${platform}\\tPM:${machine}\\tSM:${rg_id}" \
        -M ${ref_fasta} \
        $fastq_path | samtools sort -@\$threads -o ${rg_id}_${library_id}.pe.sorted.bam -
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/*
 * MARKDUPLICATES: run `gatk MarkDuplicates` on a sorted BAM.
 *
 * input : [object, rg_id, library_id, pe_sorted_bam]
 * output: [object, rg_id, library_id, "<rg_id>_<library_id>.dedup.bam"] (emit: dedup_bam)
 * A metrics file "<rg_id>_<library_id>.dedup.metrics.txt" is written as well
 * but is not declared as an output.
 */
process MARKDUPLICATES {
    tag "$object:$rg_id"

    container "phinguyen2000/gatk_tabix:v0.1.0"
    cpus { 10 * task.attempt }
    memory { 30.GB * task.attempt }

    input:
    tuple val(object), val(rg_id), val(library_id), path(pe_sorted_bam)

    output:
    tuple val(object), val(rg_id), val(library_id), path("${rg_id}_${library_id}.dedup.bam"), emit: dedup_bam

    script:
    // Shared stem of both files written by the command.
    def prefix = "${rg_id}_${library_id}"
    """
    gatk MarkDuplicates \
    -I $pe_sorted_bam \
    -O ${prefix}.dedup.bam \
    -M ${prefix}.dedup.metrics.txt
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Load params for inputs | ||
includeConfig 'conf/input.config' | ||
|
||
// Load params common for all modules | ||
includeConfig 'conf/base.config' | ||
|
||
// Load config for modules | ||
includeConfig 'conf/mapping.config' | ||
|
||
nextflow.enable.dsl = 2 | ||
|
||
// Tower reporting: always enabled; the access token is interpolated from
// `$TOWER_ACCESS_TOKEN` and the workspace id is fixed.
tower.enabled     = true
tower.accessToken = "$TOWER_ACCESS_TOKEN"
tower.workspaceId = '222915005021784'
|
||
|
||
profiles {
    // `cluster`: SLURM executor with a queue size of 30.
    cluster {
        executor.name      = 'slurm'
        executor.queueSize = 30
    }

    // `test1`: settings come from a separate config file.
    test1 {
        includeConfig 'conf/test/test1.config'
    }
}
|
||
// Singularity: enabled, image cache taken from `params.cache_sing_folder`,
// and `/home` bound into the container.
singularity.enabled    = true
singularity.cacheDir   = "$params.cache_sing_folder"
singularity.runOptions = "--bind /home"
|
||
// Defaults applied to every process.
// Exit statuses 137..140 trigger a retry (at most 3); anything else terminates the run.
process.errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
process.maxRetries    = 3

// Submission queue and cap on parallel instances of a process.
process.queue    = 'dev'
process.maxForks = 30
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
rg_id sample_name library_id lane platform machine orient object path | ||
SRR14775139 A01 RANDOM S1 ILLUMINA Novaseq 1 pig /home/ktest/project/trucle/LCWGS-8/LCWGS-18/data_test/SRR14775139_1.fastq.gz | ||
SRR14775139 A01 RANDOM S1 ILLUMINA Novaseq 2 pig /home/ktest/project/trucle/LCWGS-8/LCWGS-18/data_test/SRR14775139_2.fastq.gz |