From 51a3579097132d77adeccde252860c0604d3fb9c Mon Sep 17 00:00:00 2001
From: GitHub Actions <actions@github.com>
Date: Sat, 6 Jul 2024 16:37:02 +0000
Subject: [PATCH] Truongphikt add module calling from KTest-VN/calling@e461b84

---
 modules/ktest/calling/.gitignore              |   1 +
 modules/ktest/calling/README.md               | 167 ++++++++++++++++++
 modules/ktest/calling/calling.nf              |  74 ++++++++
 modules/ktest/calling/conf/base.config        |   3 +
 modules/ktest/calling/conf/calling.config     |   5 +
 modules/ktest/calling/conf/input.config       |   3 +
 .../ktest/calling/conf/ktest_cluster.config   |  19 ++
 modules/ktest/calling/conf/test.config        |   4 +
 .../ktest/calling/conf/test/test_human.config |   3 +
 .../ktest/calling/conf/test/test_pig.config   |   6 +
 modules/ktest/calling/main.nf                 |  21 +++
 modules/ktest/calling/modules/apply_bqsr.nf   |  28 +++
 .../calling/modules/base_recalibrator.nf      |  32 ++++
 .../ktest/calling/modules/call_variants.nf    |  24 +++
 .../ktest/calling/modules/draft_calling.nf    |  25 +++
 modules/ktest/calling/modules/draft_join.nf   |  36 ++++
 modules/ktest/calling/modules/joining.nf      |  34 ++++
 modules/ktest/calling/modules/split_chr.nf    |  21 +++
 modules/ktest/calling/nextflow.config         |  26 +++
 .../ktest/calling/tests/test_sample_human.tsv |   4 +
 .../ktest/calling/tests/test_sample_pig.tsv   |   2 +
 21 files changed, 538 insertions(+)
 create mode 100644 modules/ktest/calling/.gitignore
 create mode 100644 modules/ktest/calling/README.md
 create mode 100644 modules/ktest/calling/calling.nf
 create mode 100644 modules/ktest/calling/conf/base.config
 create mode 100644 modules/ktest/calling/conf/calling.config
 create mode 100644 modules/ktest/calling/conf/input.config
 create mode 100644 modules/ktest/calling/conf/ktest_cluster.config
 create mode 100644 modules/ktest/calling/conf/test.config
 create mode 100644 modules/ktest/calling/conf/test/test_human.config
 create mode 100644 modules/ktest/calling/conf/test/test_pig.config
 create mode 100644 modules/ktest/calling/main.nf
 create mode 100644 modules/ktest/calling/modules/apply_bqsr.nf
 create mode 100644 modules/ktest/calling/modules/base_recalibrator.nf
 create mode 100644 modules/ktest/calling/modules/call_variants.nf
 create mode 100644 modules/ktest/calling/modules/draft_calling.nf
 create mode 100644 modules/ktest/calling/modules/draft_join.nf
 create mode 100644 modules/ktest/calling/modules/joining.nf
 create mode 100644 modules/ktest/calling/modules/split_chr.nf
 create mode 100755 modules/ktest/calling/nextflow.config
 create mode 100644 modules/ktest/calling/tests/test_sample_human.tsv
 create mode 100755 modules/ktest/calling/tests/test_sample_pig.tsv
diff --git a/modules/ktest/calling/.gitignore b/modules/ktest/calling/.gitignore
new file mode 100644
index 0000000..731e034
--- /dev/null
+++ b/modules/ktest/calling/.gitignore
@@ -0,0 +1 @@
+bin/__pycache__
\ No newline at end of file
diff --git a/modules/ktest/calling/README.md b/modules/ktest/calling/README.md
new file mode 100644
index 0000000..7288076
--- /dev/null
+++ b/modules/ktest/calling/README.md
@@ -0,0 +1,167 @@
+# Calling module
+
+Calls variants from duplicate-marked BAM files ([PRS-62](https://ktest-dattn.atlassian.net/browse/PRS-62)).
+
+------------------
+# 1. Usages
+Input params (view `conf/input.config`): 
+- `from_mapping_tsv`: path of the tab-separated sample sheet (columns: `key`, `object`, `rg_id`, `dedup.bam`, `dedup.bai`)
+
+Calling params (view `conf/calling.config`):
+
+- `folder_ref`: path of the folder that contains the reference genome
+- `genome_name`: file name of the FASTA reference genome located in `folder_ref`
+- `human_knownsite_vcf`: Known-sites reference for BASE_RECALIBRATOR process in module Calling.
+
+
+# 2. Channels
+## 2.1 Input channels
+
+<table class="tg" style="undefined;table-layout: fixed; width: 721px">
+<colgroup>
+<col style="width: 142px">
+<col style="width: 579px">
+<col style="width: 579px">
+</colgroup>
+<thead>
+  <tr>
+    <th class="tg-0pky"><span style="font-weight:bold">Channel</span></th>
+    <th class="tg-0pky"><span style="font-weight:bold">Value</span></th>
+    <th class="tg-0pky"><span style="font-weight:bold">Example</span></th>
+  </tr>
+</thead>
+<tbody>
+  <tr>
+    <td class="tg-lboi">from_mapping</td>
+    <td class="tg-lboi">
+      - <span style="font-style:italic">key</span>: key from higher hierarchical structure (e.g. null, 'arrayA_batch4', ...) <br>
+      - <span style="font-style:italic">rg_id</span>: Group ID / Sample ID <br>
+      - <span style="font-style:italic">object</span>: Which object's sequence? e.g. human, shrimp, ...<br>
+      - <span style="font-style:italic">dedup_bam</span>: Path of the BAM file after duplicate marking<br>
+      - <span style="font-style:italic">dedup_bai</span>: Path of *.bai file, index file of bam file.<br>
+    </td>
+    <td>([val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai)])<br/></td>
+  </tr>
+</tbody>
+</table>
+
+## 2.2 Reference channels
+
+<table class="tg" style="undefined;table-layout: fixed; width: 721px">
+<colgroup>
+<col style="width: 142px">
+<col style="width: 579px">
+<col style="width: 579px">
+</colgroup>
+<thead>
+  <tr>
+    <th class="tg-0pky"><span style="font-weight:bold">Channel</span></th>
+    <th class="tg-0pky"><span style="font-weight:bold">Value</span></th>
+    <th class="tg-0pky"><span style="font-weight:bold">Example</span></th>
+  </tr>
+</thead>
+<tbody>
+  <tr>
+    <td class="tg-0lax">calling_reference</td>
+    <td class="tg-0lax">
+      - <a href="https://ktest-dattn.atlassian.net/browse/PRS-93">folder_ref</a>: Folder containing the reference genome and its index files *.{fa, fa.fai, dict}. <br>
+      - genome_ref: file name of the FASTA reference genome located in folder_ref. (e.g. Homo_sapiens.GRCh38.dna.primary_assembly.fa) <br>
+      - <a href="https://ktest-dattn.atlassian.net/browse/PRS-96">human_knownsite_vcf</a>: Known-sites reference for BASE_RECALIBRATOR process in module Calling.
+    </td>
+    <td>
+	    ([folder_ref, genome_ref, human_knownsite_vcf])
+    </td>
+
+  </tr>
+</tbody>
+</table>
+
+## 2.3. Output channels
+
+<table class="tg" style="undefined;table-layout: fixed; width: 792px">
+<colgroup>
+<col style="width: 202px">
+<col style="width: 590px">
+<col style="width: 590px">
+</colgroup>
+<thead>
+  <tr>
+    <th class="tg-0pky"><span style="font-weight:bold">Channel</span></th>
+    <th class="tg-0pky"><span style="font-weight:bold">Value</span></th>
+    <th class="tg-0pky"><span style="font-weight:bold">Example</span></th>
+  </tr>
+</thead>
+<tbody>
+  <tr>
+    <td class="tg-lboi">split_vcf</td>
+    <td class="tg-lboi"><span style="font-weight:400;font-style:normal">
+      - </span><span style="font-style:italic">object</span>: Which object's sequence? e.g. human, shrimp, ...<br>
+      - <span style="font-style:italic">chr</span>: Chromosome number (1→22)<br>
+      - <span style="font-style:italic">split_vcf</span>: Cohort vcf file after split by chromosome and its index<br>
+    </td>
+    <td>[val(key), val(object), val(chr), [path(split_vcf), path(split_vcf_tbi)]]<br/></td>
+  </tr>
+</tbody>
+</table>
+
+## 2.4. Processes
+
+<table class="tg" style="undefined;table-layout: fixed; width: 721px">
+	<colgroup>
+		<col style="width: 142px" />
+		<col style="width: 579px" />
+		<col style="width: 579px" />
+    <col style="width: 579px" />
+	</colgroup>
+	<thead>
+		<tr>
+			<th class="tg-0pky"><span style="font-weight: 400;">Process</span></th>
+			<th class="tg-0pky"><span style="font-weight: 400;">Input Channel</span></th>
+			<th class="tg-0pky"><span style="font-weight: 400;">Output Channel</span></th>
+      <th class="tg-0pky"><span style="font-weight: 400;">Description</span></th>
+		</tr>
+	</thead>
+	<tbody>
+		<tr>
+			<td class="tg-lboi">DRAFT_CALLING</td>
+			<td class="tg-lboi">[val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai), path(folder_ref), val(genome_name)]</td>
+			<td>[val(key), val(object), val(rg_id), path(raw_variants_vcf)]</td>
+      <td>Draft variant calling</td>
+		</tr>
+    <tr>
+			<td class="tg-lboi">DRAFT_JOIN</td>
+			<td class="tg-lboi">[val(key), val(object), val(rg_ids), path(raw_variants_vcf), path(folder_ref), val(genome_name)]</td>
+			<td>[val(key), val(object), path(joint_genotyped_draft{vcf.gz,vcf.gz.tbi})]</td>
+      <td>Join draft variant VCF files into a draft cohort VCF file</td>
+		</tr>
+    <tr>
+			<td class="tg-lboi">BASE_RECALIBRATOR</td>
+			<td class="tg-lboi">[val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai), path(joint_genotyped_draft_vcf), path(folder_ref), val(genome_name), path(human_knownsite_vcf)]</td>
+			<td>[val(key), val(object), val(rg_id), path(recal_data_table)]</td>
+      <td>Model systematic errors in the reported base quality scores (mismatches with the reference outside known variant sites) and write the recalibration table</td>
+		</tr>
+    <tr>
+			<td class="tg-lboi">APPLY_BQSR</td>
+			<td class="tg-lboi">[val(key), val(object), val(sample_id), path(dedup_bam), path(dedup_bai), path(recal_data_table), path(folder_ref), val(genome_name)]</td>
+			<td>[val(key), val(object), val(sample_id), path(recal_bam)]</td>
+      <td>Apply base quality score recalibration</td>
+		</tr>
+    <tr>
+			<td class="tg-lboi">CALL_VARIANTS</td>
+			<td class="tg-lboi">[val(key), val(object), val(rg_id), path(recal_bam), path(folder_ref), val(genome_name)]</td>
+			<td>[val(key), val(object), val(rg_id), path(recal_vcf)]</td>
+      <td>Call germline SNPs and indels via local re-assembly of haplotypes</td>
+		</tr>
+    <tr>
+			<td class="tg-lboi">JOINING</td>
+			<td class="tg-lboi">[val(key), val(object), val(rg_ids), path(variants_vcfs), path(variants_vcf_tbis), path(folder_ref), val(genome_name)]</td>
+			<td>[val(key), val(object), path(joint_genotyped{vcf.gz,vcf.gz.tbi})]</td>
+      <td>Join the recalibrated per-sample variant VCF files into the final cohort VCF file</td>
+		</tr>
+    <tr>
+			<td class="tg-lboi">SPLIT_CHR</td>
+			<td class="tg-lboi">[val(key), val(object), path(joint_genotyped), val(chr)]</td>
+			<td>[val(key), val(object), val(chr), path(split_vcf{vcf.gz,vcf.gz.tbi})]</td>
+      <td>Split the cohort vcf file by chromosome</td>
+		</tr>
+</tbody></table>
diff --git a/modules/ktest/calling/calling.nf b/modules/ktest/calling/calling.nf
new file mode 100644
index 0000000..fe068a0
--- /dev/null
+++ b/modules/ktest/calling/calling.nf
@@ -0,0 +1,74 @@
+include { DRAFT_CALLING }                       from        "./modules/draft_calling.nf"
+include { DRAFT_JOIN }                          from        "./modules/draft_join.nf"
+include { BASE_RECALIBRATOR }                   from        "./modules/base_recalibrator.nf"
+include { APPLY_BQSR }                          from        "./modules/apply_bqsr.nf"
+include { CALL_VARIANTS }                       from        "./modules/call_variants.nf"
+include { JOINING }                             from        "./modules/joining.nf"
+include { SPLIT_CHR }                           from        "./modules/split_chr.nf"
+
+workflow CALLING{
+    take:
+    from_mapping                                                                                    // ([val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai)])
+    calling_reference                                                                               // ([path(folder_ref), val(genome_ref_name), [human_knownsite_vcf, human_knownsite_vcf_tbi]])
+    // Flow: non-human samples first get a draft cohort call set (used as known sites); then BQSR -> per-sample calling -> joint genotyping -> split by chromosome.
+    main:
+    // Split human samples from the others; human tuples get a "/dev/null" placeholder in the draft-VCF slot (BASE_RECALIBRATOR recognises it by the file name 'null')
+    from_mapping.branch{
+                    human: it[1] == 'human'
+                        return it + [["/dev/null"]]                                                 // ([val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai), [path(null_file)]])
+                    others: true                                                                    // ([val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai)])
+                }.set{ human_selector }
+
+    //===============IF NOT BEING HUMAN OBJECT==================
+    DRAFT_CALLING(
+        human_selector.others                                                                       // ([val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai)])
+                    .combine(calling_reference.map{it[0,1]})                                        // ([val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai), path(folder_ref), val(genome_name)])
+    )
+
+    DRAFT_JOIN(
+        DRAFT_CALLING.out.raw_variants_vcf                                                          // ([val(key), val(object), val(rg_id), path(raw_variants_vcf_gz)])
+                    .groupTuple(by: [0,1])                                                          // ([val(key), val(object), [val(rg_id), ...], [path(raw_variants_vcf_gz), ...]])
+                    .combine(calling_reference.map{it[0,1]})                                        // ([val(key), val(object), [val(rg_id), ...], [path(raw_variants_vcf_gz),...], path(folder_ref), val(genome_name)])
+    )
+
+    // Attach the draft cohort VCF to every sample sharing its (key, object); samples without a draft VCF (the human branch) drop out of this join.
+    non_human_pkg = from_mapping.combine(
+                    DRAFT_JOIN.out.joint_genotyped_draft,
+                    by: [0,1]                                                                       // ([val(key), val(object), val(rg_id), path(dedup.bam), path(dedup.bai), [path(cohort_draft_vcf), path(cohort_draft_vcf_tbi)]])
+                )
+    //==========================================================
+    BASE_RECALIBRATOR(
+        human_selector.human
+                    .concat(non_human_pkg)                                                          // ([val(key), val(object), val(rg_id), path(dedup.bam), path(dedup.bai), [path(cohort_draft_vcf), path(cohort_draft_vcf_tbi)]])
+                    .combine(calling_reference.map{it[0..2]})                                       // ([val(key), val(object), val(rg_id), path(dedup.bam), path(dedup.bai), [path(cohort_draft_vcf), path(cohort_draft_vcf_tbi)], path(folder_ref), val(genome_name), [path(human_knownsite_vcf), path(human_knownsite_vcf_tbi)]])
+
+    )
+
+    APPLY_BQSR(
+        from_mapping.combine(
+                    BASE_RECALIBRATOR.out.recal_data_table, by:[0,1,2]                              // ([val(key), val(object), val(rg_id), path(dedup.bam), path(dedup.bai), path(recal_data_table)])
+                    ).combine(calling_reference.map{it[0,1]})                                       // ([val(key), val(object), val(rg_id), path(dedup.bam), path(dedup.bai), path(recal_data_table), path(folder_ref), val(genome_name)])
+    )
+
+    CALL_VARIANTS(
+        APPLY_BQSR.out.recal_bam                                                                    // ([val(key), val(object), val(rg_id), path(recal_bam)])
+                    .combine(calling_reference.map{it[0,1]})                                        // ([val(key), val(object), val(rg_id), path(recal_bam), path(folder_ref), val(genome_name)])
+    )
+
+    JOINING(
+        CALL_VARIANTS.out.variants_recal_vcf                                                        // ([val(key), val(object), val(rg_id), [path(variants_recal_vcf_gz), path(variants_recal_vcf_gz_tbi)]])
+                        .map{it.flatten()}                                                          // ([val(key), val(object), val(rg_id), path(variants_recal_vcf_gz), path(variants_recal_vcf_gz_tbi)])
+                        .groupTuple(by: [0,1])                                                      // ([val(key), val(object), [rg_id1, rg_id2, ...], [path(variants_recal_vcf_gz), ...], [path(variants_recal_vcf_gz_tbi), ...]])
+                        .combine(calling_reference.map{it[0,1]})                                    // ([val(key), val(object), [rg_id1, rg_id2, ...], [path(variants_recal_vcf_gz), ...], [path(variants_recal_vcf_gz_tbi), ...], path(folder_ref), val(genome_name)])
+
+    )
+
+    // Split by chromosomes (region names are hard-coded to the strings "1".."22")
+    SPLIT_CHR(
+        JOINING.out.cohort_vcf                                                                      // ([val(key), val(object), [path(joint_genotyped_vcf_gz), path(joint_genotyped_vcf_gz_tbi)]])
+                    .combine(Channel.of(1..22).map{it.toString()})                                  // ([val(key), val(object), [path(joint_genotyped_vcf_gz), path(joint_genotyped_vcf_gz_tbi)], val(chr)])
+    )
+
+    emit:
+    split_vcf = SPLIT_CHR.out.split_vcf                                                             // ([val(key), val(object), val(chr), [path(split_vcf), path(split_vcf_tbi)]])
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/conf/base.config b/modules/ktest/calling/conf/base.config
new file mode 100644
index 0000000..0c1a014
--- /dev/null
+++ b/modules/ktest/calling/conf/base.config
@@ -0,0 +1,3 @@
+params {
+    cache_sing_folder = "/home/ktest/pipeline_env/software/truongphi"
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/conf/calling.config b/modules/ktest/calling/conf/calling.config
new file mode 100644
index 0000000..d297b36
--- /dev/null
+++ b/modules/ktest/calling/conf/calling.config
@@ -0,0 +1,5 @@
+params {
+    folder_ref            = "/home/ktest/pipeline_env/database/Variant_Calling/hg38"
+    genome_name           = "Homo_sapiens.GRCh38.dna.primary_assembly.fa"
+    human_knownsite_vcf   = "/home/ktest2/project/PRS/PRS-54/PRS-96/*.{vcf.gz,vcf.gz.tbi}"
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/conf/input.config b/modules/ktest/calling/conf/input.config
new file mode 100644
index 0000000..96f4cf4
--- /dev/null
+++ b/modules/ktest/calling/conf/input.config
@@ -0,0 +1,3 @@
+params{
+    from_mapping_tsv = ""    // path of the tab-separated sample sheet; main.nf reads params.from_mapping_tsv
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/conf/ktest_cluster.config b/modules/ktest/calling/conf/ktest_cluster.config
new file mode 100644
index 0000000..82694ac
--- /dev/null
+++ b/modules/ktest/calling/conf/ktest_cluster.config
@@ -0,0 +1,19 @@
+executor{
+    name                      = 'slurm'
+    queueSize                 = 30
+}
+
+process{
+
+    errorStrategy                     = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+    maxRetries                        =  5
+
+    queue                             = 'prod'
+    maxForks                          = 30
+}
+
+singularity{
+    enabled = true
+    cacheDir   = "$params.cache_sing_folder"
+    runOptions = "--bind /home/"
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/conf/test.config b/modules/ktest/calling/conf/test.config
new file mode 100644
index 0000000..ec45229
--- /dev/null
+++ b/modules/ktest/calling/conf/test.config
@@ -0,0 +1,4 @@
+params {
+    from_mapping = "/home/ktest/project/truongphi/PRS/PRS-21/PRS-66/from_mapping.tsv"
+    bam_folder   = "/home/ktest/share/Working_folder/TRUONGPHI/test_data/test_calling"
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/conf/test/test_human.config b/modules/ktest/calling/conf/test/test_human.config
new file mode 100644
index 0000000..d6ebb9c
--- /dev/null
+++ b/modules/ktest/calling/conf/test/test_human.config
@@ -0,0 +1,3 @@
+params {
+    from_mapping_tsv = "$projectDir/tests/test_sample_human.tsv"
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/conf/test/test_pig.config b/modules/ktest/calling/conf/test/test_pig.config
new file mode 100644
index 0000000..07517b1
--- /dev/null
+++ b/modules/ktest/calling/conf/test/test_pig.config
@@ -0,0 +1,6 @@
+params {
+    from_mapping_tsv    = "$projectDir/tests/test_sample_pig.tsv"
+    
+    folder_ref          = "/home/ktest2/project/PRS/PRS-62/PRS-199/test_data/pig_reference"
+    genome_name         = "GCA_000003025.6_Sscrofa11.1_genomic.fa"
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/main.nf b/modules/ktest/calling/main.nf
new file mode 100644
index 0000000..c4ee652
--- /dev/null
+++ b/modules/ktest/calling/main.nf
@@ -0,0 +1,21 @@
+#!/usr/bin/env nextflow
+include { CALLING }             from            "./calling.nf"
+
+workflow{
+    // INPUT CHANNELS: one tuple per sample-sheet row (header row skipped, tab-separated)
+    from_mapping      = channel.fromPath("$params.from_mapping_tsv")
+                               .splitCsv(skip: 1, sep: '\t')                                                        // ([val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai)])
+
+    calling_reference = channel.fromPath("${params.folder_ref}")
+                                .combine(channel.of("${params.genome_name}"))
+                                .combine(channel.fromPath("${params.human_knownsite_vcf}").collect().map{[it]})     // ([path(folder_ref), val(genome_ref_name), [human_knownsite_vcf, human_knownsite_vcf_tbi]])
+    //CALLING
+    CALLING(
+        from_mapping,
+        calling_reference
+    )
+
+    //EMIT (the result is currently not consumed by this entry workflow)
+    //CALLING.split_vcf                                                                                             // ([val(key), val(object), val(chr), [path(split_vcf), path(split_vcf_tbi)]])
+
+}
diff --git a/modules/ktest/calling/modules/apply_bqsr.nf b/modules/ktest/calling/modules/apply_bqsr.nf
new file mode 100644
index 0000000..a2191b5
--- /dev/null
+++ b/modules/ktest/calling/modules/apply_bqsr.nf
@@ -0,0 +1,28 @@
+process APPLY_BQSR{
+    // Applies a recalibration table to the input BAM with `gatk ApplyBQSR` and emits the recalibrated BAM.
+    tag "$key:$object:$sample_id"
+
+    container "phinguyen2000/gatk_tabix:v0.1.0"
+    memory   { 20.GB * task.attempt }
+    cpus     { 5 * task.attempt }
+    // memory and cpus are multiplied by the attempt number (task.attempt).
+    input:
+    tuple val(key), val(object), val(sample_id), path(dedup_bam), path(dedup_bai), path(recal_data_table), path(folder_ref), val(genome_name)
+    // dedup_bai is staged into the task directory; the script never references it by name.
+
+    output:
+    tuple val(key), val(object), val(sample_id), path("${key_string}_${object}_${sample_id}.recal.bam"), emit: recal_bam
+
+    script:
+    key_string = key ? key.join("-") : key    // '-'-joined key when key is truthy, otherwise key itself; used as file-name prefix
+
+    """
+    gatk  ApplyBQSR \
+            -R "$folder_ref/$genome_name" \
+            -I $dedup_bam \
+            -bqsr $recal_data_table\
+            -O "${key_string}_${object}_${sample_id}.recal.bam"
+    
+    """
+
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/modules/base_recalibrator.nf b/modules/ktest/calling/modules/base_recalibrator.nf
new file mode 100644
index 0000000..4eca852
--- /dev/null
+++ b/modules/ktest/calling/modules/base_recalibrator.nf
@@ -0,0 +1,32 @@
+process BASE_RECALIBRATOR{
+    tag "$key:$object:$rg_id"
+    // Builds the recalibration table for one sample with `gatk BaseRecalibrator`.
+    container "phinguyen2000/gatk_tabix:v0.1.0"
+    memory   { 20.GB * task.attempt }
+    cpus     { 5 * task.attempt }
+    // joint_genotyped_draft_vcf is either the draft cohort [vcf.gz, vcf.gz.tbi] pair or a /dev/null placeholder (staged as 'null').
+    input:
+    tuple val(key), val(object),
+          val(rg_id), path(dedup_bam),
+          path(dedup_bai), path(joint_genotyped_draft_vcf),
+          path(folder_ref), val(genome_name),
+          path(human_knownsite_vcf)
+    output:
+    tuple val(key), val(object), val(rg_id), path("${key_string}_${rg_id}.recal_data.table"), emit: recal_data_table
+
+    script:
+    // A path input holding one file is bound as a single Path, several files as a list: normalise both inputs to lists.
+    def draft_files = [joint_genotyped_draft_vcf].flatten()
+    def known_files = [human_knownsite_vcf].flatten()
+    // Placeholder present -> use the supplied known sites; otherwise use the draft cohort call set.
+    def candidates  = draft_files[0].getName() == 'null' ? known_files : draft_files
+    known_site = candidates.find{ it.getName().endsWith('.vcf.gz') } ?: candidates[0]    // prefer the VCF over its .tbi index
+    key_string = key ? key.join("-") : key
+    """
+    gatk BaseRecalibrator \
+        -I $dedup_bam \
+        -R "$folder_ref/$genome_name" \
+        --known-sites $known_site \
+        -O ${key_string}_${rg_id}.recal_data.table
+    """
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/modules/call_variants.nf b/modules/ktest/calling/modules/call_variants.nf
new file mode 100644
index 0000000..35f2932
--- /dev/null
+++ b/modules/ktest/calling/modules/call_variants.nf
@@ -0,0 +1,24 @@
+process CALL_VARIANTS{
+    tag "$key:$object:$rg_id"
+    // Runs `gatk HaplotypeCaller` in GVCF mode (-ERC GVCF) on the recalibrated BAM; emits the .vcf.gz together with its .tbi.
+    container "phinguyen2000/gatk_tabix:v0.1.0"
+    memory   { 20.GB * task.attempt }
+    cpus     { 5 * task.attempt }
+    input:
+    tuple val(key), val(object), val(rg_id), path(recal_bam), path(folder_ref), val(genome_name)
+    // NOTE(review): only the BAM is staged here, no BAM index input - confirm HaplotypeCaller can run on recal_bam without one.
+
+    output:
+    tuple val(key), val(object), val(rg_id), path("${key_string}_${object}_${rg_id}.variants.recal.vcf.{gz,gz.tbi}"), emit: variants_recal_vcf
+
+    script:
+    key_string = key ? key.join("-") : key    // '-'-joined key when key is truthy, otherwise key itself; used as file-name prefix
+
+    """
+    gatk  --java-options "-Xmx4g" HaplotypeCaller \
+           -R "$folder_ref/$genome_name" \
+           -I "$recal_bam" \
+           -O "${key_string}_${object}_${rg_id}.variants.recal.vcf.gz"\
+           -ERC GVCF
+    """
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/modules/draft_calling.nf b/modules/ktest/calling/modules/draft_calling.nf
new file mode 100644
index 0000000..cc8a09f
--- /dev/null
+++ b/modules/ktest/calling/modules/draft_calling.nf
@@ -0,0 +1,25 @@
+process DRAFT_CALLING{
+    tag "$key:$object:$rg_id"
+    // Runs `gatk HaplotypeCaller` in GVCF mode (-ERC GVCF) on the input BAM and emits only the .vcf.gz (no index file).
+    container  "phinguyen2000/gatk_tabix:v0.1.0"
+    memory   { 20.GB * task.attempt }
+    cpus     { 5 * task.attempt }
+    // memory and cpus are multiplied by the attempt number (task.attempt).
+    input:
+    tuple val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai), path(folder_ref), val(genome_name)
+    // dedup_bai is staged into the task directory; the script never references it by name.
+
+    output:
+    tuple val(key), val(object), val(rg_id), path("${key_string}_${rg_id}.raw_variants.vcf.gz"), emit: raw_variants_vcf
+
+    script:
+    key_string = key ? key.join("-") : key    // '-'-joined key when key is truthy, otherwise key itself; used as file-name prefix
+
+    """
+    gatk  --java-options "-Xmx4g" HaplotypeCaller \
+           -R "$folder_ref/$genome_name" \
+           -I "$dedup_bam"\
+           -O "${key_string}_${rg_id}.raw_variants.vcf.gz"\
+           -ERC GVCF
+    """
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/modules/draft_join.nf b/modules/ktest/calling/modules/draft_join.nf
new file mode 100644
index 0000000..e7960b8
--- /dev/null
+++ b/modules/ktest/calling/modules/draft_join.nf
@@ -0,0 +1,36 @@
+process DRAFT_JOIN{
+    tag "$key:$object: #sample::${num_vcf_file}"
+
+    container  "phinguyen2000/gatk_tabix:v0.1.0"
+    memory   { 20.GB * task.attempt }
+    cpus     { 5 * task.attempt }
+    // Indexes every per-sample draft gVCF, combines them (CombineGVCFs) and joint-genotypes the cohort (GenotypeGVCFs).
+    input:
+    tuple val(key), val(object), val(rg_ids), path(raw_variants_vcf), path(folder_ref), val(genome_name)
+
+    output:
+    tuple val(key), val(object), path("${key_string}_joint_genotyped.draft.vcf.{gz,gz.tbi}"), emit: joint_genotyped_draft
+
+    script:
+    // A one-sample cohort is bound as a single Path instead of a list: wrap + flatten so the code below always sees a list of files.
+    def vcf_files  = [raw_variants_vcf].flatten()
+    def vcf_names  = vcf_files.collect{ it.getName() }.join(" ")
+    variant_option = vcf_files.collect{ "--variant " + it.getName() }.join(" ")
+    num_vcf_file = rg_ids.size()
+    key_string = key ? key.join("-") : key
+
+    """
+    for vcf in $vcf_names; do
+        gatk IndexFeatureFile -I "\$vcf"
+    done
+    gatk CombineGVCFs \
+        -R "$folder_ref/$genome_name" \
+        $variant_option \
+        -O ${key_string}_cohort.draft.vcf.gz
+
+    gatk GenotypeGVCFs \
+        -R "$folder_ref/$genome_name" \
+        -V ${key_string}_cohort.draft.vcf.gz \
+        -O ${key_string}_joint_genotyped.draft.vcf.gz
+    """
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/modules/joining.nf b/modules/ktest/calling/modules/joining.nf
new file mode 100644
index 0000000..76560f2
--- /dev/null
+++ b/modules/ktest/calling/modules/joining.nf
@@ -0,0 +1,34 @@
+process JOINING{
+    tag "$key:$object:#sample::${num_vcf_file}"
+
+    container "phinguyen2000/gatk_tabix:v0.1.0"
+    memory   { 20.GB * task.attempt }
+    cpus     { 5 * task.attempt }
+    // Combines the per-sample gVCFs of one (key, object) cohort (CombineGVCFs) and joint-genotypes them (GenotypeGVCFs).
+    input:
+    tuple val(key), val(object), val(rg_ids), path(variants_vcfs), path(variants_vcf_tbis), path(folder_ref), val(genome_name)
+    // variants_vcf_tbis are staged into the task directory; the script never references them by name.
+
+    output:
+    tuple val(key), val(object), path("${key_string}_${object}_joint_genotyped.vcf.{gz,gz.tbi}"), emit: cohort_vcf
+
+    script:
+    // A one-sample cohort is bound as a single Path instead of a list: wrap + flatten so the code below always sees a list of files.
+    def vcf_files  = [variants_vcfs].flatten()
+    // One --variant argument per per-sample gVCF.
+    variant_option = vcf_files.collect{ "--variant " + it.getName() }.join(" ")
+
+    num_vcf_file = rg_ids.size()
+    key_string = key ? key.join("-") : key
+
+    """
+    gatk CombineGVCFs \
+        -R "$folder_ref/$genome_name" \
+        $variant_option \
+        -O ${key_string}_${object}_cohort.vcf.gz
+    gatk GenotypeGVCFs \
+        -R "$folder_ref/$genome_name" \
+        -V ${key_string}_${object}_cohort.vcf.gz \
+        -O ${key_string}_${object}_joint_genotyped.vcf.gz
+    """
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/modules/split_chr.nf b/modules/ktest/calling/modules/split_chr.nf
new file mode 100644
index 0000000..a1c3b00
--- /dev/null
+++ b/modules/ktest/calling/modules/split_chr.nf
@@ -0,0 +1,21 @@
+process SPLIT_CHR{
+    tag "$key:$object:chr$chr"
+    // Extracts the region named by `chr` from the cohort VCF with `bcftools view` and writes a .tbi index with `bcftools index -t`.
+    container "phinguyen2000/bcftools:v0.1.0"
+    memory   { 20.GB * task.attempt }
+    cpus     { 5 * task.attempt }
+    // joint_genotyped: staged cohort files; NOTE(review): the script assumes element [0] is the .vcf.gz and not its index - confirm ordering.
+    input:
+    tuple val(key), val(object), path(joint_genotyped), val(chr)
+
+    output:
+    tuple val(key), val(object), val(chr), path("${key_string}_${object}_chr${chr}_split.vcf.{gz,gz.tbi}"), emit: split_vcf
+
+    script:
+    key_string = key ? key.join("-") : key    // '-'-joined key when key is truthy, otherwise key itself; used as file-name prefix
+
+    """
+    bcftools view --regions "$chr" -O z -o "${key_string}_${object}_chr${chr}_split.vcf.gz" ${joint_genotyped[0]}
+    bcftools index -t ${key_string}_${object}_chr${chr}_split.vcf.gz
+    """
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/nextflow.config b/modules/ktest/calling/nextflow.config
new file mode 100755
index 0000000..a144cd5
--- /dev/null
+++ b/modules/ktest/calling/nextflow.config
@@ -0,0 +1,26 @@
+// Load params for inputs
+includeConfig 'conf/input.config'
+
+// Load params common for all modules
+includeConfig 'conf/base.config'
+
+// Load config for modules
+includeConfig 'conf/calling.config'
+
+
+nextflow.enable.dsl = 2
+
+tower {
+  enabled = true
+  accessToken = "$TOWER_ACCESS_TOKEN"
+  workspaceId = '222915005021784'
+}
+
+
+profiles{
+    ktest_cluster  { includeConfig 'conf/ktest_cluster.config' }
+
+    // test profiles
+    test_human     { includeConfig 'conf/test/test_human.config' }
+    test_pig       { includeConfig 'conf/test/test_pig.config'   }
+}
\ No newline at end of file
diff --git a/modules/ktest/calling/tests/test_sample_human.tsv b/modules/ktest/calling/tests/test_sample_human.tsv
new file mode 100644
index 0000000..8e79859
--- /dev/null
+++ b/modules/ktest/calling/tests/test_sample_human.tsv
@@ -0,0 +1,4 @@
+key	object	rg_id	dedup.bam	dedup.bai
+null	human	HG00096	/home/ktest/share/Working_folder/TRUONGPHI/test_data/test_calling/HG00096_Capture.dedup.bam	/home/ktest/share/Working_folder/TRUONGPHI/test_data/test_calling/HG00096_Capture.dedup.bai
+null	human	HG00102	/home/ktest/share/Working_folder/TRUONGPHI/test_data/test_calling/HG00102_Capture.dedup.bam	/home/ktest/share/Working_folder/TRUONGPHI/test_data/test_calling/HG00102_Capture.dedup.bai
+null	human	NA12878	/home/ktest/share/Working_folder/TRUONGPHI/test_data/test_calling/NA12878_Capture.dedup.bam	/home/ktest/share/Working_folder/TRUONGPHI/test_data/test_calling/NA12878_Capture.dedup.bai
diff --git a/modules/ktest/calling/tests/test_sample_pig.tsv b/modules/ktest/calling/tests/test_sample_pig.tsv
new file mode 100755
index 0000000..0d0ffc8
--- /dev/null
+++ b/modules/ktest/calling/tests/test_sample_pig.tsv
@@ -0,0 +1,2 @@
+key	object	rg_id	dedup.bam	dedup.bai
+null	pig	SRR5336868	/home/ktest2/project/PRS/PRS-62/PRS-199/test_data/subsampled_SRR5336868.bam	/home/ktest2/project/PRS/PRS-62/PRS-199/test_data/subsampled_SRR5336868.bam.bai
\ No newline at end of file

Process	Input Channel	Output Channel	Description
DRAFT_CALLING	[val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai), path(folder_ref), val(genome_name)]	[val(key), val(object), val(rg_id), path(raw_variants_vcf)]	Draft variant calling
DRAFT_JOIN	[val(key), val(object), val(rg_ids), path(raw_variants_vcf), path(folder_ref), val(genome_name)]	[val(key), val(object), path(joint_genotyped_draft{vcf.gz,vcf.gz.tbi})]	Join draft variant VCF files into a draft cohort VCF file
BASE_RECALIBRATOR	[val(key), val(object), val(rg_id), path(dedup_bam), path(dedup_bai), path(joint_genotyped_draft_vcf), path(folder_ref), val(genome_name), path(human_knownsite_vcf)]	[val(key), val(object), val(rg_id), path(recal_data_table)]	Model systematic errors in the reported base quality scores (mismatches with the reference outside known variant sites) and write the recalibration table
APPLY_BQSR	[val(key), val(object), val(sample_id), path(dedup_bam), path(dedup_bai), path(recal_data_table), path(folder_ref), val(genome_name)]	[val(key), val(object), val(sample_id), path(recal_bam)]	Apply base quality score recalibration
CALL_VARIANTS	[val(key), val(object), val(rg_id), path(recal_bam), path(folder_ref), val(genome_name)]	[val(key), val(object), val(rg_id), path(recal_vcf)]	Call germline SNPs and indels via local re-assembly of haplotypes
JOINING	[val(key), val(object), val(rg_ids), path(variants_vcfs), path(variants_vcf_tbis), path(folder_ref), val(genome_name)]	[val(key), val(object), path(joint_genotyped{vcf.gz,vcf.gz.tbi})]	Join the recalibrated per-sample variant VCF files into the final cohort VCF file
SPLIT_CHR	[val(key), val(object), path(joint_genotyped), val(chr)]	[val(key), val(object), val(chr), path(split_vcf{vcf.gz,vcf.gz.tbi})]	Split the cohort vcf file by chromosome