
Commit

ensuring all in place
Matiss Ozols committed May 24, 2024
1 parent a0b20fc commit e25e274
Showing 12 changed files with 171 additions and 69 deletions.
29 changes: 22 additions & 7 deletions bin/gather_minimal_dataset.py
@@ -46,8 +46,11 @@

COLUMNS_AZIMUTH = {
'Azimuth:predicted.celltype.l2': 'azimuth.celltyp.l2',
'Azimuth:predicted.celltype.l3': 'azimuth.celltyp.l3',
'Azimuth:predicted.celltype.l1': 'azimuth.celltyp.l1',
'Azimuth:predicted.celltype.l2.score': 'azimuth.pred.score.l2',
'Azimuth:mapping.score.celltype.l2': 'azimuth.map.score',
'Celltypist*':'Celltypist*',
}

COLUMNS_DECONV = {
@@ -66,9 +69,10 @@
'total_counts': 'qc.umi.count.total',
'total_counts_gene_group__mito_transcript': 'qc.umi.count.mt',
'pct_counts_gene_group__mito_transcript': 'qc.umi.perc.mt',
'pct_counts_in_top_500_genes':'pct_counts_in_top_500_genes',
'n_genes_by_counts': 'qc.genes.detected.count',
'Azimuth:L0_Azimuth:predicted.celltype.l2':'azimuth.celltyp.l0',
'Azimuth:L1_Azimuth:predicted.celltype.l2':'azimuth.celltyp.l1',
'Azimuth:L0_Azimuth:predicted.celltype.l2':'azimuth.celltyp.l0.mapped_fromL2',
'Azimuth:L1_Azimuth:predicted.celltype.l2':'azimuth.celltyp.l1.mapped_fromL2',
'total_counts_gene_group__mito_transcript':'total_counts_gene_group__mito_transcript',
'pct_counts_gene_group__mito_transcript':'pct_counts_gene_group__mito_transcript',
'total_counts_gene_group__mito_protein':'total_counts_gene_group__mito_protein',
@@ -91,7 +95,8 @@
'experiment_id': 'experiment.id',
'tranche.id':'tranche.id',
'chromium_run_id': 'chromium.run.id',
'chromium_lane': 'chromium.lane'
'chromium_lane': 'chromium.lane',
'instrument':'instrument'
}
COLUMNS_SCRUBLET = {
'scrublet__multiplet_scores': 'scrublet.scores',
@@ -229,8 +234,9 @@ def load_scrublet_assignments(expid, datadir_scrublet):
filpath = None
fnam = '{}{}'.format(expid, SCRUBLET_ASSIGNMENTS_FNSUFFIX)
fnam2 = '{}{}'.format(expid, SCRUBLET_ASSIGNMENTS_FNSUFFIX.replace('-',''))
fnam3 = '{}{}'.format(expid, 'scrublet.tsv.gz')
for fn in os.listdir(datadir_scrublet):
if fn == fnam or fn == fnam2:
if fn == fnam or fn == fnam2 or fn == fnam3:
filpath = os.path.join(datadir_scrublet, fn)
break

@@ -327,9 +333,15 @@ def gather_donor(donor_id, ad, ad_lane_raw, azimuth_annot, qc_obs, columns_outpu
ad.raw = ad_lane_raw[ad.obs.index, :]
if donor_id != "unassigned" and donor_id != "doublet":
# add annotation from QC
df = pandas.concat([ad.obs, azimuth_annot.loc[azimuth_annot.Donor == donor_id]], axis = 1, join = 'outer')
donor_azt = azimuth_annot.loc[azimuth_annot.Donor == donor_id]
donor_azt = donor_azt.loc[donor_azt.Exp == expid]
donor_azt['barcode'] = donor_azt.index.str.split('-').str[0]+'-'+donor_azt.index.str.split('-').str[1]
donor_azt = donor_azt.set_index('barcode')
df = pandas.concat([ad.obs, donor_azt], axis = 1, join = 'outer')
df =df.loc[:,~df.columns.duplicated()]
df = df[['experiment_id'] + list(COLUMNS_DECONV.keys()) + list(COLUMNS_AZIMUTH.keys())]
df = df[['experiment_id'] +
list(set(df.columns).intersection(set(COLUMNS_DECONV.keys()))) +
list(set(df.columns).intersection(set(COLUMNS_AZIMUTH.keys())))]
try:
df = get_lane_and_runid_from_experiment_id(df, insert_pos = 1)
except:
@@ -513,7 +525,10 @@ def gather_pool(expid, args, df_raw, df_cellbender, adqc, oufh = sys.stdout,lane
##########################
# Scrublet
#########################
datadir_scrublet=glob.glob(f'{args.results_dir}/*/multiplet.method=scrublet')[0]
try:
datadir_scrublet=glob.glob(f'{args.results_dir}/*/multiplet.method=scrublet')[0]
except:
datadir_scrublet=glob.glob(f'{args.results_dir}/multiplet.method=scrublet')[0]
if os.path.isdir(datadir_scrublet):
# Scrublet loading QC
try:
2 changes: 1 addition & 1 deletion bin/run_celltypist.py
@@ -125,7 +125,7 @@ def run_celltypist(samplename, filtered_matrix_h5, celltypist_model,
# try the .raw.X attribute.
# If none of them fit into the desired data type or the expression matrix is not properly normalised, an error will be raised.
logging.info('... running sc.pp.normalize_total(adata, target_sum=1e4)')
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
logging.info('... running sc.pp.log1p(adata, copy = False)')
sc.pp.log1p(adata, copy = False)

17 changes: 15 additions & 2 deletions bin/scanpy_split_h5ad.py
@@ -32,7 +32,11 @@ def write_h5_out_for_ct(ad,oufn_list_AZ,oufnam,oufn_list,samples,samples_AZ,bl,c
# assumes that cells failing qc already were stripped out
# ad.obs = ad.obs[['convoluted_samplename', 'cell_passes_qc']]
# ad.var = ad.var[['feature_types', 'genome', 'gene_symbols']]
adb.obs['log10_ngenes_by_count'] = np.log10(adb.obs['n_genes_by_counts']) / np.log10(adb.obs['total_counts'])
try:
adb.obs['log10_ngenes_by_count'] = np.log10(adb.obs['n_genes_by_counts']) / np.log10(adb.obs['total_counts'])
except:
print('some cols dont exist')

adb_AZ = adb.copy()
# disable
adb_AZ.obs = pandas.DataFrame(adb_AZ.obs.index, index = adb_AZ.obs.index, columns = ["cell_barcode"])
@@ -70,7 +74,10 @@ def write_h5_out_for_ct(ad,oufn_list_AZ,oufnam,oufn_list,samples,samples_AZ,bl,c
adb_AZ.var['genome']='GRCh38'
vdf = adb_AZ.var[["feature_types", "genome"]]
vdf.insert(1,"gene_ids", vdf.index)
vdf.index = pandas.Index(adb_AZ.var['gene_symbols'].astype('str'))
try:
vdf.index = pandas.Index(adb_AZ.var['gene_symbols'].astype('str'))
except:
print('gene_symbols not available')
#ad.var = vdf.set_index("gene_symbols", drop = True, verify_integrity = False)
adb_AZ.var = vdf

@@ -103,6 +110,12 @@ def split_h5ad_by_batch(ad, oufnprfx, colnam_batch = 'batch', anndata_compressio
oufn_list_fnam = '{}_files.txt'.format(oufnprfx)
oufn_list = []
oufn_list_AZ = []
try:
print(set(ad.obs[colnam_batch]))
# here we do not have a batch id since the data is not deconvoluted yet
except:
ad.obs[colnam_batch] = oufnprfx

batch_labels = pandas.Categorical(ad.obs[colnam_batch].apply(lambda a: a.split('__')[0])) # <class 'pandas.core.series.Series'>
samples = {}
samples_AZ = {}
92 changes: 61 additions & 31 deletions bin/transfer_data.py
@@ -83,47 +83,77 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res
print('missing6')
# Fetch Gather
df_raw = pd.read_table(input_table, index_col = 'experiment_id')

# 'Note that the names for the future projects may be different - have to be handled on the Nextflow modules'
print('prepearing fetch folder')
try:
os.mkdir(f'{name_dir}/Fetch Pipeline')
except:
print('exists')
metadata_table = pd.DataFrame()
d1 = glob.glob(f"{results_dir}/*/web_summary.html")
d2 = glob.glob(f"{results_dir}/*/*/web_summary.html")
d3 = glob.glob(f"{results_dir}/*/*/*/web_summary.html")
d4 =glob.glob(f"{results_dir}/*/*/*/*/web_summary.html")
ts3 = [*d1, *d2,*d3,*d4]
ts3 = pd.DataFrame(ts3,columns=['col'])
for folder in df_raw.index:
dir3 = f"{df_raw.loc[folder, 'data_path_10x_format']}"
try:
copyfile(f'{dir3}/web_summary.html', f'{name_dir}/Fetch Pipeline/html_{folder}.html')
metadata = pd.read_csv(f'{dir3}/metrics_summary.csv',sep=',',index_col=False)
metadata['Sample_id']=folder

ts3 = []
c1 =0
for i,p2 in df_raw.iterrows():
# print(p1)
p1 = p2['data_path_10x_format']

d1 = glob.glob(f"{p1}/*/web_summary.html")
d2 = glob.glob(f"{p1}/*/*/web_summary.html")
d3 = glob.glob(f"{p1}/*/*/*/web_summary.html")
d4 =glob.glob(f"{p1}/*/*/*/*/web_summary.html")
ts3 = [ *d1, *d2,*d3,*d4]
for t1 in ts3:
copyfile(t1, f'{name_dir}/Fetch Pipeline/html_{i}.html')

d1 = glob.glob(f"{p1}/*/metrics_summary.csv")
d2 = glob.glob(f"{p1}/*/*/metrics_summary.csv")
d3 = glob.glob(f"{p1}/*/*/*/metrics_summary.csv")
d4 =glob.glob(f"{p1}/*/*/*/*/metrics_summary.csv")
ts3 = [ *d1, *d2,*d3,*d4]
for t1 in ts3:
metadata = pd.read_csv(t1,sep=',',index_col=False)
metadata['Sample_id']=i
metadata.set_index('Sample_id',drop=True,inplace=True)
metadata_table=pd.concat([metadata_table,metadata])
except:
try:
try:
copyfile(f'{results_dir}/handover/{name_dir}/Fetch Pipeline/html_{folder}.html', f'{name_dir}/Fetch Pipeline/html_{folder}.html')
except:
try:
copyfile(ts3[ts3['col'].str.contains(f'{folder}')]['col'].values[0], f'{name_dir}/Fetch Pipeline/html_{folder}.html')
except:
copyfile(f'/lustre/scratch123/hgi/projects/ukbb_scrna/pipelines/Pilot_UKB/qc/{name}/results_rsync2/results/handover/{name_dir}/Fetch Pipeline/html_{folder}.html', f'{name_dir}/Fetch Pipeline/html_{folder}.html')
except:
_='pass'
try:
metadata_table = pd.read_csv(f'{results_dir}/handover/{name_dir}/Fetch Pipeline/CSV/Submission_Data_Pilot_UKB.file_metadata.tsv',sep='\t')
metadata_table.set_index('Sample_id',drop=True,inplace=True)
if all(metadata_table.columns == metadata.columns):
metadata_table=pd.concat([metadata_table,metadata.T])
else:
metadata_table=pd.concat([metadata_table,metadata])
except:
try:
metadata_tablemetadata_table = pd.read_csv(f'{results_dir}/handover/{name_dir}/Fetch Pipeline/Submission_Data_Pilot_UKB.file_metadata.tsv',sep=',')
metadata_table.set_index('Sample_id',drop=True,inplace=True)
except:
metadata_table = pd.DataFrame()
metadata_table=pd.concat([metadata_table,metadata])
# ts3 = pd.DataFrame(ts3,columns=['col'])

# for

# for folder in df_raw.index:
# dir3 = f"{df_raw.loc[folder, 'data_path_10x_format']}"
# try:
# copyfile(f'{dir3}/web_summary.html', f'{name_dir}/Fetch Pipeline/html_{folder}.html')
# metadata = pd.read_csv(f'{dir3}/metrics_summary.csv',sep=',',index_col=False)
# metadata['Sample_id']=folder
# metadata.set_index('Sample_id',drop=True,inplace=True)
# metadata_table=pd.concat([metadata_table,metadata])
# except:
# try:
# try:
# copyfile(f'{results_dir}/handover/{name_dir}/Fetch Pipeline/html_{folder}.html', f'{name_dir}/Fetch Pipeline/html_{folder}.html')
# except:
# try:
# copyfile(ts3[ts3['col'].str.contains(f'{folder}')]['col'].values[0], f'{name_dir}/Fetch Pipeline/html_{folder}.html')
# except:
# copyfile(f'/lustre/scratch123/hgi/projects/ukbb_scrna/pipelines/Pilot_UKB/qc/{name}/results_rsync2/results/handover/{name_dir}/Fetch Pipeline/html_{folder}.html', f'{name_dir}/Fetch Pipeline/html_{folder}.html')
# except:
# _='pass'
# try:
# metadata_table = pd.read_csv(f'{results_dir}/handover/{name_dir}/Fetch Pipeline/CSV/Submission_Data_Pilot_UKB.file_metadata.tsv',sep='\t')
# metadata_table.set_index('Sample_id',drop=True,inplace=True)
# except:
# try:
# metadata_tablemetadata_table = pd.read_csv(f'{results_dir}/handover/{name_dir}/Fetch Pipeline/Submission_Data_Pilot_UKB.file_metadata.tsv',sep=',')
# metadata_table.set_index('Sample_id',drop=True,inplace=True)
# except:
# metadata_table = pd.DataFrame()
try:
os.mkdir(f'{name_dir}/Fetch Pipeline/CSV')
except:
2 changes: 1 addition & 1 deletion conf/modules.conf
@@ -235,7 +235,7 @@ process {
}

withName: CONCORDANCE_CALCLULATIONS{
cpus = 10
cpus = { 10 * task.attempt }
time = { 24.h * task.attempt }
memory = { 80.GB * task.attempt }
}
2 changes: 1 addition & 1 deletion modules/nf-core/modules/gather_data/main.nf
@@ -20,7 +20,7 @@ process GATHER_DATA{
output:
path("${subdir}", emit:outfiles_dataset)
path("${subdir}_summary", emit:outfiles_dataset2)

path("Donor_Quantification/*/*.tsv", emit: barcodes_files)
val(outdir, emit: outdir_dataset)

script:
2 changes: 1 addition & 1 deletion modules/nf-core/modules/split_batch_h5ad/main.nf
@@ -11,7 +11,7 @@ process SPLIT_BATCH_H5AD {
}

input:
path(file__anndata) // anndata h5ad file seurat_azimuth_pbmc_1.0
tuple val(name), path(file__anndata) // anndata h5ad file seurat_azimuth_pbmc_1.0
val(mode)

output:
modules/nf-core/modules/subset_bam_per_barcodes_and_variants/main.nf
@@ -23,4 +23,40 @@ process SUBSET_BAM_PER_BARCODES_AND_VARIANTS {
filter_bam_file_for_popscle_dsc_pileup.sh ${bam} ${barcodes} srt_${vcf} ${sample}_filtered.bam
"""

}


process SUBSET_BAM_PER_BARCODES{


tag "${samplename}"
label 'process_low'

if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "/lustre/scratch123/hgi/projects/ukbb_scrna/pipelines/Pilot_UKB/carls_data/analysis/bam_tool_processing_05_04_2024.sif"
} else {
container "mercury/bam_tool_processing:05_04_2024"
}

publishDir path: "${params.outdir}/handover/Donor_Quantification/${sample}",
mode: "${params.copy_mode}",
overwrite: "true"

input:

// CRD_CMB12979963, /lustre/scratch123/hgi/mdt2/teams/hgi/mo11/tmp_projects/cardinal/qc_F3/UKBB_ELGH_5th_July_2022/work/5c/dada93d9cbebef97d30e4014e131cf/input__CRD_CMB12979963/possorted_genome_bam.bam, /lustre/scratch123/hgi/mdt2/teams/hgi/mo11/tmp_projects/cardinal/qc_F3/UKBB_ELGH_5th_July_2022/work/5c/dada93d9cbebef97d30e4014e131cf/input__CRD_CMB12979963/possorted_genome_bam.bam.bai, /lustre/scratch123/hgi/mdt2/teams/hgi/mo11/tmp_projects/cardinal/qc_F3/UKBB_ELGH_5th_July_2022/results/nf-preprocessing/cellbender/CRD_CMB12979963/cellbender-epochs_250__learnrt_0pt000005__zdim_100__zlayer_500__lowcount_10/cellbender-FPR_0pt1-filtered_10x_mtx/barcodes.tsv.gz
tuple val(sample), val(sample_donor),path(barcodes), path(bam), path(bai), path(cellbender_barcodes)
path(genome)
output:
tuple val(sample),path("${sample_donor}.cram"), emit: cram_files
// tuple val(sample), val("${donor}") ,path("${sample}_filtered.bam"),path("${sample}_filtered.bam.csi"), path(barcodes), emit: freebayes_input

script:

"""
samtools view --threads ${task.cpus} --tag-file CB:${barcodes} \
--cram -T ${genome}/genome.fa \
-o ${sample_donor}.cram ${bam}
"""

}
3 changes: 2 additions & 1 deletion subworkflows/celltype.nf
@@ -16,7 +16,8 @@ workflow celltype{
main:

log.info '---Splitting the assignment for each batch---'
file__anndata_merged.map{val1 -> tuple('full_ct', val1)}.set{out1}
// file__anndata_merged.map{val1 -> tuple('full_ct', val1)}.set{out1}
file__anndata_merged.subscribe { println "file__anndata_merged: $it" }
SPLIT_BATCH_H5AD(file__anndata_merged,params.split_ad_per_bach)

// Here we may want to not split it and just pass in an entire h5ad file for annotations.
22 changes: 12 additions & 10 deletions subworkflows/data_handover.nf
@@ -2,6 +2,7 @@ include { GATHER_DATA; SPLIT_DATA_BY_STUDY} from "$projectDir/modules/nf-core/m
include { ENCRYPT_DIR; ENCRYPT_TARGET } from "$projectDir/modules/local/encrypt"
include { TRANSFER;SUMMARY_STATISTICS_PLOTS } from "$projectDir/modules/nf-core/modules/summary_statistics_plots/main"
include { split_bam_by_donor } from "$projectDir/modules/local/cellranger_bam_per_donor"
include { SUBSET_BAM_PER_BARCODES } from "$projectDir/modules/nf-core/modules/subset_bam_per_barcodes_and_variants/main"

workflow data_handover{
take:
@@ -10,23 +11,24 @@ workflow data_handover{
qc_input
ch_poolid_csv_donor_assignments
sample_possorted_bam_vireo_donor_ids

genome
main:
log.info 'running data handover'
// outdir = file(outdir)
// outdir = file("${launchDir}/${outdir}").ifEmpty(outdir)






GATHER_DATA(outdir,qc_input.collect(),input_channel)


if (params.split_bam){
split_bam_by_donor(sample_possorted_bam_vireo_donor_ids, params.reference_assembly_fasta_dir_bam_split)
ENCRYPT_TARGET(split_bam_by_donor.out.possorted_cram_files)
cram_encrypted_dirs = ENCRYPT_TARGET.out.encrypted_dir
// val(sample), path(barcodes), path(bam)
GATHER_DATA.out.barcodes_files.flatten().map{sample -> tuple("${sample}".replaceFirst(/.*\//,"").replaceFirst(/\..*/,""),"${sample}".replaceFirst(/.*\//,"").replaceFirst(/\.tsv.*/,""),sample)}.set{barcodes}
barcodes.combine(sample_possorted_bam_vireo_donor_ids, by: 0).set{full_split_chanel_input}
// full_split_chanel_input.subscribe { println "full_split_chanel_input: $it" }
// genome.subscribe { println "genome: $it" }
SUBSET_BAM_PER_BARCODES(full_split_chanel_input,genome)
// split_bam_by_donor(sample_possorted_bam_vireo_donor_ids, genome)
// ENCRYPT_TARGET(split_bam_by_donor.out.possorted_cram_files)
// cram_encrypted_dirs = ENCRYPT_TARGET.out.encrypted_dir
} else {
cram_encrypted_dirs = Channel.empty()
}
14 changes: 5 additions & 9 deletions subworkflows/main_deconvolution.nf
@@ -17,7 +17,7 @@ include { match_genotypes } from './match_genotypes'
include {ENHANCE_STATS_GT_MATCH } from "$projectDir/modules/nf-core/modules/genotypes/main"
include {SUBSET_WORKF} from "$projectDir/modules/nf-core/modules/subset_genotype/main"
include {REPLACE_GT_DONOR_ID2; REPLACE_GT_DONOR_ID2 as REPLACE_GT_DONOR_ID_SUBS } from "$projectDir/modules/nf-core/modules/genotypes/main"
include { RETRIEVE_RECOURSES; STAGE_FILE } from "$projectDir/subworkflows/local/retrieve_recourses"
include { STAGE_FILE } from "$projectDir/subworkflows/local/retrieve_recourses"
include {GT_MATCH_POOL_IBD } from "$projectDir/modules/nf-core/modules/genotypes/main"
include { MATCH_GT_VIREO } from "$projectDir/modules/nf-core/modules/genotypes/main"

@@ -42,16 +42,12 @@ workflow main_deconvolution {
ch_experiment_donorsvcf_donorslist
scrublet_paths
vcf_input
genome

main:
log.info "#### running DECONVOLUTION workflow #####"

if (params.reference_assembly_fasta_dir=='https://yascp.cog.sanger.ac.uk/public/10x_reference_assembly'){
RETRIEVE_RECOURSES()
genome = RETRIEVE_RECOURSES.out.reference_assembly
}else{
genome = "${params.reference_assembly_fasta_dir}"
}

vcf_candidate_snps = STAGE_FILE(params.cellsnp.vcf_candidate_snps)
// genome.subscribe { println "genome: $it" }

@@ -303,10 +299,10 @@ workflow main_deconvolution {
// .set { gt_math_pool_against_panel_input2 }
FREEBAYES.out.gt_pool.groupTuple(by:0).set{fbb1}

fbb1.subscribe { println "fbb1: $it" }
// fbb1.subscribe { println "fbb1: $it" }
MERGE_GENOTYPES_IN_ONE_VCF_IDX_PAN(fbb1,'freebayes')
MERGE_GENOTYPES_IN_ONE_VCF_IDX_PAN.out.gt_pool.groupTuple(by:0).set{fbb2}
fbb2.subscribe { println "fbb2: $it" }
// fbb2.subscribe { println "fbb2: $it" }
MERGE_GENOTYPES_IN_ONE_VCF_FREEBAYES(fbb2,'freebayes')
gt_math_pool_against_panel_input2 = MERGE_GENOTYPES_IN_ONE_VCF_FREEBAYES.out.gt_pool.combine(ch_ref_vcf)
// cell_assignments =
