Skip to content

Commit

Permalink
separated routines for merging hla alleles to allow incorporating cus…
Browse files Browse the repository at this point in the history
…tom alleles more easily
  • Loading branch information
riasc committed Jan 16, 2024
1 parent 32e81c8 commit 97a9eeb
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 55 deletions.
11 changes: 5 additions & 6 deletions .tests/integration/config_basic/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ basequal: 20 # overall required base quality

### data
data:
name: patient2_test
name: basic_sample
dnaseq:
dna_normal: TESLA_testdata/patient2/WES/TESLA_9_1.fastq.gz TESLA_testdata/patient2/WES/TESLA_9_2.fastq.gz
dna_tumor: TESLA_testdata/patient2/WES/TESLA_10_1.fastq.gz TESLA_testdata/patient2/WES/TESLA_10_2.fastq.gz
rnaseq:
rna_tumor: TESLA_testdata/patient2/RNA/TESLA_11_1.fastq.gz TESLA_testdata/patient2/RNA/TESLA_11_2.fastq.gz
normal: dna_normal

custom:
variants:
hlatyping:
Expand Down Expand Up @@ -84,16 +84,15 @@ quantification:
mode: BOTH # DNA, RNA or BOTH

hlatyping:
class: BOTH # I, II or BOTH
mode: BOTH # DNA, RNA or BOTH
class: I # I, II or BOTH
# specific path for class II hlatyping (only required when class: II, or BOTH)
MHC-I_mode: BOTH # DNA, RNA, or BOTH (if empty alleles have to be specified in custom)
MHC-I_mode: DNA, RNA # any combination of DNA, RNA, custom (custom alleles have to be specified in the custom hlatyping section)
MHC-II_mode: BOTH # DNA, RNA, or BOTH (if empty alleles have to be specified in custom)
freqdata: ./hlahd_files/freq_data/
split: ./hlahd_files/HLA_gene.split.txt
dict: ./hlahd_files/dictionary/

priorization:
prioritization:
class: I # I, II or BOTH
lengths:
MHC-I: 8,9,10,11
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ cd ScanNeo2
### Running the Workflow
To run the workflow, use the following command:
To run the workflow, use the following command:
```bash
cd /path/to/your/working/directory/
Expand Down
30 changes: 24 additions & 6 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,11 @@ def aggregate_mhcI_PE(wildcards):
no=glob_wildcards(os.path.join(checkpoint_output, "R1_{no}.bam")).no)


def get_mhcI_alleles(wildcards):
def get_predicted_mhcI_alleles(wildcards):
values = []

# routines to genotype from DNA
if config['hlatyping']['MHC-I_mode'] in ['DNA', 'BOTH']:
if "DNA" in config['hlatyping']['MHC-I_mode']:
if config['data']['dnaseq'] is not None:
for key in config['data']['dnaseq'].keys():
if key not in config['data']['normal']:
Expand All @@ -216,7 +216,7 @@ def get_mhcI_alleles(wildcards):
print('dnaseq data has not been specified in the config file, but specified mode for hla genotyping in config file is DNA or BOTH -- will be ignored')

# routines to genotype from RNA
if config['hlatyping']['MHC-I_mode'] in ['RNA', 'BOTH']:
if "RNA" in config['hlatyping']['MHC-I_mode']:
if config['data']['rnaseq'] is not None:
for key in config['data']['rnaseq'].keys():
if key not in config['data']['normal']:
Expand All @@ -230,8 +230,11 @@ def get_mhcI_alleles(wildcards):


# if alleles have been specified in the config file, add them to the list
if config['data']['custom']['hlatyping']['MHC-I'] is not None:
values.append(config['custom']['hlatyping']['MHC-I'])
#if "custom" in config['hlatyping']['MHC-I_mode']:
#if config['data']['custom']['hlatyping']['MHC-I'] is not None:
#values.append(config['custom']['hlatyping']['MHC-I'])
#if config['data']['custom']['hlatyping']['MHC-I'] is not None:
#values.append(config['custom']['hlatyping']['MHC-I'])


if len(values) == 0:
Expand All @@ -240,14 +243,28 @@ def get_mhcI_alleles(wildcards):

return values

def get_all_mhcI_alleles(wildcards):
    """Collect every configured source of MHC class I alleles for a sample.

    Intended as a Snakemake input function. The result contains the table of
    predicted (genotyped) alleles when ``MHC-I_mode`` mentions DNA and/or RNA,
    plus the custom alleles from the config when it mentions ``custom``.

    Args:
        wildcards: Snakemake wildcards; only ``wildcards.sample`` is read.

    Returns:
        list: input entries for the rule that combines all MHC-I alleles.

    Exits:
        Prints a hint and terminates with status 1 when no source is
        configured at all.
    """
    values = []

    # An empty ``MHC-I_mode`` entry is loaded as None; treat it as "no mode"
    # so the membership tests below do not raise TypeError.
    mode = config['hlatyping']['MHC-I_mode'] or ""

    ##### MHC CLASS I
    if "DNA" in mode or "RNA" in mode:
        # NOTE(review): the rule `merge_predicted_mhcI_allels` declares its
        # output as results/{sample}/hla/genotyping/mhc-I.tsv (without the
        # `mhc-I/` directory) -- confirm which of the two paths is intended.
        values += expand("results/{sample}/hla/mhc-I/genotyping/mhc-I.tsv",
                         sample=wildcards.sample)

    if "custom" in mode:
        custom = config["data"]["custom"]["hlatyping"]["MHC-I"]
        if isinstance(custom, str):
            # `values += "some/path"` would add one entry per character.
            values.append(custom)
        elif custom is not None:
            values += custom

    if len(values) == 0:
        print('No hla data found. Check config file for correct specification of data and hla genotyping mode')
        sys.exit(1)

    return values



##### MHC CLASS I


# returns list of hla typing results for the given sample and group

###### MHC Class II #########
Expand Down Expand Up @@ -555,6 +572,7 @@ def get_mhcI(wildcards):
if config['prioritization']['class'] in ['I', 'BOTH']:
alleles += expand("results/{sample}/hla/mhc-I.tsv",
sample=config['data']['name'])

return alleles

def get_mhcII(wildcards):
Expand Down
28 changes: 23 additions & 5 deletions workflow/rules/hlatyping.smk
Original file line number Diff line number Diff line change
Expand Up @@ -288,23 +288,41 @@ rule combine_mhcI_PE:
'{input}' {output}
"""

rule merge_mhcI_allels:
rule merge_predicted_mhcI_allels:
input:
get_mhcI_alleles
get_predicted_mhcI_alleles
output:
"results/{sample}/hla/mhc-I.tsv",
"results/{sample}/hla/genotyping/mhc-I.tsv",
message:
"Merging HLA alleles from different sources"
log:
"logs/{sample}/optitype/merge_classI_alleles.log"
"logs/{sample}/optitype/merge_predicted_mhc-I.log"
conda:
"../envs/basic.yml"
threads: 1
shell:
"""
python workflow/scripts/merge_mhcI_alleles.py \
python workflow/scripts/genotyping/merge_predicted_mhcI.py \
'{input}' {output}
"""

# Combine the alleles returned by get_all_mhcI_alleles (predicted and/or
# custom) into the per-sample MHC-I allele table; script output is captured
# in the rule's log file.
rule combine_all_mhcI_alleles:
    input:
        get_all_mhcI_alleles
    output:
        "results/{sample}/hla/mhc-I.tsv"
    message:
        "Combining HLA alleles from different sources"
    log:
        "logs/{sample}/genotyping/combine_all_mhc-I.log"
    conda:
        "../envs/basic.yml"
    threads: 1
    shell:
        """
        python workflow/scripts/genotyping/combine_all_alleles.py \
            '{input}' {output} > {log} 2>&1
        """

######### MHC-II HLA GENOTYPING ###########
rule filter_reads_mhcII_PE:
Expand Down
37 changes: 0 additions & 37 deletions workflow/scripts/merge_mhcI_alleles.py

This file was deleted.

0 comments on commit 97a9eeb

Please sign in to comment.