Skip to content

Commit

Permalink
Add StableLift - sSNV (#239)
Browse files Browse the repository at this point in the history
* Add StableLift as submodule

* Add StableLift module

* Update template to support stablelift

* Allow StableLift as selectable pipeline

* Update methods to handle StableLiftsSNV

* Update StableLift name to StableLiftsSNV

* Update name to StableLiftsSNV

* Add StableLiftsSNV to pipeline selector

* Update params to input StableLift rf models

* Add stablelift version to log

* Add calls to stablelift in metapipeline_DNA

* Add default.config

* Add module to create StableLift YAML

* Update type for param

* Add mode saving for StableLift

* Rename StableLift workflow

* Add StableLift process

* Add workflow for StableLift

* Add renaming to tool name

* Simulate param expansion from StableLift for validation

* Add exception to remove BCFtools-Intersect output from StableLift

* Output additional params needed for Stablelift

* Update params in Stablelift process

* Add placeholder NFTest case

* Update batch test to include StableLift

* Update CHANGELOG

* Remove release date from unreleased section

* Remove commented code
  • Loading branch information
yashpatel6 authored Jan 28, 2025
1 parent 16035db commit 668815f
Show file tree
Hide file tree
Showing 17 changed files with 414 additions and 7 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,6 @@
[submodule "external/pipeline-call-SRC"]
path = external/pipeline-call-SRC
url = [email protected]:uclahs-cds/pipeline-call-SRC.git
[submodule "external/pipeline-StableLift"]
path = external/pipeline-StableLift
url = [email protected]:uclahs-cds/pipeline-StableLift.git
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- StableLift for sSNV liftover

## [6.2.0] - 2024-11-22

### Changed
Expand Down
33 changes: 32 additions & 1 deletion config/custom_schema_types.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,21 @@ custom_schema_types {
'CRAM'
]

// Liftover directions that must each be present as a key in the StableLift models param
expected_stablelift_types = [
'GRCh37ToGRCh38',
'GRCh38ToGRCh37'
]

// Variant callers that must each have a model entry under every liftover direction
expected_stablelift_models = [
'HaplotypeCaller',
'Muse2',
'Mutect2',
'SomaticSniper',
'Strelka2',
'Delly2-gSV',
'Delly2-sSV'
]

/**
* Check that input types are in allowed list
*/
Expand Down Expand Up @@ -117,12 +132,28 @@ custom_schema_types {
}
}

/**
* Validate the StableLift models param: it must be a namespace that holds, for every
* expected liftover type, a readable model path for every expected model name
*/
check_stablelift_models = { Map options, String name, Map properties ->
    def models = options[name]
    custom_schema_types.check_if_namespace(models, name)

    custom_schema_types.expected_stablelift_types.each { liftover_type ->
        // Every liftover direction needs its own set of models
        assert models.containsKey(liftover_type) : "`${liftover_type}` must be included in the StableLift models param: `${name}`"

        def models_for_type = models[liftover_type]
        custom_schema_types.expected_stablelift_models.each { model_name ->
            assert models_for_type.containsKey(model_name) : "Model must be provided for `${model_name}` for type `${liftover_type}`"
            // The model file itself must be readable
            schema.check_path(models_for_type[model_name] as String, 'r')
        }
    }
}

types = [
'InputNamespace': custom_schema_types.check_input_namespace,
'PatientNamespace': custom_schema_types.check_patient_namespace,
'SampleNamespace': custom_schema_types.check_sample_namespace,
'EntryList': custom_schema_types.check_entry_list,
'LaneType': custom_schema_types.check_lane_type,
'EmailAddress': custom_schema_types.check_email_address
'EmailAddress': custom_schema_types.check_email_address,
'StableLiftModels': custom_schema_types.check_stablelift_models
]
}
27 changes: 27 additions & 0 deletions config/default.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,33 @@ params {
src_snv_tool = 'BCFtools-Intersect'
src_cna_tool = 'Battenberg'

// StableLift model definition: one RF model (.Rds) per liftover direction and variant caller
// NOTE(review): test_base is a hard-coded absolute path - confirm it is reachable wherever these defaults are used
test_base = "/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/publish"
// Per-direction path prefixes; the variant type, caller name and `.Rds` extension are appended below
model_37_38 = "${test_base}/model/GRCh37-to-GRCh38/RF-model_GRCh37-to-GRCh38"
model_38_37 = "${test_base}/model/GRCh38-to-GRCh37/RF-model_GRCh38-to-GRCh37"

// Outer keys are liftover directions, inner keys are variant callers
stablelift_models = [
GRCh37ToGRCh38: [
HaplotypeCaller: "${model_37_38}_gSNP_HaplotypeCaller.Rds",
Muse2: "${model_37_38}_sSNV_Muse2.Rds",
Mutect2: "${model_37_38}_sSNV_Mutect2.Rds",
SomaticSniper: "${model_37_38}_sSNV_SomaticSniper.Rds",
Strelka2: "${model_37_38}_sSNV_Strelka2.Rds",
"Delly2-gSV": "${model_37_38}_gSV_Delly2-gSV.Rds",
"Delly2-sSV": "${model_37_38}_sSV_Delly2-sSV.Rds"
],
GRCh38ToGRCh37: [
HaplotypeCaller: "${model_38_37}_gSNP_HaplotypeCaller.Rds",
Muse2: "${model_38_37}_sSNV_Muse2.Rds",
Mutect2: "${model_38_37}_sSNV_Mutect2.Rds",
SomaticSniper: "${model_38_37}_sSNV_SomaticSniper.Rds",
Strelka2: "${model_38_37}_sSNV_Strelka2.Rds",
"Delly2-gSV": "${model_38_37}_gSV_Delly2-gSV.Rds",
"Delly2-sSV": "${model_38_37}_sSV_Delly2-sSV.Rds"
]
]


// TO-DO: Support option below to run call-SRC on all combinations of sSNV tools and sCNA tools
src_run_all_combinations = false

Expand Down
86 changes: 84 additions & 2 deletions config/methods.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/methods/co
includeConfig "${projectDir}/config/pipeline_selector.config"
includeConfig "${projectDir}/config/input_handler.config"

import nextflow.Nextflow

def get_submodule_version(submodule) {
def manifest_locations = [new File("${projectDir}/external/${submodule}/nextflow.config"), new File("${projectDir}/external/${submodule}/pipeline/nextflow.config")]
def submodule_manifest = null
Expand Down Expand Up @@ -206,6 +208,7 @@ methods {
params.version_call_sCNA = get_submodule_version('pipeline-call-sCNA')
params.version_generate_SQC_BAM = get_submodule_version('pipeline-generate-SQC-BAM')
params.version_call_SRC = get_submodule_version('pipeline-call-SRC')
params.version_StableLift = get_submodule_version('pipeline-StableLift')
}

set_env = {
Expand Down Expand Up @@ -264,7 +267,8 @@ methods {
'pipeline-call-sCNA': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'input'],
'pipeline-generate-SQC-BAM': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'input'],
'pipeline-calculate-targeted-coverage': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'input'],
'pipeline-call-SRC': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'input']
'pipeline-call-SRC': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'input'],
'pipeline-StableLift': ['sample_id', 'output_dir', 'input', 'rf_model', 'variant_caller']
]

pipeline_param_exclusion.each { pipeline, to_exclude ->
Expand Down Expand Up @@ -305,7 +309,21 @@ methods {
generate_pipeline_arg_strings = {
def params_exclude_from_all = ['output_dir', 'input_csv', 'work_dir', 'sample_id', 'patient_id', 'dataset_id', 'input']
def specific_params_to_exclude = [
'call_sSNV': ['algorithm']
'call_sSNV': ['algorithm'],
'StableLift': [
'src_fasta_id',
'src_fasta_ref',
'src_fasta_fai',
'src_fasta_dict',
'dest_fasta_id',
'dest_fasta_ref',
'dest_fasta_fai',
'dest_fasta_dict',
'chain_file',
'repeat_bed',
'header_contigs',
'gnomad_rds'
]
]
def specific_params_allowed_empty = [
'calculate_targeted_coverage': ['bait_bed', 'target_interval_list', 'bait_interval_list']
Expand Down Expand Up @@ -344,6 +362,67 @@ methods {
schema.validate_specific(pipeline_params_schema, params, []);
}

expand_stablelift_params = {
    /*
    * Derive the StableLift parameters that depend on liftover_direction (source and
    * destination references, chain file, repeat BED, header contigs, gnomAD resource)
    * and store them in params.pipeline_params["StableLift"].
    *
    * Throws an Exception if any of the derived parameters was set directly.
    */
    def stablelift = params.pipeline_params["StableLift"]

    // Make sure the user didn't set any of the advanced parameters
    [
        'src_fasta_id',
        'src_fasta_ref',
        'src_fasta_fai',
        'src_fasta_dict',
        'dest_fasta_id',
        'dest_fasta_ref',
        'dest_fasta_fai',
        'dest_fasta_dict',
        'chain_file',
        'repeat_bed',
        'header_contigs',
        'gnomad_rds'
    ].each { derived_key ->
        if (stablelift.containsKey(derived_key)) {
            throw new Exception("Do not directly set params.${derived_key} - the value will be inferred from params.liftover_direction")
        }
    }

    def forward = "GRCh37ToGRCh38"
    def backward = "GRCh38ToGRCh37"
    def direction = stablelift.getOrDefault('liftover_direction', null)

    // Nothing is derived for a missing or unrecognized direction
    if (!(direction in [forward, backward])) {
        return
    }

    def lifting_forward = (direction == forward)

    stablelift.src_fasta_id = lifting_forward ? 'GRCh37' : 'GRCh38'
    stablelift.src_fasta_ref = lifting_forward ? stablelift.fasta_ref_37 : stablelift.fasta_ref_38

    stablelift.dest_fasta_id = lifting_forward ? 'GRCh38' : 'GRCh37'
    stablelift.dest_fasta_ref = lifting_forward ? stablelift.fasta_ref_38 : stablelift.fasta_ref_37

    // Resource files all live directly under the resource bundle directory;
    // the repeat BED and header contigs are named after the destination build
    def bundle = stablelift.resource_bundle_path
    stablelift.chain_file = bundle + (lifting_forward ? "/hg19ToHg38.over.chain" : "/hg38ToHg19.over.chain")
    stablelift.repeat_bed = bundle + (lifting_forward ? "/GRCh38_RepeatMasker-intervals.bed" : "/GRCh37_RepeatMasker-intervals.bed")
    stablelift.header_contigs = bundle + (lifting_forward ? "/GRCh38_VCF-header-contigs.txt" : "/GRCh37_VCF-header-contigs.txt")

    stablelift.src_fasta_fai = stablelift.src_fasta_ref + ".fai"
    stablelift.dest_fasta_fai = stablelift.dest_fasta_ref + ".fai"

    // The sequence dictionary sits next to the FASTA, named <FASTA base name>.dict
    def sibling_dict = { fasta ->
        def fasta_path = Nextflow.file(fasta)
        fasta_path.resolveSibling(fasta_path.getBaseName() + '.dict').toString()
    }
    stablelift.src_fasta_dict = sibling_dict(stablelift.src_fasta_ref)
    stablelift.dest_fasta_dict = sibling_dict(stablelift.dest_fasta_ref)

    stablelift.gnomad_rds = bundle + "/gnomad.v4.0.sv.Rds"
}

set_up = {
input_handler.convert_csv_inputs()
schema.load_custom_types("${projectDir}/config/custom_schema_types.config")
Expand All @@ -355,6 +434,9 @@ methods {
methods.set_submodule_versions()
methods.set_env()
pipeline_selector.handle_pipeline_selection()
if (params.pipeline_params["StableLift"].is_pipeline_enabled) {
methods.expand_stablelift_params()
}
methods.generate_pipeline_interval_params()
methods.set_pipeline_cpus()
methods.generate_pipeline_arg_strings()
Expand Down
14 changes: 12 additions & 2 deletions config/pipeline_selector.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ pipeline_selector {
'call-gSV': ['recalibrate-BAM'],
'call-sSV': ['recalibrate-BAM'],
'call-sCNA': ['recalibrate-BAM'],
'call-SRC': ['call-sSNV', 'call-sCNA']
'call-SRC': ['call-sSNV', 'call-sCNA'],
'StableLiftsSNV': ['call-sSNV']
]

if (input_type == 'BAM') {
Expand Down Expand Up @@ -135,7 +136,8 @@ pipeline_selector {
'call_gSV': 'call-gSV',
'call_sSV': 'call-sSV',
'call_sCNA': 'call-sCNA',
'call_SRC': 'call-SRC'
'call_SRC': 'call-SRC',
'StableLift': 'StableLiftsSNV'
];

pipeline_name_map.each { pipeline_key, pipeline ->
Expand All @@ -144,6 +146,14 @@ pipeline_selector {
}

params.pipeline_params[pipeline_key].is_pipeline_enabled = pipelines.contains(pipeline);

if (pipeline_key == 'StableLift' && pipelines.contains(pipeline)) {
if (!params.pipeline_params[pipeline_key].containsKey('lift_modes')) {
params.pipeline_params[pipeline_key]['lift_modes'] = []
}

params.pipeline_params[pipeline_key]['lift_modes'] << pipeline
}
}
}

Expand Down
9 changes: 9 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ requested_pipelines:
- call-sSV
- call-sCNA
- call-SRC
- StableLiftsSNV
help: 'List of pipelines to run'
override_realignment:
type: 'Bool'
Expand Down Expand Up @@ -171,6 +172,10 @@ global_job_submission_sbatch:
required: true
allow_empty: true
help: 'Sbatch command for submitting metapipeline job'
stablelift_models:
type: 'StableLiftModels'
required: true
help: 'Map of paths to StableLift models'
pipeline_params:
type: 'Namespace'
required: true
Expand Down Expand Up @@ -220,6 +225,10 @@ pipeline_params:
type: 'Namespace'
required: true
help: 'Parameters for generate-SQC-BAM'
StableLift:
type: 'Namespace'
required: true
help: 'Parameters for StableLift'
input:
type: 'InputNamespace'
required: true
Expand Down
13 changes: 12 additions & 1 deletion config/template.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ params {

sample_mode = 'paired' // Choose from: 'single', 'paired', 'multi'

// Select pipeline(s) to run. Choices: 'align-DNA', 'recalibrate-BAM', 'generate-SQC-BAM', 'calculate-targeted-coverage', 'call-gSNP', 'call-mtSNV', 'call-sSNV', 'call-sSV', 'call-gSV', 'call-sCNA'
// Select pipeline(s) to run. Choices: 'align-DNA', 'recalibrate-BAM', 'generate-SQC-BAM', 'calculate-targeted-coverage', 'call-gSNP', 'call-mtSNV', 'call-sSNV', 'call-sSV', 'call-gSV', 'call-sCNA', 'call-SRC', 'StableLiftsSNV'
requested_pipelines = ['align-DNA', 'recalibrate-BAM', 'generate-SQC-BAM', 'call-gSNP', 'call-mtSNV', 'call-sSNV', 'call-sSV', 'call-gSV', 'call-sCNA']

// Override conversion to FASTQ and re-alignment with BAM input
Expand Down Expand Up @@ -143,6 +143,17 @@ params {
]
]
}

// Parameters for StableLift
StableLift {
// Liftover direction. Choose from: "GRCh37ToGRCh38", "GRCh38ToGRCh37"
liftover_direction = "GRCh37ToGRCh38"

// Map of StableLift model paths, taken from the top-level stablelift_models param
stablelift_models = params.stablelift_models

// Reference FASTAs for each build; which one is the source and which the destination follows liftover_direction
fasta_ref_37 = "/hot/resource/reference-genome/GRCh37-EBI-hs37d5/hs37d5.fa"
fasta_ref_38 = "/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"
// Directory from which the chain file, repeat BED, VCF header contigs and gnomAD Rds paths are derived
resource_bundle_path = "/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/publish/resource"
funcotator_data_source = "/hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/publish/resource/funcotator_dataSources.v1.7.20200521s_StableLift"
}
}
}

Expand Down
1 change: 1 addition & 0 deletions external/pipeline-StableLift
Submodule pipeline-StableLift added at 407aaa
1 change: 1 addition & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ log.info """\
uclahs-cds/pipeline-call-sCNA: ${params.version_call_sCNA}
uclahs-cds/pipeline-calculate-targeted-coverage: ${params.version_calculate_targeted_coverage}
uclahs-cds/pipeline-generate-SQC-BAM: ${params.version_generate_SQC_BAM}
uclahs-cds/pipeline-StableLift: ${params.version_StableLift}
------------------------------------
Starting workflow...
Expand Down
40 changes: 40 additions & 0 deletions module/StableLift/create_YAML_StableLift.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import org.yaml.snakeyaml.Yaml
import org.yaml.snakeyaml.Yaml
/*
* Create input YAML file for the StableLift pipeline.
*
* Input:
*   sample_info: A Map object containing sample information; the `sample`, `tool` and
*       `path` entries are read here
*
* Output:
*   @return A tuple of 4 items, including the sample_id, the input_yaml, the RF model
*       selected for the liftover direction and tool, and the tool
*/
process create_YAML_StableLift {
    publishDir "${params.output_dir}/intermediate/${task.process.replace(':', '/')}-${params.patient}/${sample_id}",
        pattern: 'stablelift_input.yaml',
        mode: 'copy'

    input:
        val(sample_info)

    output:
        tuple val(sample_id), path(input_yaml), val(run_model), val(sample_info.tool), emit: stablelift_input

    exec:
    input_yaml = 'stablelift_input.yaml'

    // The tool name is appended so each caller's VCF gets a distinct sample ID
    sample_id = "${sample_info.sample}-${sample_info.tool}" as String

    input_map = [
        'sample_id': sample_id,
        'input': [
            'vcf': sample_info.path
        ]
    ]

    // Pick the model matching the configured liftover direction and the tool that produced the VCF
    Map all_models = params["StableLift"].stablelift_models
    run_model = all_models[params["StableLift"].liftover_direction][sample_info.tool]

    Yaml yaml = new Yaml()
    // withWriter closes the file handle once the dump completes, even if dumping throws
    new File("${task.workDir}/${input_yaml}").withWriter { yaml_writer ->
        yaml.dump(input_map, yaml_writer)
    }
}
14 changes: 14 additions & 0 deletions module/StableLift/default.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// EXECUTION SETTINGS AND GLOBAL DEFAULTS

// External config files import. DO NOT MODIFY THESE LINES!
includeConfig "${projectDir}/config/default.config"
includeConfig "${projectDir}/config/methods.config"
includeConfig "${projectDir}/nextflow.config"

// Inputs/parameters of the pipeline
params {

}

// Setup the pipeline config. DO NOT REMOVE THIS LINE!
// NOTE(review): config/methods.config in this change defines `methods.set_up`, not `methods.setup` - confirm this call resolves where this config is loaded
methods.setup()
Loading

0 comments on commit 668815f

Please sign in to comment.