Skip to content

Commit

Permalink
- Rename highimpact and highlight to stemcell_hotspot and cancer_gene
Browse files Browse the repository at this point in the history
- add dosage_sensitive_gene annotation
- change scoring to score each category separately (not score once per gene)
  • Loading branch information
Nicolai-vKuegelgen committed Oct 22, 2024
1 parent 06b49a2 commit 705a0fb
Show file tree
Hide file tree
Showing 17 changed files with 405 additions and 222 deletions.
7 changes: 5 additions & 2 deletions stemcnv_check/control_files/allowedvalues_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,13 @@ settings:
gene_overlap:
exclude_gene_type_regex: list__str
include_only_these_gene_types: list__str
high_impact_list: str
highlight_list: str
stemcell_hotspot_list: str
cancer_gene_list: str

Check_score_values:
pHaplo_threshold: float_le1_ge0
pTriplo_threshold: float_le1_ge0
dosage_sensitive_gene: float_ge0
any_roi_hit: float_ge0
any_other_gene: float_ge0
large_CN_size_modifier: float_ge1
Expand Down
18 changes: 13 additions & 5 deletions stemcnv_check/control_files/default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -226,23 +226,31 @@ settings:
# description & description_doi will be used to display extra info in the report
# mapping can be 'gene_name', 'gband', and 'position' and should describe the hotspot
# call_type can be 'any', 'gain', 'loss' or 'LOH'
high_impact_list: '__inbuilt__/supplemental-files/HighImpact-stemcell-hotspots.tsv'
highlight_list: '__inbuilt__/supplemental-files/genelist-cancer-drivers.tsv'
stemcell_hotspot_list: '__inbuilt__/supplemental-files/genelist-stemcell-hotspots.tsv'
cancer_gene_list: '__inbuilt__/supplemental-files/genelist-cancer-drivers.tsv'
# also available: '__inbuilt__/supplemental-files/genelist-cancer-hotspots.tsv'

# Scoring for CNV and LOH calls
# scoring combines a Size based contribution with scores for overlapping annotated regions
Check_score_values:
# HighImpact & Highlight scores need to be defined in the respective tables
# stemcell_hotspot & cancer_gene scores need to be defined in the respective tables
# CNVs/LOHs get the summed scored of each overlapping annotated gene or region (gband/position)
# genes are only scored _once_ per call, i.e. a gene with both highimpact and highlight match will only
# genes are only scored _once_ per call, i.e. a gene with both stemcell_hotspot and cancer_gene match will only
# contribute the higher of the two annotated scores.
# Genes without a score in the genelists are scored as 'any_other_gene'

# Dosage sensivity predicition is a based on Collins et. al. 2022 (doi:10.1016/j.cell.2022.06.036)
# CNV loss alls overlapping a gene with pHaplos score >= threshold are scored with the 'dosage_sensitive_gene' score
# CNV gain calls are respectively scored for the pTriplo score
pHaplo_threshold: 0.86
pTriplo_threshold: 0.94
dosage_sensitive_gene: 5
# Genes without any score from the hotspot lists of dosage sensivity are scored as 'any_other_gene'
any_other_gene: 0.2
# Calls overlapping any sample defined ROI get this one-time score
any_roi_hit: 50
# CNVs with a large CN (<1 or >3) have their size contribution multiplied by this factor
large_CN_size_modifier: 1.5


##!advanced
Precision:
Expand Down
31 changes: 21 additions & 10 deletions stemcnv_check/scripts/R/R_plotting_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ make_LRR_BAF_plots <- function(
filter_by_overlaps(GRanges(seqnames = chr, strand = '*', ranges = IRanges(start = call.row$start, end = call.row$end))) %>%
as_tibble()

high_impact_list <- call.row$HighImpact %>% str_split('\\|') %>% unlist()
highlight_list <- call.row$Highlight %>% str_split('\\|') %>% unlist()
stemcell_hotspot_list <- call.row$stemcell_hotspot %>% str_split('\\|') %>% unlist()
dosage_sensitive_gene_list <- call.row$dosage_sensitive_gene %>% str_split('\\|') %>% unlist()
cancer_gene_list <- call.row$cancer_gene %>% str_split('\\|') %>% unlist()
gene.data <- gr_genes %>%
filter_by_overlaps(GRanges(seqnames = chr, strand = '*', ranges = IRanges(start = win_start, end = win_end))) %>%
as_tibble() %>%
Expand All @@ -83,8 +84,9 @@ make_LRR_BAF_plots <- function(
y_pos = ifelse(strand == '+', 1, 0),
Sample_Name = paste(sample_headers, collapse = '---'),
direct_hit = gene_id %in% direct_genes$gene_id,
high_impact = gene_name %in% high_impact_list,
highlight = gene_name %in% highlight_list,
stemcell_hotspot = gene_name %in% stemcell_hotspot_list,
dosage_sensitive_gene = gene_name %in% dosage_sensitive_gene_list,
cancer_gene = gene_name %in% cancer_gene_list,
) %>%
separate_rows(Sample_Name, sep = '---') %>%
# Need to ensure table contains reference so everything is properly facet_wrapped
Expand All @@ -103,8 +105,9 @@ make_LRR_BAF_plots <- function(
y_pos = 0,
Sample_Name = paste(sample_headers, collapse = '---'),
color = case_when(
str_detect(section_name, paste(high_impact_list, collapse = '|')) ~ 'red',
str_detect(section_name, paste(highlight_list, collapse = '|')) ~ 'orange',
str_detect(section_name, paste(stemcell_hotspot_list, collapse = '|')) ~ 'red',
# Onlt the stemcell list contains gbands
# str_detect(section_name, paste(cancer_gene_list, collapse = '|')) ~ 'orange',
band_staining == 'gpos100' ~ 'black',
band_staining == 'gpos50' ~ 'grey30',
band_staining == 'gpos25' ~ 'grey70',
Expand Down Expand Up @@ -160,8 +163,9 @@ make_LRR_BAF_plots <- function(
aes(
x = x_pos, y = y_pos, width = width, height = .9,
fill = case_when(
high_impact ~ 'red',
highlight ~ 'orange',
stemcell_hotspot ~ 'red',
dosage_sensitive_gene ~ 'orange',
cancer_gene ~ 'orange',
direct_hit ~ 'black',
TRUE ~ 'grey50'
)
Expand Down Expand Up @@ -275,10 +279,17 @@ make_LRR_BAF_plots <- function(

gene.data <- gene.data %>%
filter(!is.na(x_pos)) %>%
dplyr::select(seqnames, start, end, width, strand, high_impact, highlight, direct_hit, gene_name, gene_type, gene_id) %>%
dplyr::select(
seqnames, start, end, width, strand, stemcell_hotspot, dosage_sensitive_gene, cancer_gene,
direct_hit, gene_name, gene_type, gene_id
) %>%
mutate(CNVtype = as.character(call.row$CNV_type)) %>%
unique()

list('gg' = gg, 'genes' = gene.data, 'hotspots' = c(high_impact_list, highlight_list) %>% na.omit())
list(
'gg' = gg,
'genes' = gene.data,
'hotspots' = c(stemcell_hotspot_list, dosage_sensitive_gene_list, cancer_gene_list) %>% na.omit()
)

}
84 changes: 48 additions & 36 deletions stemcnv_check/scripts/R/R_table_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,14 +145,14 @@ summary_table <- function(summary_stat_table, sample_headers, config) {
}

format_hotspots_to_badge <- function(
hotspot_vec, CNVtype_vec, hotspot_table, listname = 'high_impact', include_hover = TRUE
hotspot_vec, CNVtype_vec, hotspot_table, listname = 'stemcell_hotspot', include_hover = TRUE
) {
if (listname == "high_impact") {
if (listname == "stemcell_hotspot") {
shorthand <- 'HI'
} else if (listname == "highlight") {
} else if (listname %in% c("cancer_gene", "dosage_sensitive_gene")) {
shorthand <- 'HL'
} else {
stop(str_glue("Unsupported list type '{listname}', only 'high_impact' and 'highlight' are defined"))
stop(str_glue("Unsupported list type '{listname}', only 'stemcell_hotspot', 'dosage_sensitive_gene' and 'cancer_gene' are defined"))
}

hotspot_table.any <- hotspot_table %>%
Expand Down Expand Up @@ -206,7 +206,9 @@ format_hotspots_to_badge <- function(
}
}

CNV_table_output <- function(tb, plotsection, high_impact_tb, highlight_tb, gr_info, report_config, caption = NULL) {
CNV_table_output <- function(
tb, plotsection, stemcell_hotspot_tb, dosage_sensitive_gene_tb, cancer_gene_tb, gr_info, report_config, caption = NULL
) {
always_include <- report_config$call.data.and.plots[[plotsection]]$always_include
# Reorder & subset columns
tb <- tb %>%
Expand All @@ -226,8 +228,9 @@ CNV_table_output <- function(tb, plotsection, high_impact_tb, highlight_tb, gr_i
)
),
Precision_Estimate = ifelse(is.na(Precision_Estimate), '-', as.character(Precision_Estimate)),
HighImpact = map2_chr(HighImpact, CNV_type, \(hi,c) format_hotspots_to_badge(hi, c, high_impact_tb, 'high_impact')),
Highlight = map2_chr(Highlight, CNV_type, \(hi,c) format_hotspots_to_badge(hi, c, highlight_tb, 'highlight')),
stemcell_hotspot = map2_chr(stemcell_hotspot, CNV_type, \(hi,c) format_hotspots_to_badge(hi, c, stemcell_hotspot_tb, 'stemcell_hotspot')),
dosage_sensitive_gene = map2_chr(dosage_sensitive_gene, CNV_type, \(hi,c) format_hotspots_to_badge(hi, c, dosage_sensitive_gene_tb, 'dosage_sensitive_gene')),
cancer_gene = map2_chr(cancer_gene, CNV_type, \(hi,c) format_hotspots_to_badge(hi, c, cancer_gene_tb, 'cancer_gene')),
genome_bands = pmap_chr(., \(chrom, start, end, ...) {
gr_info %>%
filter_by_overlaps(GRanges(seqnames = chrom, strand = '*', ranges = IRanges(start, end))) %>%
Expand All @@ -248,9 +251,9 @@ CNV_table_output <- function(tb, plotsection, high_impact_tb, highlight_tb, gr_i
Plot, Call_label, Check_Score,
CNV_type, chrom, Size, genome_bands,
start, end, #invis 10-11
CNV_caller, HighImpact, Highlight, ROI_hits,
CNV_caller, stemcell_hotspot, dosage_sensitive_gene, cancer_gene, ROI_hits,
Precision_Estimate, probe_coverage_gap, high_probe_density,
# invis: 19++
# invis: 20++
copynumber, LRR, n_probes, n_uniq_probes, #n_premerged_calls, caller_confidence,
caller_merging_coverage, Gap_percent
)
Expand All @@ -264,16 +267,17 @@ CNV_table_output <- function(tb, plotsection, high_impact_tb, highlight_tb, gr_i
'Number of the CNV call, sorted by descending Check_Score',
'Link to the plot of the CNV call\\nNote: For the Top20/critical CNVs clicking on the link will switch the active plot below. For other CNVs it will open the plot in a new browser tab.',
'Designation label for the CNV call, (Critical, Reportable, Reference genotype, ROI)',
'Check_Score of the CNV call, calculated based on size, overlap high impact or highlight list, or other genes',
'Check_Score of the CNV call, calculated based on size, overlap with stemcell_hotspot, dosage_sensivity or cancer_gene list, or other genes',
'Type of CNV call (gain, loss, LOH)',
'Chromosome of the CNV call',
'Size of the CNV call (in base pairs)',
'Genome bands overlapping the CNV call',
'Start position of the CNV call',
'End position of the CNV call',
'Caller tools detecting this CNV call',
'High impact hotspots overlapping with this CNV call',
'Highlight hotspots overlapping with this CNV call',
'Stemcell hotspots overlapping with this CNV call',
'Dosage sensitive genes overlapping with this CNV call',
'Cancer genes overlapping with this CNV call',
'Regions of interest overlapping with this CNV call',
#FIXME (future): add a doi for precision benchmark once available
'Precision estimate of the CNV call, based on internal benchmarking',
Expand Down Expand Up @@ -302,7 +306,7 @@ CNV_table_output <- function(tb, plotsection, high_impact_tb, highlight_tb, gr_i
buttons = c('colvis', 'copy', 'csv', 'excel', 'print'),
columnDefs = list(
#This uses 0-indexing vs the usual R 1-indexing
list(targets = c(0:2,10:11,19:(ncol(tb)-1)), visible = FALSE)
list(targets = c(0:2,10:11,20:(ncol(tb)-1)), visible = FALSE)
)
),
callback = JS(
Expand All @@ -321,7 +325,7 @@ CNV_table_output <- function(tb, plotsection, high_impact_tb, highlight_tb, gr_i
select(
CNV_type, Check_Score,
chrom, start, end, Size, genome_bands, CNV_caller,
HighImpact, Highlight,
stemcell_hotspot, dosage_sensitive_gene, cancer_gene,
Precision_Estimate, probe_coverage_gap, high_probe_density
) %>%
rename_with(format_column_names)
Expand All @@ -330,7 +334,7 @@ CNV_table_output <- function(tb, plotsection, high_impact_tb, highlight_tb, gr_i
}

gene_table_output <- function(
tb, plotsection, high_impact_tb, highlight_tb, report_config, caption = NULL, extra_cols = c()
tb, plotsection, stemcell_hotspot_tb, dosage_sensitive_gene_tb, cancer_gene_tb, report_config, caption = NULL, extra_cols = c()
) {

if (report_config$call.data.and.plots[[plotsection]]$include.gene.table.details == 'Call') {
Expand All @@ -346,53 +350,61 @@ gene_table_output <- function(
tb <- tb %>%
mutate(
across(any_of(c('direct_hit', 'gene_type', 'strand')), ~ factor(.)),
high_impact = factor(ifelse(high_impact, 'hit', '-'), levels = c('hit', '-')),
highlight = factor(ifelse(highlight, 'hit', '-'), levels = c('hit', '-')),
across(
any_of(c('stemcell_hotspot', 'dosage_sensitive_gene', 'cancer_gene')),
~ factor(ifelse(., 'yes', '-'), levels = c('yes', '-'))
),
# stemcell_hotspot = factor(ifelse(stemcell_hotspot, 'hit', '-'), levels = c('hit', '-')),
# cancer_gene = factor(ifelse(cancer_gene, 'hit', '-'), levels = c('hit', '-')),
name_is_geneid = str_detect(gene_name, 'ENSG[0-9]{11}'),
# REEV: gene_id *should* work, but won't if they are deprectated/not in Annonars
REEV = str_glue("<a href='https://reev.bihealth.org/gene/{gene_name}' target='_blank' rel='noopener noreferrer'>{gene_name}</a>"),
GTex = str_glue("<a href='https://gtexportal.org/home/gene/{gene_name}' target='_blank' rel='noopener noreferrer'>{gene_name}</a>"),
NCBI = ifelse(name_is_geneid, '-', str_glue("<a href='https://pubmed.ncbi.nlm.nih.gov/?term={gene_name}' target='_blank' rel='noopener noreferrer'>{gene_name}</a>")),
Ensembl = str_glue("<a href=' https://www.ensembl.org/Homo_sapiens/Gene/Summary?g={gene_id}' target='_blank' rel='noopener noreferrer'>{gene_id}</a>"),
#Reformat gene name
gene_name = ifelse(
high_impact == 'hit',
map2_chr(gene_name, CNVtype, \(g, c) format_hotspots_to_badge(g,c, high_impact_tb,'high_impact', FALSE)),
map2_chr(gene_name, CNVtype, \(g, c) format_hotspots_to_badge(g,c, highlight_tb, 'highlight', FALSE))
gene_name = case_when(
stemcell_hotspot == 'yes' ~ map2_chr(gene_name, CNVtype, \(g, c) format_hotspots_to_badge(g,c, stemcell_hotspot_tb,'stemcell_hotspot', FALSE)),
dosage_sensitive_gene == 'yes' ~ map2_chr(gene_name, CNVtype, \(g, c) format_hotspots_to_badge(g,c, dosage_sensitive_gene_tb, 'dosage_sensitive_gene', FALSE)),
cancer_gene == 'yes' ~ map2_chr(gene_name, CNVtype, \(g, c) format_hotspots_to_badge(g,c, cancer_gene_tb, 'cancer_gene', FALSE)),
TRUE ~ gene_name
),
) %>%
arrange(high_impact, highlight, desc(direct_hit), start) %>%
arrange(stemcell_hotspot, cancer_gene, desc(direct_hit), start) %>%
select(
gene_name, gene_id, seqnames, start, end, strand, high_impact, highlight,
gene_name, gene_id, seqnames, start, end, strand, stemcell_hotspot, dosage_sensitive_gene, cancer_gene,
any_of(extra_cols), REEV, GTex, NCBI, Ensembl
)
if (params$out_format == 'html') {
colors1 <- ifelse(tb$high_impact == 'hit', 'red' , 'white')
colors2 <- ifelse(tb$highlight == 'hit', 'orange', 'white')
colors1 <- ifelse(tb$stemcell_hotspot == 'yes', 'red' , 'white')
colors2 <- ifelse(tb$dosage_sensitive_gene == 'yes', 'orange', 'white')
colors3 <- ifelse(tb$cancer_gene == 'yes', 'orange', 'white')
dt <- datatable(
tb, rownames = FALSE, escape = FALSE,
options = list(
dom = 'Bftilp', pageLength = 10,
extensions = c('Buttons'),
buttons = c('colvis', 'copy', 'csv', 'excel', 'print'),
columnDefs = list(list(targets = 1:7, visible = FALSE))
columnDefs = list(list(targets = 1:8, visible = FALSE))
)
) %>%
formatStyle('high_impact', backgroundColor = styleRow(1:nrow(tb), colors1)) %>% # textAlign = 'center'
formatStyle('highlight', backgroundColor = styleRow(1:nrow(tb), colors2))
formatStyle('stemcell_hotspot', backgroundColor = styleRow(1:nrow(tb), colors1)) %>% # textAlign = 'center'
formatStyle('dosage_sensitive_gene', backgroundColor = styleRow(1:nrow(tb), colors2)) %>%
formatStyle('cancer_gene', backgroundColor = styleRow(1:nrow(tb), colors3))
return(dt)
} else {
tb <- tb %>% select(gene_name, gene_id, high_impact, highlight, any_of(extra_cols))
tb <- tb %>% select(gene_name, gene_id, stemcell_hotspot, dosage_sensitive_gene, cancer_gene, any_of(extra_cols))
return(kable(tb, caption = caption))
}
}

hotspot_table_output <- function(
hotspots, cnv_type, plotsection, high_impact_tb, highlight_tb, report_config, out_format, caption = NULL
hotspots, cnv_type, plotsection, stemcell_hotspot_tb, dosage_sensitive_gene_tb, cancer_gene_tb, report_config, out_format, caption = NULL
){
tb <- bind_rows(
high_impact_tb,
highlight_tb
stemcell_hotspot_tb,
dosage_sensitive_gene_tb,
cancer_gene_tb
) %>%
filter(hotspot %in% hotspots & call_type %in% c('any', cnv_type))

Expand All @@ -403,10 +415,10 @@ hotspot_table_output <- function(
dplyr::rename(description = description_htmllinks) %>%
select(hotspot, call_type, list_name, description, check_score, any_of(colnames(tb))) %>%
mutate(
hotspot = ifelse(
list_name %in% unique(high_impact_tb$list_name),
map2_chr(hotspot, call_type, \(g, c) format_hotspots_to_badge(g,c, high_impact_tb,'high_impact', FALSE)),
map2_chr(hotspot, call_type, \(g, c) format_hotspots_to_badge(g,c, highlight_tb, 'highlight', FALSE))
hotspot = case_when(
list_name == unique(stemcell_hotspot_tb$list_name) ~ map2_chr(hotspot, call_type, \(g, c) format_hotspots_to_badge(g,c, stemcell_hotspot_tb,'stemcell_hotspot', FALSE)),
list_name == unique(dosage_sensitive_gene_tb$list_name) ~ map2_chr(hotspot, call_type, \(g, c) format_hotspots_to_badge(g,c, dosage_sensitive_gene_tb, 'dosage_sensitive_gene', FALSE)),
list_name == unique(cancer_gene_tb$list_name) ~ map2_chr(hotspot, call_type, \(g, c) format_hotspots_to_badge(g,c, cancer_gene_tb, 'cancer_gene', FALSE))
),
description = str_replace_all(description, '&#013;', '<br/>')
) %>%
Expand Down
10 changes: 5 additions & 5 deletions stemcnv_check/scripts/R/helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,12 @@ load_genomeInfo <- function(ginfo_file, config, target_style='UCSC') {
gr_info
}

load_hotspot_table <- function(config, table = 'HighImpact') {
load_hotspot_table <- function(config, table = 'stemcell_hotspot') {

if (table == 'HighImpact') {
filename <- config$settings$CNV_processing$gene_overlap$high_impact_list
} else if (table == 'Highlight') {
filename <- config$settings$CNV_processing$gene_overlap$highlight_list
if (table == 'stemcell_hotspot') {
filename <- config$settings$CNV_processing$gene_overlap$stemcell_hotspot_list
} else if (table == 'cancer_gene') {
filename <- config$settings$CNV_processing$gene_overlap$cancer_gene_list
} else {
stop('Invalid table name')
}
Expand Down
Loading

0 comments on commit 705a0fb

Please sign in to comment.