diff --git a/pcgr/variant.py b/pcgr/variant.py index 1d4c5fe1..bbd9c64e 100644 --- a/pcgr/variant.py +++ b/pcgr/variant.py @@ -257,7 +257,7 @@ def clean_annotations(variant_set: pd.DataFrame, yaml_data: dict, germline: bool variant_set.loc[variant_set['CLINVAR_CONFLICTED'] != 1, "CLINVAR_CONFLICTED"] = False if not {'VCF_SAMPLE_ID'}.issubset(variant_set.columns): - variant_set['VCF_SAMPLE_ID'] = yaml_data['sample_id'].astype(str) + variant_set['VCF_SAMPLE_ID'] = str(yaml_data['sample_id']) variant_set['SAMPLE_ID'] = str(yaml_data['sample_id']) variant_set['GENOME_VERSION'] = yaml_data['genome_assembly'] if {'CHROM','POS','REF','ALT',}.issubset(variant_set.columns): diff --git a/pcgrr/R/germline.R b/pcgrr/R/germline.R index fc6d8e01..afec4089 100644 --- a/pcgrr/R/germline.R +++ b/pcgrr/R/germline.R @@ -12,15 +12,15 @@ max_af_gnomad <- function(sample_calls){ "type data.frame")) ) ## set maximum AF from gnomAD (all populations) - gnomad_cols <- c("gnomAD_AF", - "gnomAD_NFE_AF", - "gnomAD_AMR_AF", - "gnomAD_AFR_AF", - "gnomAD_SAS_AF", - "gnomAD_EAS_AF", - "gnomAD_ASJ_AF", - "gnomAD_FIN_AF", - "gnomAD_OTH_AF") + gnomad_cols <- c("gnomADe_AF", + "gnomADe_NFE_AF", + "gnomADe_AMR_AF", + "gnomADe_AFR_AF", + "gnomADe_SAS_AF", + "gnomADe_EAS_AF", + "gnomADe_ASJ_AF", + "gnomADe_FIN_AF", + "gnomADe_OTH_AF") sample_calls$MAX_AF_GNOMAD <- 0 for (c in gnomad_cols) { if(c %in% colnames(sample_calls)){ @@ -358,7 +358,7 @@ assign_somatic_germline_evidence <- function(sample_calls, config) { pcgrr::assign_germline_popfreq_status( sample_calls, pop = pop, - dbquery = "gnomAD", + dbquery = "gnomADe", max_tolerated_af = config[["tumor_only"]][[paste0("maf_gnomad_", tolower(pop))]]) } @@ -382,7 +382,7 @@ assign_somatic_germline_evidence <- function(sample_calls, config) { #' #' @param sample_calls data frame with variants #' @param pop population code (1000 Genomes/gnomAD) -#' @param dbquery 1KG or gnomAD +#' @param dbquery gnomADe #' @param max_tolerated_af max tolerated germline allele frequency #' #' @return sample_calls @@ -390,15 +390,15 @@ assign_somatic_germline_evidence <- function(sample_calls, config) { #' @export assign_germline_popfreq_status <- function(sample_calls, pop = "EUR", - dbquery = "1KG", + dbquery = "gnomADe", max_tolerated_af = 0.01) { - if (dbquery == "gnomAD") { + if (dbquery == "gnomADe") { if (!("STATUS_POPFREQ_GNOMAD_ABOVE_TOLERATED" %in% colnames(sample_calls))) { sample_calls$STATUS_POPFREQ_GNOMAD_ABOVE_TOLERATED <- FALSE } - col <- paste0(pop, "_AF_GNOMAD") + col <- paste0(dbquery,"_",pop, "_AF") if (any(grepl(paste0("^", col, "$"), names(sample_calls)))) { sample_calls$max_tolerated_af <- max_tolerated_af diff --git a/pcgrr/data-raw/data-raw.R b/pcgrr/data-raw/data-raw.R index 109b2e0b..04d53667 100755 --- a/pcgrr/data-raw/data-raw.R +++ b/pcgrr/data-raw/data-raw.R @@ -187,15 +187,15 @@ data_coltype_defs[['snv_indel_somatic_raw']] <- readr::cols_only( SIMPLEREPEATS_HIT = readr::col_logical(), WINMASKER_HIT = readr::col_logical(), VEP_ALL_CSQ = readr::col_character(), - gnomAD_AF = readr::col_number(), - gnomAD_AMR_AF = readr::col_number(), - gnomAD_AFR_AF = readr::col_number(), - gnomAD_EAS_AF = readr::col_number(), - gnomAD_FIN_AF = readr::col_number(), - gnomAD_ASJ_AF = readr::col_number(), - gnomAD_OTH_AF = readr::col_number(), - gnomAD_NFE_AF = readr::col_number(), - gnomAD_SAS_AF = readr::col_number(), + gnomADe_AF = readr::col_number(), + gnomADe_AMR_AF = readr::col_number(), + gnomADe_AFR_AF = readr::col_number(), + gnomADe_EAS_AF = readr::col_number(), + gnomADe_FIN_AF = readr::col_number(), + gnomADe_ASJ_AF = readr::col_number(), + gnomADe_OTH_AF = readr::col_number(), + gnomADe_NFE_AF = readr::col_number(), + gnomADe_SAS_AF = readr::col_number(), EFFECT_PREDICTIONS = readr::col_character(), SAMPLE_ID = readr::col_character(), VCF_SAMPLE_ID = readr::col_character(), @@ -299,15 +299,15 @@ data_coltype_defs[['snv_indel_germline_raw']] <- readr::cols_only( DBNSFP_BAYESDEL_ADDAF = readr::col_character(), DBNSFP_SPLICE_SITE_ADA = readr::col_character(), DBNSFP_SPLICE_SITE_RF = readr::col_character(), - gnomAD_AF = readr::col_number(), - gnomAD_AMR_AF = readr::col_number(), - gnomAD_AFR_AF = readr::col_number(), - gnomAD_EAS_AF = readr::col_number(), - gnomAD_FIN_AF = readr::col_number(), - gnomAD_ASJ_AF = readr::col_number(), - gnomAD_OTH_AF = readr::col_number(), - gnomAD_NFE_AF = readr::col_number(), - gnomAD_SAS_AF = readr::col_number(), + gnomADe_AF = readr::col_number(), + gnomADe_AMR_AF = readr::col_number(), + gnomADe_AFR_AF = readr::col_number(), + gnomADe_EAS_AF = readr::col_number(), + gnomADe_FIN_AF = readr::col_number(), + gnomADe_ASJ_AF = readr::col_number(), + gnomADe_OTH_AF = readr::col_number(), + gnomADe_NFE_AF = readr::col_number(), + gnomADe_SAS_AF = readr::col_number(), gnomADe_non_cancer_AC = readr::col_integer(), gnomADe_non_cancer_AN = readr::col_integer(), gnomADe_non_cancer_NHOMALT = readr::col_integer(),