From 68bf19b89645b099024eeac5b9c8e3a3389d8d2d Mon Sep 17 00:00:00 2001 From: Kyrylo Simonov Date: Wed, 26 Apr 2023 11:23:55 -0500 Subject: [PATCH] Workaround for changed column names in DQD 2.1 See issue OHDSI/AresIndexer#30. The fix is borrowed from https://github.com/OHDSI/AresIndexer/pull/35. --- R/AugmentConceptFiles.R | 2 +- R/BuildDataQualityHistoryIndex.R | 12 ++++++------ R/BuildNetworkPerformanceIndex.R | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/AugmentConceptFiles.R b/R/AugmentConceptFiles.R index ccbd656..81357d5 100644 --- a/R/AugmentConceptFiles.R +++ b/R/AugmentConceptFiles.R @@ -39,7 +39,7 @@ augmentConceptFiles <- function(releaseFolder) { results <- dataQualityResults$CheckResults # augment achilles concept files with data quality failure count for relevant concept checks - conceptAggregates <- results %>% filter(!is.na(results$CONCEPT_ID) && results$FAILED==1) %>% count(CONCEPT_ID,tolower(CDM_TABLE_NAME)) + conceptAggregates <- results %>% filter(!is.na(results$conceptId) && results$failed==1) %>% count(conceptId,tolower(cdmTableName)) names(conceptAggregates) <- c("concept_id","cdm_table_name", "count_failed") writeLines(paste0(nrow(conceptAggregates), " concept level data quality issues found.")) if (nrow(conceptAggregates) > 0) { diff --git a/R/BuildDataQualityHistoryIndex.R b/R/BuildDataQualityHistoryIndex.R index 9a3642d..f17a15b 100644 --- a/R/BuildDataQualityHistoryIndex.R +++ b/R/BuildDataQualityHistoryIndex.R @@ -32,18 +32,18 @@ buildDataQualityHistoryIndex <- stratified_index <- data.table::data.table() addResultsToIndex <- function(json) { - cdm_source_name <- json$Metadata[1,"CDM_SOURCE_NAME"] - cdm_source_abbreviation <- json$Metadata[1,"CDM_SOURCE_ABBREVIATION"] - vocabulary_version <- json$Metadata[1,"VOCABULARY_VERSION"] - cdm_release_date <- format(lubridate::ymd(json$Metadata[1,"CDM_RELEASE_DATE"]),"%Y-%m-%d") + cdm_source_name <- json$Metadata[1,"cdmSourceName"] + cdm_source_abbreviation <- json$Metadata[1,"cdmSourceAbbreviation"] + vocabulary_version <- json$Metadata[1,"vocabularyVersion"] + cdm_release_date <- format(lubridate::ymd(json$Metadata[1,"cdmReleaseDate"]),"%Y-%m-%d") count_passed <- as.numeric(json$Overview$countPassed) count_failed <- as.numeric(json$Overview$countOverallFailed) count_total <- count_passed + count_failed dqd_execution_date <- format(lubridate::ymd_hms(json$endTimestamp),"%Y-%m-%d") stratifiedAggregates <- json$CheckResults %>% - filter(FAILED==1) %>% - group_by(CATEGORY, toupper(CDM_TABLE_NAME)) %>% + filter(failed==1) %>% + group_by(category, toupper(cdmTableName)) %>% summarise(count_value=n()) names(stratifiedAggregates) <- c("category", "cdm_table_name", "count_value") stratifiedAggregates$dqd_execution_date <- dqd_execution_date diff --git a/R/BuildNetworkPerformanceIndex.R b/R/BuildNetworkPerformanceIndex.R index aefa128..04d0efe 100644 --- a/R/BuildNetworkPerformanceIndex.R +++ b/R/BuildNetworkPerformanceIndex.R @@ -65,8 +65,8 @@ buildNetworkPerformanceIndex <- performanceTable <- merge(x=performanceTable,y=analysisDetails,by="TASK",all.x=TRUE) - dqdTable <- dplyr::select(dqdData, c("CheckResults.checkId", "CheckResults.EXECUTION_TIME", "CheckResults.CATEGORY")) %>% - rename(TASK = CheckResults.checkId, TIMING = CheckResults.EXECUTION_TIME, CATEGORY = CheckResults.CATEGORY) %>% mutate(PACKAGE = "DQD") %>% + dqdTable <- dplyr::select(dqdData, c("CheckResults.checkId", "CheckResults.executionTime", "CheckResults.category")) %>% + rename(TASK = CheckResults.checkId, TIMING = CheckResults.executionTime, CATEGORY = CheckResults.category) %>% mutate(PACKAGE = "DQD") %>% mutate_at("TIMING", str_replace, " secs", "") mergedTable <- rbind(performanceTable, dqdTable)