Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert DQD camelCase to upper-snake to match AresIndexer #35

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion R/AugmentConceptFiles.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ augmentConceptFiles <- function(releaseFolder) {
if (file.exists(dataQualityResultsFile)) {
writeLines("updating concept files with data quality results")
dataQualityResults <- jsonlite::fromJSON(dataQualityResultsFile)
results <- dataQualityResults$CheckResults
results <- dataQualityResults$CheckResults %>%
dplyr::rename_with(SqlRender::camelCaseToSnakeCase) %>% dplyr::rename_with(toupper)

# augment achilles concept files with data quality failure count for relevant concept checks
conceptAggregates <- results %>% filter(!is.na(results$CONCEPT_ID) && results$FAILED==1) %>% count(CONCEPT_ID,tolower(CDM_TABLE_NAME))
Expand Down
11 changes: 7 additions & 4 deletions R/BuildDataQualityHistoryIndex.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,19 @@ buildDataQualityHistoryIndex <-
stratified_index <- data.table::data.table()

addResultsToIndex <- function(json) {
cdm_source_name <- json$Metadata[1,"CDM_SOURCE_NAME"]
cdm_source_abbreviation <- json$Metadata[1,"CDM_SOURCE_ABBREVIATION"]
vocabulary_version <- json$Metadata[1,"VOCABULARY_VERSION"]
cdm_release_date <- format(lubridate::ymd(json$Metadata[1,"CDM_RELEASE_DATE"]),"%Y-%m-%d")
thisMetadata <- json$Metadata %>%
dplyr::rename_with(SqlRender::camelCaseToSnakeCase) %>% dplyr::rename_with(toupper)
cdm_source_name <- thisMetadata[1,"CDM_SOURCE_NAME"]
cdm_source_abbreviation <- thisMetadata[1,"CDM_SOURCE_ABBREVIATION"]
vocabulary_version <- thisMetadata[1,"VOCABULARY_VERSION"]
cdm_release_date <- format(lubridate::ymd(thisMetadata[1,"CDM_RELEASE_DATE"]),"%Y-%m-%d")
count_passed <- as.numeric(json$Overview$countPassed)
count_failed <- as.numeric(json$Overview$countOverallFailed)
count_total <- count_passed + count_failed
dqd_execution_date <- format(lubridate::ymd_hms(json$endTimestamp),"%Y-%m-%d")

stratifiedAggregates <- json$CheckResults %>%
dplyr::rename_with(SqlRender::camelCaseToSnakeCase) %>% dplyr::rename_with(toupper) %>%
filter(FAILED==1) %>%
group_by(CATEGORY, toupper(CDM_TABLE_NAME)) %>%
summarise(count_value=n())
Expand Down
18 changes: 11 additions & 7 deletions R/BuildDataQualityIndex.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ buildDataQualityIndex <- function(sourceFolders, outputFolder) {

# iterate on sources
for (sourceFolder in sourceFolders) {
historicalIndex <- AresIndexer::buildDataQualityHistoryIndex(sourceFolder)
historicalIndex <- buildDataQualityHistoryIndex(sourceFolder)
historicalFile <- file.path(sourceFolder, "data-quality-index.json")
write(jsonlite::toJSON(historicalIndex),historicalFile)

Expand All @@ -51,7 +51,11 @@ buildDataQualityIndex <- function(sourceFolders, outputFolder) {
# process each data quality result file
if (file.exists(dataQualityResultsFile)) {
dataQualityResults <- jsonlite::fromJSON(dataQualityResultsFile)
results <- dataQualityResults$CheckResults
results <- dataQualityResults$CheckResults %>%
dplyr::rename_with(SqlRender::camelCaseToSnakeCase) %>% dplyr::rename_with(toupper)

metadata <- dataQualityResults$Metadata %>%
dplyr::rename_with(SqlRender::camelCaseToSnakeCase) %>% dplyr::rename_with(toupper)

# for each release, generate a summary of failures by cdm_table_name
domainAggregates <- results %>% filter(FAILED==1) %>% count(tolower(CDM_TABLE_NAME))
Expand All @@ -66,11 +70,11 @@ buildDataQualityIndex <- function(sourceFolders, outputFolder) {
results[,colName] <- NA
}
sourceFailures <- results[results[,"FAILED"]==1,outColNames]
sourceFailures$CDM_SOURCE_NAME <- dataQualityResults$Metadata$CDM_SOURCE_NAME
sourceFailures$CDM_SOURCE_ABBREVIATION <- dataQualityResults$Metadata$CDM_SOURCE_ABBREVIATION
sourceFailures$CDM_SOURCE_KEY <- gsub(" ","_",dataQualityResults$Metadata$CDM_SOURCE_ABBREVIATION)
sourceFailures$RELEASE_NAME <- format(lubridate::ymd(dataQualityResults$Metadata$CDM_RELEASE_DATE),"%Y-%m-%d")
sourceFailures$RELEASE_ID <- format(lubridate::ymd(dataQualityResults$Metadata$CDM_RELEASE_DATE),"%Y%m%d")
sourceFailures$CDM_SOURCE_NAME <- metadata$CDM_SOURCE_NAME
sourceFailures$CDM_SOURCE_ABBREVIATION <- metadata$CDM_SOURCE_ABBREVIATION
sourceFailures$CDM_SOURCE_KEY <- gsub(" ", "_", metadata$CDM_SOURCE_ABBREVIATION)
sourceFailures$RELEASE_NAME <- format(lubridate::ymd(metadata$CDM_RELEASE_DATE),"%Y-%m-%d")
sourceFailures$RELEASE_ID <- format(lubridate::ymd(metadata$CDM_RELEASE_DATE),"%Y%m%d")
networkIndex <- rbind(networkIndex, sourceFailures)
} else {
writeLines(paste("missing data quality result file ",dataQualityResultsFile))
Expand Down
21 changes: 12 additions & 9 deletions R/BuildNetworkIndex.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,20 +94,23 @@ buildNetworkIndex <- function(sourceFolders, outputFolder) {
writeLines(paste("missing observation period results file ", observationPeriodResultsFile))
}

source$cdm_source_name <- dataQualityResults$Metadata$CDM_SOURCE_NAME
source$cdm_source_abbreviation <- dataQualityResults$Metadata$CDM_SOURCE_ABBREVIATION
thisMetadata <- dataQualityResults$Metadata %>%
dplyr::rename_with(SqlRender::camelCaseToSnakeCase) %>% dplyr::rename_with(toupper)

source$cdm_source_name <- thisMetadata$CDM_SOURCE_NAME
source$cdm_source_abbreviation <- thisMetadata$CDM_SOURCE_ABBREVIATION
source$cdm_source_key <- gsub(" ", "_", source$cdm_source_abbreviation)
source$cdm_holder <- dataQualityResults$Metadata$CDM_HOLDER
source$source_description <- dataQualityResults$Metadata$SOURCE_DESCRIPTION
source$cdm_holder <- thisMetadata$CDM_HOLDER
source$source_description <- thisMetadata$SOURCE_DESCRIPTION

source$releases <- rbind(
source$releases,
list(
release_name = format(lubridate::ymd(dataQualityResults$Metadata$CDM_RELEASE_DATE),"%Y-%m-%d"),
release_id = format(lubridate::ymd(dataQualityResults$Metadata$CDM_RELEASE_DATE),"%Y%m%d"),
cdm_version = dataQualityResults$Metadata$CDM_VERSION,
vocabulary_version = dataQualityResults$Metadata$VOCABULARY_VERSION,
dqd_version = dataQualityResults$Metadata$DQD_VERSION,
release_name = format(lubridate::ymd(thisMetadata$CDM_RELEASE_DATE),"%Y-%m-%d"),
release_id = format(lubridate::ymd(thisMetadata$CDM_RELEASE_DATE),"%Y%m%d"),
cdm_version = thisMetadata$CDM_VERSION,
vocabulary_version = thisMetadata$VOCABULARY_VERSION,
dqd_version = thisMetadata$DQD_VERSION,
count_data_quality_issues = dataQualityResults$Overview$countOverallFailed,
count_data_quality_checks = dataQualityResults$Overview$countTotal,
dqd_execution_date = format(lubridate::ymd_hms(dataQualityResults$endTimestamp),"%Y-%m-%d"),
Expand Down
9 changes: 6 additions & 3 deletions R/BuildNetworkPerformanceIndex.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,12 @@ buildNetworkPerformanceIndex <-

performanceTable <- merge(x=performanceTable,y=analysisDetails,by="TASK",all.x=TRUE)

dqdTable <- dplyr::select(dqdData, c("CheckResults.checkId", "CheckResults.EXECUTION_TIME", "CheckResults.CATEGORY")) %>%
rename(TASK = CheckResults.checkId, TIMING = CheckResults.EXECUTION_TIME, CATEGORY = CheckResults.CATEGORY) %>% mutate(PACKAGE = "DQD") %>%
mutate_at("TIMING", str_replace, " secs", "")
dqdTable <- dqdData %>%
dplyr::select(TASK = CheckResults.checkId,
TIMING = CheckResults.executionTime,
CATEGORY = CheckResults.category) %>%
dplyr::mutate(PACKAGE = "DQD") %>%
dplyr::mutate_at("TIMING", str_replace, " secs", "")

mergedTable <- rbind(performanceTable, dqdTable)

Expand Down