Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use external concept counts #1117

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# Generated by roxygen2: do not edit by hand

export(createConceptCountsTable)
export(createDiagnosticsExplorerZip)
export(createMergedResultsFile)
export(createResultsDataModel)
export(deployPositConnectApp)
export(executeDiagnostics)
export(getCdmDataSourceInformation)
export(getCohortCounts)
export(getConceptCountsTableName)
export(getDataMigrator)
export(getDefaultCovariateSettings)
export(getDefaultVocabularyTableNames)
Expand Down
81 changes: 81 additions & 0 deletions R/ConceptCountsTable.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright 2022 Observational Health Data Sciences and Informatics
#
# This file is part of CohortDiagnostics
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' createConceptCountsTable
#'
#' @description Create a table containing concept counts.
#' CohortDiagnostics performs this task in every run and it takes a significant amount of time.
#' With this function, the user can create the table beforehand and
#' save it in a schema with write access for later use.
#'
#' @inheritParams executeDiagnostics
#' @param conceptCountsDatabaseSchema schema name for the concept counts table
#' @param conceptCountsTableIsTemp boolean to indicate if it should be a temporary table
#' @param removeCurrentTable boolean; passed to the SQL template as \code{remove_current_table},
#'                           which controls whether an existing table is dropped first
#'
#' @return Called for its side effect of creating the table; the value of
#' \code{DatabaseConnector::executeSql()} is returned.
#'
#' @export
createConceptCountsTable <- function(connectionDetails = NULL,
                                     connection = NULL,
                                     cdmDatabaseSchema,
                                     tempEmulationSchema = NULL,
                                     conceptCountsTable = "concept_counts",
                                     conceptCountsDatabaseSchema = cdmDatabaseSchema,
                                     conceptCountsTableIsTemp = FALSE,
                                     removeCurrentTable = TRUE) {
  ParallelLogger::logInfo("Creating concept counts table")
  if (is.null(connection)) {
    # Fail early with a clear message instead of an obscure error from connect()
    if (is.null(connectionDetails)) {
      stop("Either connection or connectionDetails must be provided", call. = FALSE)
    }
    connection <- DatabaseConnector::connect(connectionDetails)
    # Only close connections opened here; add = TRUE keeps any other exit handlers
    on.exit(DatabaseConnector::disconnect(connection), add = TRUE)
  }
  sql <-
    SqlRender::loadRenderTranslateSql(
      "CreateConceptCountTable.sql",
      packageName = "CohortDiagnostics",
      dbms = connection@dbms,
      tempEmulationSchema = tempEmulationSchema,
      cdm_database_schema = cdmDatabaseSchema,
      work_database_schema = conceptCountsDatabaseSchema,
      concept_counts_table = conceptCountsTable,
      table_is_temp = conceptCountsTableIsTemp,
      remove_current_table = removeCurrentTable
    )
  # Namespace-qualified so the call does not depend on DatabaseConnector being attached/imported
  DatabaseConnector::executeSql(connection, sql)
}

#' getConceptCountsTableName
#'
#' @description Get a concept counts table name that is unique for the current database version.
#' We need to make sure the table is only used if the counts are for the current database.
#' The name is built from the \code{vocabulary_version} of the row with
#' \code{vocabulary_id = 'None'} in the VOCABULARY table, with spaces, dots and dashes
#' replaced by underscores, followed by \code{"_concept_counts"}.
#'
#' @param connection database connection
#' @param cdmDatabaseSchema CDM schema
#'
#' @return A single character string with the concept counts table name. If no usable
#' vocabulary version is found (no row, NA or empty value), plain \code{"concept_counts"}
#' is returned.
#' @export
getConceptCountsTableName <- function(connection, cdmDatabaseSchema) {
  baseName <- "concept_counts"
  sql <- paste(
    "SELECT vocabulary_version as version",
    "FROM @cdmDatabaseSchema.VOCABULARY",
    "WHERE vocabulary_id = 'None'"
  )
  dbVersion <- DatabaseConnector::renderTranslateQuerySql(
    connection = connection,
    sql = sql,
    cdmDatabaseSchema = cdmDatabaseSchema
  ) |>
    dplyr::pull(1)

  # Drop NA/empty versions so they cannot produce names such as "NA_concept_counts"
  dbVersion <- as.character(dbVersion)
  dbVersion <- dbVersion[!is.na(dbVersion) & nzchar(dbVersion)]
  if (length(dbVersion) == 0) {
    return(baseName)
  }

  # Use the first value only so the result is always a single table name
  paste(gsub(" |\\.|-", "_", dbVersion[1]), baseName, sep = "_")
}
26 changes: 0 additions & 26 deletions R/ConceptSetUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,29 +75,3 @@
)
return(orphanConcepts)
}

# Renders CreateConceptCountTable.sql for the connection's dbms and executes it.
# When no connection is passed, one is opened from connectionDetails and closed
# again when the function exits.
createConceptCountsTable <- function(connectionDetails = NULL,
                                     connection = NULL,
                                     cdmDatabaseSchema,
                                     tempEmulationSchema = NULL,
                                     conceptCountsDatabaseSchema = cdmDatabaseSchema,
                                     conceptCountsTable = "concept_counts",
                                     conceptCountsTableIsTemp = FALSE) {
  ParallelLogger::logInfo("Creating internal concept counts table")

  needsOwnConnection <- is.null(connection)
  if (needsOwnConnection) {
    connection <- DatabaseConnector::connect(connectionDetails)
    on.exit(DatabaseConnector::disconnect(connection))
  }

  # Template parameters are passed through unchanged from the arguments
  createTableSql <- SqlRender::loadRenderTranslateSql(
    "CreateConceptCountTable.sql",
    packageName = utils::packageName(),
    dbms = connection@dbms,
    tempEmulationSchema = tempEmulationSchema,
    cdm_database_schema = cdmDatabaseSchema,
    work_database_schema = conceptCountsDatabaseSchema,
    concept_counts_table = conceptCountsTable,
    table_is_temp = conceptCountsTableIsTemp
  )

  DatabaseConnector::executeSql(connection, createTableSql)
}
72 changes: 36 additions & 36 deletions R/ConceptSets.R
Original file line number Diff line number Diff line change
Expand Up @@ -464,22 +464,24 @@ runConceptSetDiagnostics <- function(connection,

if ((runIncludedSourceConcepts && nrow(subsetIncluded) > 0) ||
(runOrphanConcepts && nrow(subsetOrphans) > 0)) {
timeExecution(
exportFolder,
taskName = "createConceptCountsTable",
cohortIds = NULL,
parent = "runConceptSetDiagnostics",
expr = {
createConceptCountsTable(
connection = connection,
cdmDatabaseSchema = cdmDatabaseSchema,
tempEmulationSchema = tempEmulationSchema,
conceptCountsDatabaseSchema = conceptCountsDatabaseSchema,
conceptCountsTable = conceptCountsTable,
conceptCountsTableIsTemp = conceptCountsTableIsTemp
if (!useExternalConceptCountsTable) {
timeExecution(
exportFolder,
taskName = "createConceptCountsTable",
cohortIds = NULL,
parent = "runConceptSetDiagnostics",
expr = {
createConceptCountsTable(
connection = connection,
cdmDatabaseSchema = cdmDatabaseSchema,
tempEmulationSchema = tempEmulationSchema,
conceptCountsDatabaseSchema = conceptCountsDatabaseSchema,
conceptCountsTable = conceptCountsTable,
conceptCountsTableIsTemp = conceptCountsTableIsTemp
)
}
)
}
)
}
if (runIncludedSourceConcepts) {
timeExecution(
Expand All @@ -499,9 +501,6 @@ runConceptSetDiagnostics <- function(connection,
}
if (nrow(subsetIncluded) > 0) {
start <- Sys.time()
if (useExternalConceptCountsTable) {
stop("Use of external concept count table is not supported")
} else {
sql <- SqlRender::loadRenderTranslateSql(
"CohortSourceCodes.sql",
packageName = utils::packageName(),
Expand Down Expand Up @@ -616,7 +615,6 @@ runConceptSetDiagnostics <- function(connection,
signif(delta, 3),
attr(delta, "units")
))
}
}
}
)
Expand Down Expand Up @@ -889,7 +887,7 @@ runConceptSetDiagnostics <- function(connection,
if (!useExternalConceptCountsTable) {
ParallelLogger::logTrace("Using internal concept count table.")
} else {
stop("Use of external concept count table is not supported")
ParallelLogger::logTrace("Using external concept count table.")
}

# [OPTIMIZATION idea] can we modify the sql to do this for all uniqueConceptSetId in one query using group by?
Expand Down Expand Up @@ -1081,23 +1079,25 @@ runConceptSetDiagnostics <- function(connection,

if ((runIncludedSourceConcepts && nrow(subsetIncluded) > 0) ||
(runOrphanConcepts && nrow(subsetOrphans) > 0)) {
ParallelLogger::logTrace("Dropping temp concept count table")
if (conceptCountsTableIsTemp) {
countTable <- conceptCountsTable
} else {
countTable <-
paste(conceptCountsDatabaseSchema, conceptCountsTable, sep = ".")
}

sql <- "TRUNCATE TABLE @count_table; DROP TABLE @count_table;"
DatabaseConnector::renderTranslateExecuteSql(
connection,
sql,
tempEmulationSchema = tempEmulationSchema,
count_table = countTable,
progressBar = FALSE,
reportOverallTime = FALSE
)
if (!useExternalConceptCountsTable) {
ParallelLogger::logTrace("Dropping temp concept count table")
if (conceptCountsTableIsTemp) {
countTable <- conceptCountsTable
} else {
countTable <-
paste(conceptCountsDatabaseSchema, conceptCountsTable, sep = ".")
}

sql <- "TRUNCATE TABLE @count_table; DROP TABLE @count_table;"
DatabaseConnector::renderTranslateExecuteSql(
connection,
sql,
tempEmulationSchema = tempEmulationSchema,
count_table = countTable,
progressBar = FALSE,
reportOverallTime = FALSE
)
}
}

delta <- Sys.time() - startConceptSetDiagnostics
Expand Down
40 changes: 38 additions & 2 deletions R/RunDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ getDefaultCovariateSettings <- function() {
#' diagnostics to.
#' @param cohortDefinitionSet Data.frame of cohorts must include columns cohortId, cohortName, json, sql
#' @param cohortTableNames Cohort Table names used by CohortGenerator package
#' @param conceptCountsTable Concept counts table name. The default, "#concept_counts", creates a temporary concept counts table.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes look good, but you need to run devtools::document() (or the build in RStudio) so that the generated documentation includes these parameters. Doing this should get the build to finish, and I can then merge in your changes.

#' If an external concept counts table is used, provide its name as a character string without a leading hash, e.g. "concept_counts".
#' @param databaseId A short string for identifying the database (e.g. 'Synpuf').
#' @param databaseName The full name of the database. If NULL, defaults to value in cdm_source table
#' @param databaseDescription A short description (several sentences) of the database. If NULL, defaults to value in cdm_source table
Expand All @@ -136,6 +138,7 @@ getDefaultCovariateSettings <- function() {
#' @param incremental Create only cohort diagnostics that haven't been created before?
#' @param incrementalFolder If \code{incremental = TRUE}, specify a folder where records are kept
#' of which cohort diagnostics has been executed.
#' @param useExternalConceptCountsTable If TRUE, an existing external concept counts table (named by \code{conceptCountsTable}) is used instead of creating a temporary one.
#' @param runFeatureExtractionOnSample Logical. If TRUE, the function will operate on a sample of the data.
#' Default is FALSE, meaning the function will operate on the full data set.
#'
Expand Down Expand Up @@ -205,6 +208,7 @@ executeDiagnostics <- function(cohortDefinitionSet,
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
cohortTable = "cohort",
cohortTableNames = CohortGenerator::getCohortTableNames(cohortTable = cohortTable),
conceptCountsTable = "#concept_counts",
vocabularyDatabaseSchema = cdmDatabaseSchema,
cohortIds = NULL,
cdmVersion = 5,
Expand All @@ -223,6 +227,7 @@ executeDiagnostics <- function(cohortDefinitionSet,
irWashoutPeriod = 0,
incremental = FALSE,
incrementalFolder = file.path(exportFolder, "incremental"),
useExternalConceptCountsTable = FALSE,
runFeatureExtractionOnSample = FALSE,
sampleN = 1000,
seed = 64374,
Expand Down Expand Up @@ -687,6 +692,37 @@ executeDiagnostics <- function(cohortDefinitionSet,
}
)
}

# Defines variables and checks version of external concept counts table -----
if (!useExternalConceptCountsTable) {
  # Internal mode always works on the temp table, whatever name the caller passed in
  conceptCountsTableIsTemp <- TRUE
  conceptCountsTable <- "#concept_counts"
} else {
  if (conceptCountsTable == "#concept_counts") {
    stop("Temporary conceptCountsTable name. Please provide a valid external ConceptCountsTable name")
  }
  conceptCountsTableIsTemp <- FALSE
  dataSourceInfo <- getCdmDataSourceInformation(connection = connection, cdmDatabaseSchema = cdmDatabaseSchema)
  vocabVersion <- dataSourceInfo$vocabularyVersion
  # NOTE(review): the table is looked up in cohortDatabaseSchema, while
  # createConceptCountsTable() defaults to cdmDatabaseSchema - confirm this is the intended schema.
  vocabVersionExternalConceptCountsTable <- DatabaseConnector::renderTranslateQuerySql(
    connection = connection,
    sql = "SELECT DISTINCT vocabulary_version FROM @work_database_schema.@concept_counts_table;",
    work_database_schema = cohortDatabaseSchema,
    concept_counts_table = conceptCountsTable,
    snakeCaseToCamelCase = TRUE,
    # Use the function argument; the previous getOption() call misspelt the option name
    # ("sqlRenderTempEmulationSchena") and therefore always passed NULL
    tempEmulationSchema = tempEmulationSchema
  )
  # First column holds the distinct versions stored in the external table
  externalVocabVersions <- vocabVersionExternalConceptCountsTable[[1]]
  if (!identical(vocabVersion, externalVocabVersions[1])) {
    stop(paste0(
      "External concept counts table (",
      paste(externalVocabVersions, collapse = ", "),
      ") does not match database (",
      vocabVersion,
      "). Update concept_counts with createConceptCountsTable()"
    ))
  }
}

# Always export concept sets to csv
exportConceptSets(
Expand Down Expand Up @@ -719,11 +755,11 @@ executeDiagnostics <- function(cohortDefinitionSet,
exportFolder = exportFolder,
minCellCount = minCellCount,
conceptCountsDatabaseSchema = NULL,
conceptCountsTable = "#concept_counts",
conceptCountsTable = conceptCountsTable,
conceptCountsTableIsTemp = TRUE,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable,
useExternalConceptCountsTable = FALSE,
useExternalConceptCountsTable = useExternalConceptCountsTable,
incremental = incremental,
conceptIdTable = "#concept_ids",
recordKeepingFile = recordKeepingFile
Expand Down
21 changes: 15 additions & 6 deletions inst/sql/sql_server/CreateConceptCountTable.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
{DEFAULT @table_is_temp = FALSE}
{DEFAULT @remove_current_table = TRUE}

{@table_is_temp} ? {
IF OBJECT_ID('tempdb..@concept_counts_table', 'U') IS NOT NULL
DROP TABLE @concept_counts_table;
} : {
IF OBJECT_ID('@work_database_schema.@concept_counts_table', 'U') IS NOT NULL
DROP TABLE @work_database_schema.@concept_counts_table;
{@remove_current_table} ? {
{@table_is_temp} ? {
IF OBJECT_ID('tempdb..@concept_counts_table', 'U') IS NOT NULL
DROP TABLE @concept_counts_table;
} : {
IF OBJECT_ID('@work_database_schema.@concept_counts_table', 'U') IS NOT NULL
DROP TABLE @work_database_schema.@concept_counts_table;
}
}

SELECT concept_id,
Expand Down Expand Up @@ -95,3 +98,9 @@ FROM (
FROM @cdm_database_schema.observation
GROUP BY observation_source_concept_id
) tmp;

-- Non-temporary tables are stamped with the vocabulary version of the CDM they were built from.
-- The column is sized like the CDM vocabulary_version column so the value is never truncated.
{@table_is_temp} ? {} : {
ALTER TABLE @work_database_schema.@concept_counts_table
ADD vocabulary_version VARCHAR(255) NULL;
UPDATE @work_database_schema.@concept_counts_table SET vocabulary_version = (SELECT vocabulary_version FROM @cdm_database_schema.vocabulary WHERE vocabulary_id = 'None');
}
Loading
Loading