From ec9e04b8a0ee269dee665ea33beaaf8c478240e4 Mon Sep 17 00:00:00 2001 From: AnuarAssylkhanov Date: Thu, 22 Aug 2024 17:42:31 +0500 Subject: [PATCH] add dbSummary --- NAMESPACE | 1 + R/generateDbSummary.R | 124 +++++++++++++++++++++++++++++++ extras/GenerateDatabaseSummary.R | 23 +++++- man/generateDbSummary.Rd | 56 ++++++++++++++ 4 files changed, 202 insertions(+), 2 deletions(-) create mode 100644 R/generateDbSummary.R create mode 100644 man/generateDbSummary.Rd diff --git a/NAMESPACE b/NAMESPACE index 821244e4..929c06a7 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(exportToAres) export(exportToJson) export(exportVisitDetailToJson) export(exportVisitToJson) +export(generateDbSummary) export(getAnalysisDetails) export(getSeasonalityScore) export(getTemporalData) diff --git a/R/generateDbSummary.R b/R/generateDbSummary.R new file mode 100644 index 00000000..859b4d35 --- /dev/null +++ b/R/generateDbSummary.R @@ -0,0 +1,124 @@ +# @file generateDbSummary +# +# Copyright 2021 Observational Health Data Sciences and Informatics +# +# This file is part of Achilles +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +#' @title +#' generateDbSummary +#' +#' @description +#' \code{generateDbSummary} can be run after the Achilles analyses are complete +#' to create a high-level database summary. 
+#' +#' @details +#' Used to generate a high-level database summary consisting of earliest date available, latest +#' date available, median age at first observation, total persons, etc. This function +#' creates a summary table meant for a manuscript detailing the network of databases +#' used in an analysis +#' +#' @param connectionDetails An R object of type \code{connectionDetails} created using the +#' function \code{createConnectionDetails} in the +#' \code{DatabaseConnector} package. +#' @param cdmDatabaseSchema Fully qualified name of database schema that contains OMOP CDM +#' schema. On SQL Server, this should specify both the database and the +#' schema, so for example, on SQL Server, 'cdm_instance.dbo'. +#' @param resultsDatabaseSchema Fully qualified name of database schema that we can write final +#' results to. This argument has no default. On SQL Server, this should +#' specify both the database and the schema, so for example, on SQL +#' Server, 'cdm_results.dbo'. +#' @param country The country of origin of the database +#' @param provenance The provenance of the data (EHR, claims, registry, etc) +#' +#' @return +#' A list with three elements: \code{summary}, \code{visitDist} and \code{sourceVocabs}. +#' +#' @examples +#' \dontrun{ +#' connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "sql server", +#' server = "yourserver") +#' dbSummary <- generateDbSummary(connectionDetails, +#' cdmDatabaseSchema = "cdm_schema", +#' resultsDatabaseSchema = "results_schema", +#' country = "Country of Origin", +#' provenance = "Provenance of data") +#' } +#' @export + +generateDbSummary <- function (connectionDetails, + cdmDatabaseSchema, + resultsDatabaseSchema, + country, + provenance){ + + conn <- DatabaseConnector::connect(connectionDetails) + + sql <- + SqlRender::loadRenderTranslateSql( + sqlFilename = "summary/generateDbSummary.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = 
resultsDatabaseSchema, + country = country, + provenance = provenance + ) + + dbSummary <- DatabaseConnector::querySql(conn, sql) + + sql <- + SqlRender::loadRenderTranslateSql( + sqlFilename = "summary/dbSourceVocabs.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + country = country, + provenance = provenance + ) + + dbSourceVocabs <- DatabaseConnector::querySql(conn, sql) + + sql <- + SqlRender::loadRenderTranslateSql( + sqlFilename = "summary/dbVisitDist.sql", + packageName = "Achilles", + dbms = connectionDetails$dbms, + warnOnMissingParameters = FALSE, + cdm_database_schema = cdmDatabaseSchema, + results_database_schema = resultsDatabaseSchema, + country = country, + provenance = provenance + ) + + dbVisitDist <- DatabaseConnector::querySql(conn, sql) + + DatabaseConnector::dbDisconnect(conn) + + # extract columns and pivot + dbInfo <- dbSummary[1,c(1,2,3,4)] + row.names(dbSummary) <- dbSummary$ATTRIBUTE_NAME + df <- dbSummary[,c('ATTRIBUTE_VALUE')] + df_t <- t(df) + colnames(df_t) <- rownames(dbSummary) + dbSummaryFinal <- cbind(dbInfo, df_t) + + colnames(dbSummaryFinal)[1:4] <- c("Data Source Name", "Data Source Abbreviation", "Source Country", "Data Provenance") + + return(list(summary=dbSummaryFinal, visitDist=dbVisitDist, sourceVocabs = dbSourceVocabs)) +} \ No newline at end of file diff --git a/extras/GenerateDatabaseSummary.R b/extras/GenerateDatabaseSummary.R index cae2a6aa..ffd4cc83 100644 --- a/extras/GenerateDatabaseSummary.R +++ b/extras/GenerateDatabaseSummary.R @@ -6,12 +6,31 @@ options(connectionObserver = NULL) demoCountry <- "United States" demoProvenance <- "Synthetic" +connectionDetails <- Eunomia::getEunomiaConnectionDetails() -dbSummary <- generateDbSummary(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema,demoCountry,demoProvenance) +cdmDatabaseSchema <- "main" 
+resultsDatabaseSchema <- "main" + +cdmVersion <- "5.3" + +Achilles::achilles( + cdmVersion = cdmVersion, + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = cdmDatabaseSchema, + smallCellCount = 0, + createTable = TRUE, + createIndices = FALSE, + sqlOnly = FALSE +) + +dbSummary <- Achilles::generateDbSummary(connectionDetails, cdmDatabaseSchema, resultsDatabaseSchema,demoCountry,demoProvenance) tableOutput <- dbSummary$summary tableOutput$"Source Vocabularies" <- paste(dbSummary$sourceVocabs$VOCABULARY_ID, collapse="
") tableOutput$"Visits" <- paste(dbSummary$visitDist$CONCEPT_NAME, collapse="
") + # this will open results in the RStudio Viewer which can then be exported to image or html. -kable(tableOutput,escape=F) %>% kableExtra::kable_styling() +kbl(tableOutput,escape=F) %>% kableExtra::kable_styling() + diff --git a/man/generateDbSummary.Rd b/man/generateDbSummary.Rd new file mode 100644 index 00000000..596eb597 --- /dev/null +++ b/man/generateDbSummary.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/generateDbSummary.R +\name{generateDbSummary} +\alias{generateDbSummary} +\title{generateDbSummary} +\usage{ +generateDbSummary( + connectionDetails, + cdmDatabaseSchema, + resultsDatabaseSchema, + country, + provenance +) +} +\arguments{ +\item{connectionDetails}{An R object of type \code{connectionDetails} created using the +function \code{createConnectionDetails} in the +\code{DatabaseConnector} package.} + +\item{cdmDatabaseSchema}{Fully qualified name of database schema that contains OMOP CDM +schema. On SQL Server, this should specify both the database and the +schema, so for example, on SQL Server, 'cdm_instance.dbo'.} + +\item{resultsDatabaseSchema}{Fully qualified name of database schema that we can write final +results to. This argument has no default. On SQL Server, this should +specify both the database and the schema, so for example, on SQL +Server, 'cdm_results.dbo'.} + +\item{country}{The country of origin of the database} + +\item{provenance}{The provenance of the data (EHR, claims, registry, etc)} +} +\value{ +A list with three elements: \code{summary}, \code{visitDist} and \code{sourceVocabs}. +} +\description{ +\code{generateDbSummary} can be run after the Achilles analyses are complete +to create a high-level database summary. +} +\details{ +Used to generate a high-level database summary consisting of earliest date available, latest +date available, median age at first observation, total persons, etc. 
This function +creates a summary table meant for a manuscript detailing the network of databases +used in an analysis +} +\examples{ +\dontrun{ +connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "sql server", + server = "yourserver") +dbSummary <- generateDbSummary(connectionDetails, + cdmDatabaseSchema = "cdm_schema", + resultsDatabaseSchema = "results_schema", + country = "Country of Origin", + provenance = "Provenance of data") +} +}