Skip to content

Commit

Permalink
Update test-manualData.R
Browse files Browse the repository at this point in the history
- added test to manual data to ensure cohort type Exclude works
  • Loading branch information
jreps committed Jul 24, 2024
1 parent b39d6a0 commit 66efbde
Showing 1 changed file with 199 additions and 0 deletions.
199 changes: 199 additions & 0 deletions tests/testthat/test-manualData.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ context("manual data")
manualData <- file.path(tempdir(), 'manual.sqlite')
on.exit(file.remove(manualData), add = TRUE)

manualData2 <- file.path(tempdir(), 'manual2.sqlite')
on.exit(file.remove(manualData2), add = TRUE)

test_that("manual data runCharacterizationAnalyses", {

# this test creates made-up OMOP CDM data
Expand Down Expand Up @@ -296,3 +299,199 @@ testthat::expect_true(covs_cont$count_value[covs_cont$covariate_id == 1002 & cov


})



test_that("manual data checking exclude count works", {

# this test creates made-up OMOP CDM data
# and runs runCharacterizationAnalyses on the data
# to check whether the results are as expected
connectionDetails <- DatabaseConnector::createConnectionDetails(
dbms = 'sqlite',
server = manualData2
)
con <- DatabaseConnector::connect(connectionDetails = connectionDetails)
schema <- 'main'

# add persons - aggregate covs (age)
persons <- data.frame(
person_id = 1:10,
gender_concept_id = rep(8532, 10),
year_of_birth = rep(2000, 10),
race_concept_id = rep(1, 10),
ethnicity_concept_id = rep(1, 10),
location_id = rep(1,10),
provider_id = rep(1,10),
care_site_id = rep(1,10),
person_source_value = 1:10,
gender_source_value = rep('female', 10),
race_source_value = rep('na', 10),
ethnicity_source_value = rep('na', 10)
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = 'person',
data = persons
)

# observation period
obs_period <- data.frame(
observation_period_id = 1:10,
person_id = 1:10,
observation_period_start_date = rep('2000-12-31', 10),
observation_period_end_date = c('2000-12-31', rep('2020-12-31', 9)),
period_type_concept_id = rep(1,10)
)
obs_period$observation_period_start_date <- as.Date(obs_period$observation_period_start_date)
obs_period$observation_period_end_date <- as.Date(obs_period$observation_period_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = 'observation_period',
data = obs_period
)
# person 1 has 1 day obs
# person 2-6 has no events
# person 7 has diabetes at 10, headache at 12
# person 8 has diabetes at 13
# person 9 has headache multiple times
# person 10 has diabetes at 14
# add conditions - aggregate covs (conditions)

condition_era <- data.frame(
condition_era_id = 1:7,
person_id = c(7,7,8, 9,9,9,10),
condition_concept_id = c(201820, 378253,201820,378253,378253,378253, 201820),
condition_era_start_date = c('2011-01-01', '2013-04-03', '2016-01-01',
'2006-01-04', '2014-08-02', '2014-08-04',
'2013-01-04'),
condition_era_end_date = c('2011-01-01', '2013-04-03', '2016-01-01',
'2006-01-04', '2014-08-02', '2014-08-04',
'2013-01-04'),
condition_occurrence_count = rep(1, 7)
)
condition_era$condition_era_start_date <- as.Date(condition_era$condition_era_start_date)
condition_era$condition_era_end_date <- as.Date(condition_era$condition_era_end_date)

DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = 'condition_era',
data = condition_era
)

# add concept
concept <- data.frame(
concept_id = c(201820,378253),
concept_name = c('diabetes', 'hypertension'),
domain_id = rep(1,2),
vocabulary_id = rep(1,2),
concept_class_id = c('Condition', 'Condition'),
standard_concept = rep('S',2),
concept_code = rep('Snowmed',2)
#,valid_start_date = NULL,
#valid_end_date = NULL,
#invalid_reason = NULL
)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = 'concept',
data = concept
)

# add cohort - tte/dechal/rechal
# person 6 has the outcome just before the exposure
cohort <- data.frame(
subject_id = c(
1:10,
7,8,10,
c(3,6,7,8,10),
c(7),
6
),
cohort_definition_id = c(
rep(1,10),
rep(1,3),
rep(2, 5),
2,
2
),
cohort_start_date = c(
rep('2018-01-01', 10),
rep('2018-05-01',3),
'2018-01-13','2018-01-03',rep('2018-01-06',3),
'2018-05-24',
'2017-12-29'
),
cohort_end_date = c(
rep('2018-02-01', 10),
rep('2018-06-01',3),
'2018-02-02','2018-02-04',rep('2018-02-08',3),
'2018-06-05',
'2017-12-29'
)
)
cohort$cohort_start_date <- as.Date(cohort$cohort_start_date)
cohort$cohort_end_date <- as.Date(cohort$cohort_end_date)
DatabaseConnector::insertTable(
connection = con,
databaseSchema = schema,
tableName = 'cohort',
data = cohort
)

# create settings and run
characterizationSettings <- Characterization::createCharacterizationSettings(
timeToEventSettings = Characterization::createTimeToEventSettings(
targetIds = 1,
outcomeIds = 2
),
dechallengeRechallengeSettings = Characterization::createDechallengeRechallengeSettings(
targetIds = 1,
outcomeIds = 2
),
aggregateCovariateSettings = Characterization::createAggregateCovariateSettings(
targetIds = 1,
outcomeIds = 2,
minPriorObservation = 365,
outcomeWashoutDays = 30,
riskWindowStart = 1,
riskWindowEnd = 90,
covariateSettings = FeatureExtraction::createCovariateSettings(
useDemographicsAge = T,
useDemographicsGender = T,
useConditionEraAnyTimePrior = T
),
caseCovariateSettings = Characterization::createDuringCovariateSettings(useConditionEraDuring = T),
casePreTargetDuration = 365*5
)
)
Characterization::runCharacterizationAnalyses(
connectionDetails = connectionDetails,
targetDatabaseSchema = schema,
targetTable = 'cohort',
outcomeDatabaseSchema = schema,
outcomeTable = 'cohort',
cdmDatabaseSchema = schema,
characterizationSettings = characterizationSettings,
outputDirectory = file.path(tempdir(), 'result2'),
executionPath = file.path(tempdir(), 'execution2'),
csvFilePrefix = 'c_',
databaseId = '1',
incremental = T,
threads = 1,
minCharacterizationMean = 0.0001,
minCellCount = NULL,
showSubjectId = T
)

# load the cohort counts to make sure the exclude is there
counts <- read.csv(file.path(tempdir(), 'result2','c_cohort_counts.csv'))
# when restricted to first exposure 5 people have outcome
testthat::expect_true(counts$row_count[counts$cohort_type == 'Cases'] == 4)
testthat::expect_true(counts$row_count[counts$cohort_type == 'Exclude'] == 1)

})

0 comments on commit 66efbde

Please sign in to comment.