minor fixes

OHDSI · Jul 19, 2024 · 0b61a14 · 0b61a14
1 parent 692b287
commit 0b61a14
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 39 deletions.
diff --git a/R/AggregateCovariates.R b/R/AggregateCovariates.R
@@ -651,7 +651,7 @@ exportAndromedaToCsv <- function(
         if (sum(removeInd) > 0) {
           ParallelLogger::logInfo(paste0("Removing sum_value counts less than ", minCellCount))
           if (sum(removeInd) > 0) {
-            data$sum_value[removeInd] <- -1
+            data$sum_value[removeInd] <- -1*minCellCount
           }
         }
 
@@ -683,7 +683,7 @@ exportAndromedaToCsv <- function(
         if (sum(removeInd) > 0) {
           ParallelLogger::logInfo(paste0("Removing count_value counts less than ", minCellCount))
           if (sum(removeInd) > 0) {
-            data$count_value[removeInd] <- -1
+            data$count_value[removeInd] <- -1*minCellCount
           }
         }
 
@@ -725,6 +725,20 @@ exportAndromedaToCsv <- function(
     colnames(cohortCounts) <- SqlRender::camelCaseToSnakeCase(colnames(cohortCounts))
 
     # TODO apply minCellCount to columns row_count, person_count
+    removeInd <- cohortCounts$row_count < minCellCount
+    if (sum(removeInd) > 0) {
+      ParallelLogger::logInfo(paste0("Removing row_count counts less than ", minCellCount))
+      if (sum(removeInd) > 0) {
+        cohortCounts$row_count[removeInd] <- -1*minCellCount
+      }
+    }
+    removeInd <- cohortCounts$person_count < minCellCount
+    if (sum(removeInd) > 0) {
+      ParallelLogger::logInfo(paste0("Removing person_count counts less than ", minCellCount))
+      if (sum(removeInd) > 0) {
+        cohortCounts$person_count[removeInd] <- -1*minCellCount
+      }
+    }
 
     if(file.exists(file.path(saveLocation, 'cohort_counts.csv'))){
       append <- T

diff --git a/R/RunCharacterization.R b/R/RunCharacterization.R
@@ -439,17 +439,27 @@ aggregateCsvs <- function(
 
         if(csvType == 'analysis_ref.csv'){
           data <- data %>%
-            dplyr::filter( # need to filter analysis_id and covariate_setting_id
-              !.data$setting_id %in% analysisRefTracker
-            )
-          analysisRefTracker <- c(analysisRefTracker, unique(data$setting_id))
+            dplyr::mutate(
+              unique_id = paste0(.data$setting_id, '-', .data$analysis_id)
+            ) %>%
+            dplyr::filter( # need to filter analysis_id and setting_id
+              !.data$unique_id %in% analysisRefTracker
+            ) %>%
+            dplyr::select(-"unique_id")
+
+          analysisRefTracker <- unique(c(analysisRefTracker, paste0(data$setting_id,'-',data$analysis_id)))
         }
-        if(csvType == 'covariate_ref.csv'){
+        if(csvType == 'covariate_ref.csv'){ # this could be problematic as there may be different covariate_ids
           data <- data %>%
-            dplyr::filter(
-              !.data$setting_id %in% covariateRefTracker
-            )
-          covariateRefTracker <- c(covariateRefTracker, unique(data$setting_id))
+            dplyr::mutate(
+              unique_id = paste0(.data$setting_id, '-', .data$covariate_id)
+            ) %>%
+            dplyr::filter( # need to filter covariate_id and setting_id
+              !.data$unique_id %in% covariateRefTracker
+            )%>%
+            dplyr::select(-"unique_id")
+
+          covariateRefTracker <- unique(c(covariateRefTracker, paste0(data$setting_id,'-',data$covariate_id)))
         }
         if(csvType == 'settings.csv'){
           data <- data %>%

diff --git a/inst/sql/sql_server/DechallengeRechallenge.sql b/inst/sql/sql_server/DechallengeRechallenge.sql
@@ -13,25 +13,26 @@ select
 @dechallenge_stop_interval as dechallenge_stop_interval,
 @dechallenge_evaluation_window as dechallenge_evaluation_window,
 target_cohort_definition_id,
-	outcome_cohort_definition_id,
-	num_exposure_eras,
-	num_persons_exposed,
-	num_cases,
-	dechallenge_attempt,
-	dechallenge_fail,
-	dechallenge_success,
-	rechallenge_attempt,
-	rechallenge_fail,
-	rechallenge_success,
-	case when num_cases > 0 then 1.0*dechallenge_attempt / num_cases else null end as pct_dechallenge_attempt,
-	case when dechallenge_attempt > 0 then 1.0*dechallenge_success / dechallenge_attempt else null end as pct_dechallenge_success,
-	case when dechallenge_attempt > 0 then 1.0*dechallenge_fail / dechallenge_attempt else null end as pct_dechallenge_fail,
-	case when dechallenge_attempt > 0 then 1.0*rechallenge_attempt / dechallenge_attempt else null end as pct_rechallenge_attempt,
-	case when rechallenge_attempt > 0 then 1.0*rechallenge_success / rechallenge_attempt else null end as pct_rechallenge_success,
-	case when rechallenge_attempt > 0 then 1.0*rechallenge_fail / rechallenge_attempt else null end as pct_rechallenge_fail
+outcome_cohort_definition_id,
+num_exposure_eras,
+num_persons_exposed,
+num_cases,
+dechallenge_attempt,
+dechallenge_fail,
+dechallenge_success,
+rechallenge_attempt,
+rechallenge_fail,
+rechallenge_success,
+case when num_cases > 0 then 1.0*dechallenge_attempt / num_cases else null end as pct_dechallenge_attempt,
+case when dechallenge_attempt > 0 then 1.0*dechallenge_success / dechallenge_attempt else null end as pct_dechallenge_success,
+case when dechallenge_attempt > 0 then 1.0*dechallenge_fail / dechallenge_attempt else null end as pct_dechallenge_fail,
+case when dechallenge_attempt > 0 then 1.0*rechallenge_attempt / dechallenge_attempt else null end as pct_rechallenge_attempt,
+case when rechallenge_attempt > 0 then 1.0*rechallenge_success / rechallenge_attempt else null end as pct_rechallenge_success,
+case when rechallenge_attempt > 0 then 1.0*rechallenge_fail / rechallenge_attempt else null end as pct_rechallenge_fail
 
-	into #challenge
-from
+INTO #challenge
+
+FROM
 (
 	select cases.target_cohort_definition_id, cases.outcome_cohort_definition_id,
 		exposures.num_exposure_eras,
@@ -69,15 +70,15 @@ from
 	and io1.cohort_start_date > dc1.cohort_start_date  and io1.cohort_start_date <= dc1.cohort_end_date
 	group by dc1.cohort_definition_id, io1.cohort_definition_id
 
-	-- added this code to return 0s when there are no outcomes
-	-- so we can tell whether the dechal has been run or not
 	union
+
 	select distinct
 	dc1_temp.cohort_definition_id as target_cohort_definition_id,
 	io1_temp.cohort_definition_id as outcome_cohort_definition_id,
 	0 as num_cases
 	from #target_cohort dc1_temp
-	join #outcome_cohort io1_temp
+	cross join #outcome_cohort io1_temp
+
 	) temp_cases
 	group by
 	target_cohort_definition_id,
@@ -109,7 +110,7 @@ from
 	on dc1.subject_id = ro1.subject_id
 	and io1.cohort_definition_id = ro1.cohort_definition_id
 	and ro1.cohort_start_date > dc1.cohort_end_date
-	and ro1.cohort_start_date <= dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)   --this should be parameterized to be the dechallenge window required for success/failure
+	and ro1.cohort_start_date <= dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)
 	group by dc1.cohort_definition_id, io1.cohort_definition_id
 	) dechallenge_fail
 	on cases.target_cohort_definition_id = dechallenge_fail.target_cohort_definition_id
@@ -126,12 +127,12 @@ from
 	on dc1.subject_id = ro0.subject_id
 	and io1.cohort_definition_id = ro0.cohort_definition_id
 	and ro0.cohort_start_date > dc1.cohort_end_date
-	and ro0.cohort_start_date <= dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)   --this should be parameterized to be the dechallenge window required for success/failure
+	and ro0.cohort_start_date <= dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)
 	inner join #target_cohort de1
 	on dc1.subject_id = de1.subject_id
 	and dc1.cohort_definition_id = de1.cohort_definition_id
-	and de1.cohort_start_date > dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)   --using same dechallenge window to detrmine when rechallenge attempt can start
-	where ro0.subject_id is null --not a dechallenge fail
+	and de1.cohort_start_date > dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)
+	where ro0.subject_id is null
 	group by dc1.cohort_definition_id, io1.cohort_definition_id
 	) rechallenge_attempt
 	on cases.target_cohort_definition_id = rechallenge_attempt.target_cohort_definition_id
@@ -148,17 +149,17 @@ from
 	on dc1.subject_id = ro0.subject_id
 	and io1.cohort_definition_id = ro0.cohort_definition_id
 	and ro0.cohort_start_date > dc1.cohort_end_date
-	and ro0.cohort_start_date <= dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)   --this should be parameterized to be the dechallenge window required for success/failure
+	and ro0.cohort_start_date <= dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)
 	inner join #target_cohort de1
 	on dc1.subject_id = de1.subject_id
 	and dc1.cohort_definition_id = de1.cohort_definition_id
-	and de1.cohort_start_date > dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)   --using same dechallenge window to detrmine when rechallenge attempt can start
+	and de1.cohort_start_date > dateadd(day, @dechallenge_evaluation_window, dc1.cohort_end_date)
 	inner join #outcome_cohort ro1
 	on de1.subject_id = ro1.subject_id
 	and io1.cohort_definition_id = ro1.cohort_definition_id
 	and ro1.cohort_start_date > de1.cohort_start_date
 	and ro1.cohort_start_date <= de1.cohort_end_date
-	where ro0.subject_id is null --not a dechallenge fail
+	where ro0.subject_id is null
 	group by dc1.cohort_definition_id, io1.cohort_definition_id
 	) rechallenge_fail
 	on cases.target_cohort_definition_id = rechallenge_fail.target_cohort_definition_id