From f0833781d0205f4989005b5ef19ada3ac24caf8f Mon Sep 17 00:00:00 2001 From: Colin Davis Date: Tue, 10 Dec 2024 11:25:45 -0600 Subject: [PATCH] Tests pass --- .../link_step_train_test_models.py | 26 ++++++++++++++++--- hlink/tests/model_exploration_test.py | 12 ++++++--- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/hlink/linking/model_exploration/link_step_train_test_models.py b/hlink/linking/model_exploration/link_step_train_test_models.py index a2e65c5..070c1da 100644 --- a/hlink/linking/model_exploration/link_step_train_test_models.py +++ b/hlink/linking/model_exploration/link_step_train_test_models.py @@ -975,7 +975,7 @@ def _combine_by_threshold_matrix_entry( threshold_results: list[dict[int, ThresholdTestResult]], ) -> list[ThresholdTestResult]: # This list will have a size of the number of threshold matrix entries - results: list[ThresholdTestResult] = [] + results: list[list[ThresholdTestResult]] = [] # Check number of folds if len(threshold_results) < 2: @@ -1027,15 +1027,35 @@ def _aggregate_per_threshold_results( pr_auc_test_sd = statistics.stdev(pr_auc_test) if len(pr_auc_test) > 1 else np.nan mcc_test_sd = statistics.stdev(mcc_test) if len(mcc_test) > 1 else np.nan + # Deal with tiny test data. This should never arise in practice but if it did we ought + # to issue a warning. + if len(precision_test) < 1: + # raise RuntimeError("Not enough training data to get any valid precision values.") + precision_test_mean = np.nan + else: + precision_test_mean = ( + statistics.mean(precision_test) + if len(precision_test) > 1 + else precision_test[0] + ) + + if len(recall_test) < 1: + # raise RuntimeError("Not enough training data to get any valid recall values.") + recall_test_mean = np.nan + else: + recall_test_mean = ( + statistics.mean(recall_test) if len(recall_test) > 1 else recall_test[0] + ) + new_desc = pd.DataFrame( { "model": [best_models[0].model_type], "parameters": [best_models[0].hyperparams], "alpha_threshold": [alpha_threshold], "threshold_ratio": [threshold_ratio], - "precision_test_mean": [statistics.mean(precision_test)], + "precision_test_mean": [precision_test_mean], "precision_test_sd": [precision_test_sd], - "recall_test_mean": [statistics.mean(recall_test)], + "recall_test_mean": [recall_test_mean], "recall_test_sd": [recall_test_sd], "pr_auc_test_mean": [statistics.mean(pr_auc_test)], "pr_auc_test_sd": [pr_auc_test_sd], diff --git a/hlink/tests/model_exploration_test.py b/hlink/tests/model_exploration_test.py index 30bca92..46166c5 100644 --- a/hlink/tests/model_exploration_test.py +++ b/hlink/tests/model_exploration_test.py @@ -684,7 +684,6 @@ def test_step_2_train_random_forest_spark( "featureSubsetStrategy": "sqrt", } ] - feature_conf["training"]["output_suspicious_TD"] = True feature_conf["training"]["n_training_iterations"] = 3 model_exploration.run_step(0) @@ -694,9 +693,12 @@ def test_step_2_train_random_forest_spark( tr = spark.table("model_eval_training_results").toPandas() print(f"training results {tr}") # assert tr.shape == (1, 18) - assert tr.query("model == 'random_forest'")["pr_auc_mean"].iloc[0] > 2.0 / 3.0 + assert tr.query("model == 'random_forest'")["pr_auc_test_mean"].iloc[0] > 2.0 / 3.0 assert tr.query("model == 'random_forest'")["maxDepth"].iloc[0] == 3 + # TODO probably remove these since we're not planning to test suspicious data anymore. + # I disabled the saving of suspicious in this test config so these are invalid currently. + """ FNs = spark.table("model_eval_repeat_fns").toPandas() assert FNs.shape == (3, 4) assert FNs.query("id_a == 30")["count"].iloc[0] == 3 @@ -706,6 +708,7 @@ def test_step_2_train_random_forest_spark( TNs = spark.table("model_eval_repeat_tns").toPandas() assert TNs.shape == (6, 4) + """ main.do_drop_all("") @@ -717,18 +720,19 @@ def test_step_2_train_logistic_regression_spark( feature_conf["training"]["model_parameters"] = [ {"type": "logistic_regression", "threshold": 0.7} ] - feature_conf["training"]["n_training_iterations"] = 4 + feature_conf["training"]["n_training_iterations"] = 3 model_exploration.run_step(0) model_exploration.run_step(1) model_exploration.run_step(2) tr = spark.table("model_eval_training_results").toPandas() + # assert tr.count == 3 assert tr.shape == (1, 11) # This is now 0.83333333333.... I'm not sure it's worth testing against # assert tr.query("model == 'logistic_regression'")["pr_auc_mean"].iloc[0] == 0.75 - assert tr.query("model == 'logistic_regression'")["pr_auc_mean"].iloc[0] > 0.74 + assert tr.query("model == 'logistic_regression'")["pr_auc_test_mean"].iloc[0] > 0.74 assert ( round(tr.query("model == 'logistic_regression'")["alpha_threshold"].iloc[0], 1) == 0.7