From 0eefd4b64f1a6a5439e792e775fc504606aa99a7 Mon Sep 17 00:00:00 2001
From: Michael Foster
Date: Wed, 7 Feb 2024 15:41:46 +0000
Subject: [PATCH 01/36] Patsy interaction treatment terms can now be read

---
 causal_testing/testing/estimators.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
index e3996a0d..708fa48d 100644
--- a/causal_testing/testing/estimators.py
+++ b/causal_testing/testing/estimators.py
@@ -352,7 +352,7 @@ def estimate_coefficient(self) -> float:
         model = self._run_linear_regression()
         newline = "\n"
         treatment = [self.treatment]
-        if str(self.df.dtypes[self.treatment]) == "object":
+        if self.treatment in self.df.dtypes and str(self.df.dtypes[self.treatment]) == "object":
             design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
             treatment = design_info.column_names[design_info.term_name_slices[self.treatment]]
         assert set(treatment).issubset(
@@ -360,7 +360,7 @@ def estimate_coefficient(self) -> float:
         ), f"{treatment} not in\n{' ' + str(model.params.index).replace(newline, newline + ' ')}"
         unit_effect = model.params[treatment]  # Unit effect is the coefficient of the treatment
         [ci_low, ci_high] = self._get_confidence_intervals(model, treatment)
-        if str(self.df.dtypes[self.treatment]) != "object":
+        if self.treatment not in self.df.dtypes or str(self.df.dtypes[self.treatment]) != "object":
             unit_effect = unit_effect[0]
             ci_low = ci_low[0]
             ci_high = ci_high[0]

From c255ce0cc194cc76b9f857d3a3fe4a801c6e1968 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 13 Feb 2024 12:55:54 +0000
Subject: [PATCH 02/36] refactor estimate_coefficient to only return pd.Series

---
 causal_testing/testing/estimators.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
index 708fa48d..5fc66b28 100644
--- a/causal_testing/testing/estimators.py
+++ b/causal_testing/testing/estimators.py
@@ -351,19 +351,16 @@ def estimate_coefficient(self) -> float:
         """
         model = self._run_linear_regression()
         newline = "\n"
-        treatment = [self.treatment]
         if self.treatment in self.df.dtypes and str(self.df.dtypes[self.treatment]) == "object":
             design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
             treatment = design_info.column_names[design_info.term_name_slices[self.treatment]]
+        else:
+            treatment = [self.treatment]
         assert set(treatment).issubset(
             model.params.index.tolist()
         ), f"{treatment} not in\n{' ' + str(model.params.index).replace(newline, newline + ' ')}"
         unit_effect = model.params[treatment]  # Unit effect is the coefficient of the treatment
         [ci_low, ci_high] = self._get_confidence_intervals(model, treatment)
-        if self.treatment not in self.df.dtypes or str(self.df.dtypes[self.treatment]) != "object":
-            unit_effect = unit_effect[0]
-            ci_low = ci_low[0]
-            ci_high = ci_high[0]
         return unit_effect, [ci_low, ci_high]

     def estimate_ate(self) -> tuple[float, list[float, float], float]:

From 47363911a7b55b1c1073b66533a2fdb450d55393 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 13 Feb 2024 12:56:23 +0000
Subject: [PATCH 03/36] Adapt unit tests to access series values for coefficients

---
 tests/testing_tests/test_estimators.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py
index 8232f57e..05a9038c 100644
--- a/tests/testing_tests/test_estimators.py
+++ b/tests/testing_tests/test_estimators.py
@@ -231,7 +231,7 @@ def test_program_11_2(self):
         self.assertEqual(round(model.params["Intercept"] + 90 * model.params["treatments"], 1), 216.9)

         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertEqual(round(model.params["treatments"], 1), round(ate, 1))
+        self.assertEqual(round(model.params["treatments"], 1), round(ate[0], 1))

     def test_program_11_3(self):
         """Test whether our linear regression implementation produces the same results as program 11.3 (p. 144)."""
@@ -251,7 +251,7 @@ def test_program_11_3(self):
             197.1,
         )
         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertEqual(round(model.params["treatments"], 3), round(ate, 3))
+        self.assertEqual(round(model.params["treatments"], 3), round(ate[0], 3))

     def test_program_15_1A(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)."""
@@ -329,8 +329,8 @@ def test_program_15_no_interaction(self):
         # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
         # for term_to_square in terms_to_square:
         ate, [ci_low, ci_high] = linear_regression_estimator.estimate_coefficient()
-        self.assertEqual(round(ate, 1), 3.5)
-        self.assertEqual([round(ci_low, 1), round(ci_high, 1)], [2.6, 4.3])
+        self.assertEqual(round(ate[0], 1), 3.5)
+        self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [2.6, 4.3])

     def test_program_15_no_interaction_ate(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)

From 1eac357fe677ca00e73a72cc7e6b4f5751caf577 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 13 Feb 2024 16:03:30 +0000
Subject: [PATCH 04/36] Fetch factors from Patsy to check types

---
 causal_testing/testing/estimators.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
index 5fc66b28..0ace57f6 100644
--- a/causal_testing/testing/estimators.py
+++ b/causal_testing/testing/estimators.py
@@ -11,7 +11,7 @@
 import statsmodels.formula.api as smf
 from econml.dml import CausalForestDML
 from patsy import dmatrix  # pylint: disable = no-name-in-module
-
+from patsy import ModelDesc
 from sklearn.ensemble import GradientBoostingRegressor
 from statsmodels.regression.linear_model import RegressionResultsWrapper
 from statsmodels.tools.sm_exceptions import PerfectSeparationError
@@ -351,7 +351,8 @@ def estimate_coefficient(self) -> float:
         """
         model = self._run_linear_regression()
         newline = "\n"
-        if self.treatment in self.df.dtypes and str(self.df.dtypes[self.treatment]) == "object":
+        patsy_md = ModelDesc.from_formula(self.treatment)
+        if any((self.df.dtypes[factor.name()] == 'object' for factor in patsy_md.rhs_termlist[1].factors)):
             design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
             treatment = design_info.column_names[design_info.term_name_slices[self.treatment]]
         else:
             treatment = [self.treatment]
         assert set(treatment).issubset(

From f616091ba008b96d4300c0a487798b8046b68571 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 13 Feb 2024 16:13:22 +0000
Subject: [PATCH 05/36] Return float rather than Series for ci_high and ci_low

---
 causal_testing/testing/causal_test_result.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index 7963afd1..9bb4060a 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -99,13 +99,13 @@ def to_dict(self, json=False):
     def ci_low(self):
         """Return the lower bracket of the confidence intervals."""
         if self.confidence_intervals:
-            return self.confidence_intervals[0]
+            return self.confidence_intervals[0][0]
         return None

     def ci_high(self):
         """Return the higher bracket of the confidence intervals."""
         if self.confidence_intervals:
-            return self.confidence_intervals[1]
+            return self.confidence_intervals[1][0]
         return None

     def ci_valid(self) -> bool:

From 1e8ad892c20435b9ef8eb1aee05a953906c3b649 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Wed, 14 Feb 2024 10:54:15 +0000
Subject: [PATCH 06/36] Handle float and series confidence intervals

---
 causal_testing/testing/causal_test_result.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index 9bb4060a..31c5d464 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -99,13 +99,18 @@ def to_dict(self, json=False):
     def ci_low(self):
         """Return the lower bracket of the confidence intervals."""
         if self.confidence_intervals:
-            return self.confidence_intervals[0][0]
+            try:
+                return self.confidence_intervals[0][0]
+            except TypeError:
+                return self.confidence_intervals[0]
         return None

     def ci_high(self):
         """Return the higher bracket of the confidence intervals."""
-        if self.confidence_intervals:
+        try:
             return self.confidence_intervals[1][0]
+        except TypeError:
+            return self.confidence_intervals[1]
         return None

     def ci_valid(self) -> bool:

From abe85995710859740d5d05353a7f274eda276c02 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Wed, 14 Feb 2024 10:57:48 +0000
Subject: [PATCH 07/36] Handle correct exception

---
 causal_testing/testing/causal_test_result.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index 31c5d464..5b95f83e 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -101,7 +101,7 @@ def ci_low(self):
         if self.confidence_intervals:
             try:
                 return self.confidence_intervals[0][0]
-            except TypeError:
+            except IndexError:
                 return self.confidence_intervals[0]
         return None

@@ -109,7 +109,7 @@ def ci_high(self):
         """Return the higher bracket of the confidence intervals."""
         try:
             return self.confidence_intervals[1][0]
-        except TypeError:
+        except IndexError:
             return self.confidence_intervals[1]
         return None

From 767e5b43f737c3d6aa595b65a9c747e1eaf27773 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Wed, 14 Feb 2024 12:00:55 +0000
Subject: [PATCH 08/36] More flexible handling due to multiple float types returned by estimators

---
 causal_testing/testing/causal_test_result.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index 5b95f83e..962d5140 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -99,13 +99,19 @@ def to_dict(self, json=False):
     def ci_low(self):
         """Return the lower bracket of the
confidence intervals.""" if self.confidence_intervals: - try: + if isinstance(self.confidence_intervals[0], pd.Series): return self.confidence_intervals[0][0] - except IndexError: + else: return self.confidence_intervals[0] return None def ci_high(self): """Return the higher bracket of the confidence intervals.""" - try: - return self.confidence_intervals[1][0] - except IndexError: - return self.confidence_intervals[1] + if self.confidence_intervals: + if isinstance(self.confidence_intervals[1], pd.Series): + return self.confidence_intervals[1][0] + else: + return self.confidence_intervals[1] return None def ci_valid(self) -> bool: From 2724d947167bd2c367a8fbc30b9167b9847819a3 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 12:01:18 +0000 Subject: [PATCH 09/36] Handle series values --- causal_testing/testing/causal_test_outcome.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py index dc412e21..8dde3983 100644 --- a/causal_testing/testing/causal_test_outcome.py +++ b/causal_testing/testing/causal_test_outcome.py @@ -5,6 +5,7 @@ from abc import ABC, abstractmethod from collections.abc import Iterable import numpy as np +import pandas as pd from causal_testing.testing.causal_test_result import CausalTestResult @@ -57,7 +58,7 @@ def apply(self, res: CausalTestResult) -> bool: ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()] ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()] value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value] - + value = value[0] if isinstance(value[0], pd.Series) else value return ( sum( not ((ci_low < 0 < ci_high) or abs(v) < self.atol) From da87421820e83b75d952096c988ae7fcc34d7060 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 12:09:53 +0000 Subject: [PATCH 10/36] Linting --- causal_testing/testing/causal_test_result.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py index 962d5140..bcb6d870 100644 --- a/causal_testing/testing/causal_test_result.py +++ b/causal_testing/testing/causal_test_result.py @@ -101,8 +101,7 @@ def ci_low(self): if self.confidence_intervals: if isinstance(self.confidence_intervals[0], pd.Series): return self.confidence_intervals[0][0] - else: - return self.confidence_intervals[0] + return self.confidence_intervals[0] return None def ci_high(self): @@ -110,8 +109,7 @@ def ci_high(self): if self.confidence_intervals: if isinstance(self.confidence_intervals[1], pd.Series): return self.confidence_intervals[1][0] - else: - return self.confidence_intervals[1] + return self.confidence_intervals[1] return None def ci_valid(self) -> bool: From ea8c273799e6d03e06757034a3ebf855459de860 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 14:24:35 +0000 Subject: [PATCH 11/36] refactor all estimate_* return types to be pd.Series for LinearRegressionEstimator --- causal_testing/testing/estimators.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index 0ace57f6..7fe42d41 100644 --- a/causal_testing/testing/estimators.py +++ 
b/causal_testing/testing/estimators.py @@ -382,8 +382,9 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]: # Perform a t-test to compare the predicted outcome of the control and treated individual (ATE) t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"]) - ate = t_test_results.effect[0] + ate = pd.Series(t_test_results.effect[0]) confidence_intervals = list(t_test_results.conf_int(alpha=self.alpha).flatten()) + confidence_intervals = (pd.Series(interval) for interval in confidence_intervals) return ate, confidence_intervals def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd.Series, pd.Series]: @@ -421,8 +422,9 @@ def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[float, li if adjustment_config is None: adjustment_config = {} control_outcome, treatment_outcome = self.estimate_control_treatment(adjustment_config=adjustment_config) - ci_low = treatment_outcome["mean_ci_lower"] / control_outcome["mean_ci_upper"] - ci_high = treatment_outcome["mean_ci_upper"] / control_outcome["mean_ci_lower"] + ci_low = pd.Series(treatment_outcome["mean_ci_lower"] / control_outcome["mean_ci_upper"]) + ci_high = pd.Series(treatment_outcome["mean_ci_upper"] / control_outcome["mean_ci_lower"]) + return pd.Series(treatment_outcome["mean"] / control_outcome["mean"]), [ci_low, ci_high] return (treatment_outcome["mean"] / control_outcome["mean"]), [ci_low, ci_high] @@ -437,10 +439,9 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[float if adjustment_config is None: adjustment_config = {} control_outcome, treatment_outcome = self.estimate_control_treatment(adjustment_config=adjustment_config) - ci_low = treatment_outcome["mean_ci_lower"] - control_outcome["mean_ci_upper"] - ci_high = treatment_outcome["mean_ci_upper"] - control_outcome["mean_ci_lower"] - - return (treatment_outcome["mean"] - control_outcome["mean"]), [ci_low, ci_high] + ci_low = pd.Series(treatment_outcome["mean_ci_lower"] - control_outcome["mean_ci_upper"]) + ci_high = pd.Series(treatment_outcome["mean_ci_upper"] - control_outcome["mean_ci_lower"]) + return pd.Series(treatment_outcome["mean"] - control_outcome["mean"]), [ci_low, ci_high] def _run_linear_regression(self) -> RegressionResultsWrapper: """Run linear regression of the treatment and adjustment set against the outcome and return the model. From c777503766b37d5b6c5f4c89f57281173651cd86 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 14:26:32 +0000 Subject: [PATCH 12/36] Update return typings --- causal_testing/testing/estimators.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index 7fe42d41..0b294dd6 100644 --- a/causal_testing/testing/estimators.py +++ b/causal_testing/testing/estimators.py @@ -343,7 +343,7 @@ def add_modelling_assumptions(self): "do not need to be linear." ) - def estimate_coefficient(self) -> float: + def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]: """Estimate the unit average treatment effect of the treatment on the outcome. That is, the change in outcome caused by a unit change in treatment. 
@@ -364,7 +364,7 @@ def estimate_coefficient(self) -> float: [ci_low, ci_high] = self._get_confidence_intervals(model, treatment) return unit_effect, [ci_low, ci_high] - def estimate_ate(self) -> tuple[float, list[float, float], float]: + def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]: """Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused by changing the treatment variable from the control value to the treatment value. @@ -413,7 +413,7 @@ def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd return y.iloc[1], y.iloc[0] - def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[float, list[float, float]]: + def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]: """Estimate the risk_ratio effect of the treatment on the outcome. That is, the change in outcome caused by changing the treatment variable from the control value to the treatment value. @@ -426,9 +426,7 @@ def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[float, li ci_high = pd.Series(treatment_outcome["mean_ci_upper"] / control_outcome["mean_ci_lower"]) return pd.Series(treatment_outcome["mean"] / control_outcome["mean"]), [ci_low, ci_high] - return (treatment_outcome["mean"] / control_outcome["mean"]), [ci_low, ci_high] - - def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[float, list[float, float]]: + def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]: """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused by changing the treatment variable from the control value to the treatment value. Here, we actually calculate the expected outcomes under control and treatment and divide one by the other. This From fd7f79d562b567740a5b84df1ade079389d2ed30 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 14:55:13 +0000 Subject: [PATCH 13/36] Refactor other estimator classes to return pd.Series --- causal_testing/testing/estimators.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index 0b294dd6..66f6a034 100644 --- a/causal_testing/testing/estimators.py +++ b/causal_testing/testing/estimators.py @@ -492,7 +492,7 @@ def __init__( terms = [treatment] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers)) self.formula = f"{outcome} ~ cr({'+'.join(terms)}, df={basis})" - def estimate_ate_calculated(self, adjustment_config: dict = None) -> float: + def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series: model = self._run_linear_regression() x = {"Intercept": 1, self.treatment: self.treatment_value} @@ -508,7 +508,7 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> float: x[self.treatment] = self.control_value control = model.predict(x).iloc[0] - return treatment - control + return pd.Series(treatment - control) class InstrumentalVariableEstimator(Estimator): @@ -564,7 +564,7 @@ def add_modelling_assumptions(self): """ ) - def estimate_iv_coefficient(self, df): + def estimate_iv_coefficient(self, df) -> float: """ Estimate the linear regression coefficient of the treatment on the outcome. 
@@ -578,7 +578,7 @@ def estimate_iv_coefficient(self, df): # Estimate the coefficient of I on X by cancelling return ab / a - def estimate_coefficient(self, bootstrap_size=100): + def estimate_coefficient(self, bootstrap_size=100) -> tuple[pd.Series, list[pd.Series, pd.Series]]: """ Estimate the unit ate (i.e. coefficient) of the treatment on the outcome. @@ -587,10 +587,10 @@ def estimate_coefficient(self, bootstrap_size=100): [self.estimate_iv_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)] ) bound = ceil((bootstrap_size * self.alpha) / 2) - ci_low = bootstraps[bound] - ci_high = bootstraps[bootstrap_size - bound] + ci_low = pd.Series(bootstraps[bound]) + ci_high = pd.Series(bootstraps[bootstrap_size - bound]) - return self.estimate_iv_coefficient(self.df), (ci_low, ci_high) + return pd.Series(self.estimate_iv_coefficient(self.df)), [ci_low, ci_high] class CausalForestEstimator(Estimator): @@ -607,7 +607,7 @@ def add_modelling_assumptions(self): """ self.modelling_assumptions.append("Non-parametric estimator: no restrictions imposed on the data.") - def estimate_ate(self) -> float: + def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]: """Estimate the average treatment effect. :return ate, confidence_intervals: The average treatment effect and 95% confidence intervals. @@ -635,9 +635,9 @@ def estimate_ate(self) -> float: model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df) # Obtain the ATE and 95% confidence intervals - ate = model.ate(effect_modifier_df, T0=self.control_value, T1=self.treatment_value) + ate = pd.Series(model.ate(effect_modifier_df, T0=self.control_value, T1=self.treatment_value)) ate_interval = model.ate_interval(effect_modifier_df, T0=self.control_value, T1=self.treatment_value) - ci_low, ci_high = ate_interval[0], ate_interval[1] + ci_low, ci_high = pd.Series(ate_interval[0]), pd.Series(ate_interval[1]) return ate, [ci_low, ci_high] def estimate_cates(self) -> pd.DataFrame: From 8f9499dc87d5dd24882c7fbe1958bd3fe95f6a22 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 15:55:05 +0000 Subject: [PATCH 14/36] Extract bool from series --- causal_testing/testing/causal_test_outcome.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py index 8dde3983..e63bc39e 100644 --- a/causal_testing/testing/causal_test_outcome.py +++ b/causal_testing/testing/causal_test_outcome.py @@ -53,7 +53,7 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05): def apply(self, res: CausalTestResult) -> bool: if res.test_value.type == "ate": - return (res.ci_low() < 0 < res.ci_high()) or (abs(res.test_value.value) < self.atol) + return (res.ci_low() < 0 < res.ci_high()) or (abs(res.test_value.value) < self.atol)[0] if res.test_value.type == "coefficient": ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()] ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()] From 3bcefc7ddeca283c9c4454a301980e8925dc7fe5 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 15:55:28 +0000 Subject: [PATCH 15/36] Remove gen expression so elements can be indexed --- causal_testing/testing/estimators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index 
66f6a034..041a9a93 100644 --- a/causal_testing/testing/estimators.py +++ b/causal_testing/testing/estimators.py @@ -384,7 +384,7 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]: t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"]) ate = pd.Series(t_test_results.effect[0]) confidence_intervals = list(t_test_results.conf_int(alpha=self.alpha).flatten()) - confidence_intervals = (pd.Series(interval) for interval in confidence_intervals) + confidence_intervals = [pd.Series(interval) for interval in confidence_intervals] return ate, confidence_intervals def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd.Series, pd.Series]: From e43bf38c789ab3779e1b34eff331a6b3005c44e8 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:33:59 +0000 Subject: [PATCH 16/36] All effects now expect pd.Series for the test values --- causal_testing/testing/causal_test_outcome.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py index e63bc39e..cfb2f0c8 100644 --- a/causal_testing/testing/causal_test_outcome.py +++ b/causal_testing/testing/causal_test_outcome.py @@ -98,10 +98,9 @@ def apply(self, res: CausalTestResult) -> bool: if res.ci_valid() and not super().apply(res): return False if res.test_value.type in {"ate", "coefficient"}: - return bool(res.test_value.value > 0) + return bool(res.test_value.value[0] > 0) if res.test_value.type == "risk_ratio": - return bool(res.test_value.value > 1) - # Dead code but necessary for pylint + return bool(res.test_value.value[0] > 1) raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome") @@ -112,8 +111,8 @@ def apply(self, res: CausalTestResult) -> bool: if res.ci_valid() and not super().apply(res): return False if res.test_value.type in {"ate", "coefficient"}: - return bool(res.test_value.value < 0) + return bool(res.test_value.value[0] < 0) if res.test_value.type == "risk_ratio": - return bool(res.test_value.value < 1) + return bool(res.test_value.value[0] < 1) # Dead code but necessary for pylint raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome") From ace2612ba072343a7970151b22c2799b506af7b4 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:35:49 +0000 Subject: [PATCH 17/36] Update all unit tests to work with pd.Series refactor --- tests/testing_tests/test_causal_test_case.py | 8 ++--- .../testing_tests/test_causal_test_outcome.py | 33 ++++++++++--------- tests/testing_tests/test_causal_test_suite.py | 6 ++-- tests/testing_tests/test_estimators.py | 20 +++++------ 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py index 2e2ab52e..92096251 100644 --- a/tests/testing_tests/test_causal_test_case.py +++ b/tests/testing_tests/test_causal_test_case.py @@ -118,7 +118,7 @@ def test_execute_test_observational_causal_forest_estimator(self): self.df, ) causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector) - self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1) + self.assertAlmostEqual(causal_test_result.test_value.value[0], 4, delta=1) def test_invalid_causal_effect(self): """Check that executing the causal test case 
returns the correct results for dummy data using a linear @@ -140,7 +140,7 @@ def test_execute_test_observational_linear_regression_estimator(self): self.df, ) causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector) - self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1e-10) + self.assertAlmostEqual(causal_test_result.test_value.value[0], 4, delta=1e-10) def test_execute_test_observational_linear_regression_estimator_direct_effect(self): """Check that executing the causal test case returns the correct results for dummy data using a linear @@ -167,7 +167,7 @@ def test_execute_test_observational_linear_regression_estimator_direct_effect(se self.df, ) causal_test_result = causal_test_case.execute_test(estimation_model, self.data_collector) - self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1e-10) + self.assertAlmostEqual(causal_test_result.test_value.value[0], 4, delta=1e-10) def test_execute_test_observational_linear_regression_estimator_coefficient(self): """Check that executing the causal test case returns the correct results for dummy data using a linear @@ -227,7 +227,7 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel formula=f"C ~ A + {'+'.join(self.minimal_adjustment_set)} + (D ** 2)", ) causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector) - self.assertAlmostEqual(round(causal_test_result.test_value.value, 1), 4, delta=1) + self.assertAlmostEqual(round(causal_test_result.test_value.value[0], 1), 4, delta=1) def test_execute_observational_causal_forest_estimator_cates(self): """Check that executing the causal test case returns the correct conditional average treatment effects for diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py index 74e724e3..3eadb8d5 100644 --- a/tests/testing_tests/test_causal_test_outcome.py +++ b/tests/testing_tests/test_causal_test_outcome.py @@ -1,4 +1,5 @@ import unittest +import pandas as pd from causal_testing.testing.causal_test_outcome import ExactValue, SomeEffect, Positive, Negative, NoEffect from causal_testing.testing.causal_test_result import CausalTestResult, TestValue from causal_testing.testing.estimators import LinearRegressionEstimator @@ -69,7 +70,7 @@ def test_empty_adjustment_set(self): ) def test_Positive_ate_pass(self): - test_value = TestValue(type="ate", value=5.05) + test_value = TestValue(type="ate", value=pd.Series(5.05)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -80,7 +81,7 @@ def test_Positive_ate_pass(self): self.assertTrue(ev.apply(ctr)) def test_Positive_risk_ratio_pass(self): - test_value = TestValue(type="risk_ratio", value=2) + test_value = TestValue(type="risk_ratio", value=pd.Series(2)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -91,7 +92,7 @@ def test_Positive_risk_ratio_pass(self): self.assertTrue(ev.apply(ctr)) def test_Positive_fail(self): - test_value = TestValue(type="ate", value=0) + test_value = TestValue(type="ate", value=pd.Series(0)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -102,7 +103,7 @@ def test_Positive_fail(self): self.assertFalse(ev.apply(ctr)) def test_Positive_fail_ci(self): - test_value = TestValue(type="ate", value=0) + test_value = TestValue(type="ate", value=pd.Series(0)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -113,7 +114,7 @@ def 
test_Positive_fail_ci(self): self.assertFalse(ev.apply(ctr)) def test_Negative_ate_pass(self): - test_value = TestValue(type="ate", value=-5.05) + test_value = TestValue(type="ate", value=pd.Series(-5.05)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -124,7 +125,7 @@ def test_Negative_ate_pass(self): self.assertTrue(ev.apply(ctr)) def test_Negative_risk_ratio_pass(self): - test_value = TestValue(type="risk_ratio", value=0.2) + test_value = TestValue(type="risk_ratio", value=pd.Series(0.2)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -135,7 +136,7 @@ def test_Negative_risk_ratio_pass(self): self.assertTrue(ev.apply(ctr)) def test_Negative_fail(self): - test_value = TestValue(type="ate", value=0) + test_value = TestValue(type="ate", value=pd.Series(0)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -146,7 +147,7 @@ def test_Negative_fail(self): self.assertFalse(ev.apply(ctr)) def test_Negative_fail_ci(self): - test_value = TestValue(type="ate", value=0) + test_value = TestValue(type="ate", value=pd.Series(0)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -157,7 +158,7 @@ def test_Negative_fail_ci(self): self.assertFalse(ev.apply(ctr)) def test_exactValue_pass(self): - test_value = TestValue(type="ate", value=5.05) + test_value = TestValue(type="ate", value=pd.Series(5.05)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -168,7 +169,7 @@ def test_exactValue_pass(self): self.assertTrue(ev.apply(ctr)) def test_exactValue_pass_ci(self): - test_value = TestValue(type="ate", value=5.05) + test_value = TestValue(type="ate", value=pd.Series(5.05)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -179,7 +180,7 @@ def test_exactValue_pass_ci(self): self.assertTrue(ev.apply(ctr)) def test_exactValue_fail(self): - test_value = TestValue(type="ate", value=0) + test_value = TestValue(type="ate", value=pd.Series(0)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -190,7 +191,7 @@ def test_exactValue_fail(self): self.assertFalse(ev.apply(ctr)) def test_invalid(self): - test_value = TestValue(type="invalid", value=5.05) + test_value = TestValue(type="invalid", value=pd.Series(5.05)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -207,7 +208,7 @@ def test_invalid(self): Negative().apply(ctr) def test_someEffect_pass_coefficient(self): - test_value = TestValue(type="coefficient", value=5.05) + test_value = TestValue(type="coefficient", value=pd.Series(5.05)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -218,7 +219,7 @@ def test_someEffect_pass_coefficient(self): self.assertFalse(NoEffect().apply(ctr)) def test_someEffect_pass_ate(self): - test_value = TestValue(type="ate", value=5.05) + test_value = TestValue(type="ate", value=pd.Series(5.05)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -229,7 +230,7 @@ def test_someEffect_pass_ate(self): self.assertFalse(NoEffect().apply(ctr)) def test_someEffect_pass_rr(self): - test_value = TestValue(type="risk_ratio", value=5.05) + test_value = TestValue(type="risk_ratio", value=pd.Series(5.05)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, @@ -240,7 +241,7 @@ def test_someEffect_pass_rr(self): self.assertFalse(NoEffect().apply(ctr)) def test_someEffect_fail(self): - test_value = TestValue(type="ate", value=0) + test_value = TestValue(type="ate", 
value=pd.Series(0)) ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py index b3d0f448..1fd28bb3 100644 --- a/tests/testing_tests/test_causal_test_suite.py +++ b/tests/testing_tests/test_causal_test_suite.py @@ -98,7 +98,7 @@ def test_execute_test_suite_single_base_test_case(self): causal_test_results = self.test_suite.execute_test_suite(self.data_collector, self.causal_specification) causal_test_case_result = causal_test_results[self.base_test_case] - self.assertAlmostEqual(causal_test_case_result["LinearRegressionEstimator"][0].test_value.value, 4, delta=1e-10) + self.assertAlmostEqual(causal_test_case_result["LinearRegressionEstimator"][0].test_value.value[0], 4, delta=1e-10) def test_execute_test_suite_multiple_estimators(self): """Check that executing a test suite with multiple estimators returns correct results for the dummy data @@ -114,5 +114,5 @@ def test_execute_test_suite_multiple_estimators(self): causal_test_case_result = causal_test_results[self.base_test_case] linear_regression_result = causal_test_case_result["LinearRegressionEstimator"][0] causal_forrest_result = causal_test_case_result["CausalForestEstimator"][0] - self.assertAlmostEqual(linear_regression_result.test_value.value, 4, delta=1e-1) - self.assertAlmostEqual(causal_forrest_result.test_value.value, 4, delta=1e-1) + self.assertAlmostEqual(linear_regression_result.test_value.value[0], 4, delta=1e-1) + self.assertAlmostEqual(causal_forrest_result.test_value.value[0], 4, delta=1e-1) diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py index 05a9038c..8fbf70b9 100644 --- a/tests/testing_tests/test_estimators.py +++ b/tests/testing_tests/test_estimators.py @@ -199,7 +199,7 @@ def test_estimate_coefficient(self): instrument="Z", ) coefficient, [low, high] = iv_estimator.estimate_coefficient() - self.assertEqual(coefficient, 2) + self.assertEqual(coefficient[0], 2) class TestLinearRegressionEstimator(unittest.TestCase): @@ -364,8 +364,8 @@ def test_program_15_no_interaction_ate(self): # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"] # for term_to_square in terms_to_square: ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate() - self.assertEqual(round(ate, 1), 3.5) - self.assertEqual([round(ci_low, 1), round(ci_high, 1)], [2.6, 4.3]) + self.assertEqual(round(ate[0], 1), 3.5) + self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [2.6, 4.3]) def test_program_15_no_interaction_ate_calculated(self): """Test whether our linear regression implementation produces the same results as program 15.1 (p. 
163, 184) @@ -402,8 +402,8 @@ def test_program_15_no_interaction_ate_calculated(self): ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate_calculated( adjustment_config={k: self.nhefs_df.mean()[k] for k in covariates} ) - self.assertEqual(round(ate, 1), 3.5) - self.assertEqual([round(ci_low, 1), round(ci_high, 1)], [1.9, 5]) + self.assertEqual(round(ate[0], 1), 3.5) + self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [1.9, 5]) def test_program_11_2_with_robustness_validation(self): """Test whether our linear regression estimator, as used in test_program_11_2 can correctly estimate robustness.""" @@ -449,8 +449,8 @@ def test_program_11_3_cublic_spline(self): ate_2 = cublic_spline_estimator.estimate_ate_calculated() # Doubling the treatemebnt value should roughly but not exactly double the ATE - self.assertNotEqual(ate_1 * 2, ate_2) - self.assertAlmostEqual(ate_1 * 2, ate_2) + self.assertNotEqual(ate_1[0] * 2, ate_2[0]) + self.assertAlmostEqual(ate_1[0] * 2, ate_2[0]) @@ -488,8 +488,8 @@ def test_program_15_ate(self): } causal_forest = CausalForestEstimator("qsmk", 1, 0, covariates, "wt82_71", df, {"smokeintensity": 40}) ate, _ = causal_forest.estimate_ate() - self.assertGreater(round(ate, 1), 2.5) - self.assertLess(round(ate, 1), 4.5) + self.assertGreater(round(ate[0], 1), 2.5) + self.assertLess(round(ate[0], 1), 4.5) def test_program_15_cate(self): """Test whether our causal forest implementation produces the similar CATE to program 15.1 (p. 163, 184).""" @@ -535,7 +535,7 @@ def test_X1_effect(self): "X1", 1, 0, {"X2"}, "Y", effect_modifiers={x2.name: 0}, formula="Y ~ X1 + X2 + (X1 * X2)", df=self.df ) test_results = lr_model.estimate_ate() - ate = test_results[0] + ate = test_results[0][0] self.assertAlmostEqual(ate, 2.0) From 18883d86a889d43298f9e553218c1b1734007e6e Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Wed, 14 Feb 2024 17:00:50 +0000 Subject: [PATCH 18/36] example_poisson_process.py now works with pd.Series refactor --- examples/poisson-line-process/example_poisson_process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/poisson-line-process/example_poisson_process.py b/examples/poisson-line-process/example_poisson_process.py index ae8d07e0..820c3537 100644 --- a/examples/poisson-line-process/example_poisson_process.py +++ b/examples/poisson-line-process/example_poisson_process.py @@ -198,8 +198,8 @@ def test_poisson_width_num_shapes(save=False): "treatment": treatment_value, "intensity": i, "ate": causal_test_result.test_value.value, - "ci_low": min(causal_test_result.confidence_intervals), - "ci_high": max(causal_test_result.confidence_intervals), + "ci_low": causal_test_result.confidence_intervals[0][0], + "ci_high": causal_test_result.confidence_intervals[1][0], } width_num_shapes_results.append(results) width_num_shapes_results = pd.DataFrame(width_num_shapes_results) From 8590dc94250baab5bbe770ec4a3e8712cdb2ba67 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:20:35 +0000 Subject: [PATCH 19/36] Update typing --- causal_testing/testing/causal_test_result.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py index bcb6d870..afae6195 100644 --- a/causal_testing/testing/causal_test_result.py +++ b/causal_testing/testing/causal_test_result.py @@ -27,7 +27,7 @@ def __init__( self, estimator: 
Estimator, test_value: TestValue, - confidence_intervals: [float, float] = None, + confidence_intervals: [pd.Series, pd.Series] = None, effect_modifier_configuration: {Variable: Any} = None, adequacy=None, ): @@ -100,7 +100,7 @@ def ci_low(self): """Return the lower bracket of the confidence intervals.""" if self.confidence_intervals: if isinstance(self.confidence_intervals[0], pd.Series): - return self.confidence_intervals[0][0] + return self.confidence_intervals[0].to_list() return self.confidence_intervals[0] return None @@ -108,7 +108,7 @@ def ci_high(self): """Return the higher bracket of the confidence intervals.""" if self.confidence_intervals: if isinstance(self.confidence_intervals[1], pd.Series): - return self.confidence_intervals[1][0] + return self.confidence_intervals[1].to_list() return self.confidence_intervals[1] return None From 8e33b25538b0909ddf1199ad78b945eba2e02f80 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:20:51 +0000 Subject: [PATCH 20/36] Update tests to use pd.Series for confidence intervals --- .../testing_tests/test_causal_test_outcome.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py index f229dae1..142e5f66 100644 --- a/tests/testing_tests/test_causal_test_outcome.py +++ b/tests/testing_tests/test_causal_test_outcome.py @@ -151,7 +151,7 @@ def test_Negative_fail_ci(self): ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[-1, 1], + confidence_intervals=[pd.Series(-1), pd.Series(1)], effect_modifier_configuration=None, ) ev = Negative() @@ -173,7 +173,7 @@ def test_exactValue_pass_ci(self): ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[4, 6], + confidence_intervals=[pd.Series(4), pd.Series(6)], effect_modifier_configuration=None, ) ev = ExactValue(5, 0.1) @@ -199,7 +199,7 @@ def test_invalid(self): ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[4.8, 6.7], + confidence_intervals=[pd.Series(4.8), pd.Series(6.7)], effect_modifier_configuration=None, ) with self.assertRaises(ValueError): @@ -216,7 +216,7 @@ def test_someEffect_pass_coefficient(self): ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[4.8, 6.7], + confidence_intervals=[pd.Series(4.8), pd.Series(6.7)], effect_modifier_configuration=None, ) self.assertTrue(SomeEffect().apply(ctr)) @@ -227,7 +227,7 @@ def test_someEffect_pass_ate(self): ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[4.8, 6.7], + confidence_intervals=[pd.Series(4.8), pd.Series(6.7)], effect_modifier_configuration=None, ) self.assertTrue(SomeEffect().apply(ctr)) @@ -238,7 +238,7 @@ def test_someEffect_pass_rr(self): ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[4.8, 6.7], + confidence_intervals=[pd.Series(4.8), pd.Series(6.7)], effect_modifier_configuration=None, ) self.assertTrue(SomeEffect().apply(ctr)) @@ -249,7 +249,7 @@ def test_someEffect_fail(self): ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[-0.1, 0.2], + confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)], effect_modifier_configuration=None, ) self.assertFalse(SomeEffect().apply(ctr)) @@ -260,7 +260,7 @@ def test_someEffect_str(self): 
ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[-0.1, 0.2], + confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)], effect_modifier_configuration=None, ) ev = SomeEffect() @@ -284,7 +284,7 @@ def test_someEffect_dict(self): ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[-0.1, 0.2], + confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)], effect_modifier_configuration=None, ) ev = SomeEffect() From 964131933c67ea6dc7fc91e14dbfac9668f0119f Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:57:01 +0000 Subject: [PATCH 21/36] Dictionary assertions use list CIs --- tests/testing_tests/test_causal_test_outcome.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py index 142e5f66..ac4a9716 100644 --- a/tests/testing_tests/test_causal_test_outcome.py +++ b/tests/testing_tests/test_causal_test_outcome.py @@ -107,7 +107,7 @@ def test_Positive_fail_ci(self): ctr = CausalTestResult( estimator=self.estimator, test_value=test_value, - confidence_intervals=[-1, 1], + confidence_intervals=[pd.Series(-1), pd.Series(1)], effect_modifier_configuration=None, ) ev = Positive() @@ -274,8 +274,8 @@ def test_someEffect_str(self): "adjustment_set": set(), "effect_estimate": 0, "effect_measure": "ate", - "ci_low": -0.1, - "ci_high": 0.2, + "ci_low": [-0.1], + "ci_high": [0.2], }, ) @@ -298,8 +298,8 @@ def test_someEffect_dict(self): "adjustment_set": set(), "effect_estimate": 0, "effect_measure": "ate", - "ci_low": -0.1, - "ci_high": 0.2, + "ci_low": [-0.1], + "ci_high": [0.2], }, ) From 35bae1f1c8eb96120bc76ce3509b15a6ac640464 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:59:26 +0000 Subject: [PATCH 22/36] SomeEffect and NoneEffect applys now work with pd.Series --- causal_testing/testing/causal_test_outcome.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py index 8296ce02..87b54ef1 100644 --- a/causal_testing/testing/causal_test_outcome.py +++ b/causal_testing/testing/causal_test_outcome.py @@ -28,14 +28,13 @@ class SomeEffect(CausalTestOutcome): """An extension of TestOutcome representing that the expected causal effect should not be zero.""" def apply(self, res: CausalTestResult) -> bool: - if res.test_value.type == "ate": - return (0 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 0) - if res.test_value.type == "coefficient": - ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()] - ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()] - return any(0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(ci_low, ci_high)) if res.test_value.type == "risk_ratio": - return (1 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 1) + return any( + 1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())) + if res.test_value.type == "coefficient" or res.test_value.type == "ate": + return any( + 0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())) + raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome") @@ 
-52,23 +51,21 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05): self.ctol = ctol def apply(self, res: CausalTestResult) -> bool: - if res.test_value.type == "ate": - return (res.ci_low() < 0 < res.ci_high()) or (abs(res.test_value.value) < self.atol)[0] - if res.test_value.type == "coefficient": - ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()] - ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()] + if res.test_value.type == "risk_ratio": + return any(ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol) for ci_low, ci_high, value in + zip(res.ci_low(), res.ci_high(), res.test_value.value)) + elif res.test_value.type == "coefficient" or res.test_value.type == "ate": value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value] value = value[0] if isinstance(value[0], pd.Series) else value return ( - sum( - not ((ci_low < 0 < ci_high) or abs(v) < self.atol) - for ci_low, ci_high, v in zip(ci_low, ci_high, value) - ) - / len(value) - < self.ctol + sum( + not ((ci_low < 0 < ci_high) or abs(v) < self.atol) + for ci_low, ci_high, v in zip(res.ci_low(), res.ci_high(), value) + ) + / len(value) + < self.ctol ) - if res.test_value.type == "risk_ratio": - return (res.ci_low() < 1 < res.ci_high()) or np.isclose(res.test_value.value, 1.0, atol=self.atol) + raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome") From 6a5987a538ccef54125dcf933da38c9f3767b2c4 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:59:49 +0000 Subject: [PATCH 23/36] _get_confidence_intervals method returns pd.Series --- causal_testing/testing/estimators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py index 041a9a93..895124bf 100644 --- a/causal_testing/testing/estimators.py +++ b/causal_testing/testing/estimators.py @@ -453,8 +453,8 @@ def _run_linear_regression(self) -> RegressionResultsWrapper: def _get_confidence_intervals(self, model, treatment): confidence_intervals = model.conf_int(alpha=self.alpha, cols=None) ci_low, ci_high = ( - confidence_intervals[0].loc[treatment], - confidence_intervals[1].loc[treatment], + pd.Series(confidence_intervals[0].loc[treatment]), + pd.Series(confidence_intervals[1].loc[treatment]), ) return [ci_low, ci_high] From 33e7e537d8e7b38050c75ce9cffc9acf71b66e78 Mon Sep 17 00:00:00 2001 From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com> Date: Fri, 23 Feb 2024 16:25:04 +0000 Subject: [PATCH 24/36] Remove unnecessary unpacking of value --- causal_testing/testing/causal_test_outcome.py | 1 - 1 file changed, 1 deletion(-) diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py index 87b54ef1..f3524b6c 100644 --- a/causal_testing/testing/causal_test_outcome.py +++ b/causal_testing/testing/causal_test_outcome.py @@ -56,7 +56,6 @@ def apply(self, res: CausalTestResult) -> bool: zip(res.ci_low(), res.ci_high(), res.test_value.value)) elif res.test_value.type == "coefficient" or res.test_value.type == "ate": value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value] - value = value[0] if isinstance(value[0], pd.Series) else value return ( sum( not ((ci_low < 0 < ci_high) or abs(v) < self.atol) From 123d4dbaf8e765ed751f9dc390233b7072baf124 Mon Sep 17 00:00:00 2001 From: cwild-UoS 
 <93984046+cwild-UoS@users.noreply.github.com>
Date: Fri, 23 Feb 2024 16:51:27 +0000
Subject: [PATCH 25/36] tests represent the logic of returning Series better

---
 tests/testing_tests/test_estimators.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py
index c6343185..e8ac8d28 100644
--- a/tests/testing_tests/test_estimators.py
+++ b/tests/testing_tests/test_estimators.py
@@ -217,7 +217,7 @@ def test_program_11_2(self):
         self.assertEqual(round(model.params["Intercept"] + 90 * model.params["treatments"], 1), 216.9)
 
         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertEqual(round(model.params["treatments"], 1), round(ate[0], 1))
+        self.assertTrue(all(round(model.params["treatments"], 1) == round(ate_single, 1) for ate_single in ate))
 
     def test_program_11_3(self):
         """Test whether our linear regression implementation produces the same results as program 11.3 (p. 144)."""
@@ -237,7 +237,7 @@ def test_program_11_3(self):
             197.1,
         )
         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertEqual(round(model.params["treatments"], 3), round(ate[0], 3))
+        self.assertTrue(all(round(model.params["treatments"], 3) == round(ate_single, 3) for ate_single in ate))
 
     def test_program_15_1A(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)."""
@@ -315,6 +315,7 @@ def test_program_15_no_interaction(self):
         # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
         # for term_to_square in terms_to_square:
         ate, [ci_low, ci_high] = linear_regression_estimator.estimate_coefficient()
+
         self.assertEqual(round(ate[0], 1), 3.5)
         self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [2.6, 4.3])
 

From 17b86929f991f3ff1bf5476d6df1b0c46bef67b9 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Fri, 23 Feb 2024 16:55:01 +0000
Subject: [PATCH 26/36] Pylint suggestions

---
 causal_testing/testing/causal_test_outcome.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
index f3524b6c..ff7a545b 100644
--- a/causal_testing/testing/causal_test_outcome.py
+++ b/causal_testing/testing/causal_test_outcome.py
@@ -31,7 +31,7 @@ def apply(self, res: CausalTestResult) -> bool:
         if res.test_value.type == "risk_ratio":
             return any(
                 1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()))
-        if res.test_value.type == "coefficient" or res.test_value.type == "ate":
+        if res.test_value.type in ('coefficient', 'ate'):
             return any(
                 0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()))
 
@@ -54,7 +54,7 @@ def apply(self, res: CausalTestResult) -> bool:
         if res.test_value.type == "risk_ratio":
             return any(ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol)
                        for ci_low, ci_high, value in zip(res.ci_low(), res.ci_high(), res.test_value.value))
-        elif res.test_value.type == "coefficient" or res.test_value.type == "ate":
+        if res.test_value.type in ('coefficient', 'ate'):
             value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value]
             return (
                 sum(

From 4e06f3442076cd0ade3c486b8e21eb7f59fcd51d Mon Sep 17 00:00:00 2001
From: Richard Somers
Date: Mon, 26 Feb 2024 08:50:28 +0000
Subject: [PATCH 27/36] Update surrogate code for new series return vals

---
 causal_testing/surrogate/surrogate_search_algorithms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/causal_testing/surrogate/surrogate_search_algorithms.py b/causal_testing/surrogate/surrogate_search_algorithms.py
index 71e5d655..da1f0aeb 100644
--- a/causal_testing/surrogate/surrogate_search_algorithms.py
+++ b/causal_testing/surrogate/surrogate_search_algorithms.py
@@ -46,7 +46,7 @@ def fitness_function(ga, solution, idx): # pylint: disable=unused-argument
 
                 ate = surrogate.estimate_ate_calculated(adjustment_dict)
 
-                return contradiction_function(ate)
+                return contradiction_function(ate[0])
 
             gene_types, gene_space = self.create_gene_types(surrogate, specification)
 

From d742f74ac30d233ef1b79215574027c110d8e443 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 27 Feb 2024 09:00:30 +0000
Subject: [PATCH 28/36] Remove unused import

---
 causal_testing/testing/causal_test_outcome.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
index ff7a545b..67f4d087 100644
--- a/causal_testing/testing/causal_test_outcome.py
+++ b/causal_testing/testing/causal_test_outcome.py
@@ -5,7 +5,6 @@
 from abc import ABC, abstractmethod
 from collections.abc import Iterable
 import numpy as np
-import pandas as pd
 
 from causal_testing.testing.causal_test_result import CausalTestResult
 

From 425329bfc9c898e03fed243438f8b86a569a3071 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 27 Feb 2024 09:05:13 +0000
Subject: [PATCH 29/36] Update LR91 examples

---
 examples/lr91/example_max_conductances.py            | 4 ++--
 examples/lr91/example_max_conductances_test_suite.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/lr91/example_max_conductances.py b/examples/lr91/example_max_conductances.py
index d73ae6f8..bef94f04 100644
--- a/examples/lr91/example_max_conductances.py
+++ b/examples/lr91/example_max_conductances.py
@@ -164,8 +164,8 @@ def plot_ates_with_cis(results_dict: dict, xs: list, save: bool = False, show: b
         before_underscore, after_underscore = treatment.split("_")
         after_underscore_braces = f"{{{after_underscore}}}"
         latex_compatible_treatment_str = rf"${before_underscore}_{after_underscore_braces}$"
-        cis_low = [c[0] for c in cis]
-        cis_high = [c[1] for c in cis]
+        cis_low = [c[0][0] for c in cis]
+        cis_high = [c[1][0] for c in cis]
         axes.fill_between(
             xs, cis_low, cis_high, alpha=0.2, color=input_colors[treatment], label=latex_compatible_treatment_str
         )
diff --git a/examples/lr91/example_max_conductances_test_suite.py b/examples/lr91/example_max_conductances_test_suite.py
index c704777f..fe9c1e2c 100644
--- a/examples/lr91/example_max_conductances_test_suite.py
+++ b/examples/lr91/example_max_conductances_test_suite.py
@@ -166,8 +166,8 @@ def plot_ates_with_cis(results_dict: dict, xs: list, save: bool = False, show=Fa
         before_underscore, after_underscore = treatment.split("_")
         after_underscore_braces = f"{{{after_underscore}}}"
         latex_compatible_treatment_str = rf"${before_underscore}_{after_underscore_braces}$"
-        cis_low = [c[0] for c in cis]
-        cis_high = [c[1] for c in cis]
+        cis_low = [c[0][0] for c in cis]
+        cis_high = [c[1][0] for c in cis]
         axes.fill_between(
             xs, cis_low, cis_high, alpha=0.2, color=input_colors[treatment], label=latex_compatible_treatment_str
         )

From c026f6a56460061a32694ee4d11f16fef9cf8b86 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 27 Feb 2024 09:51:51 +0000
Subject: [PATCH 30/36] Update example_beta.py

---
 examples/covasim_/doubling_beta/example_beta.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/examples/covasim_/doubling_beta/example_beta.py b/examples/covasim_/doubling_beta/example_beta.py
index 69a84cfa..d4ae15c0 100644
--- a/examples/covasim_/doubling_beta/example_beta.py
+++ b/examples/covasim_/doubling_beta/example_beta.py
@@ -276,8 +276,8 @@ def setup(observational_data):
 
 def plot_doubling_beta_CATEs(results_dict, title, figure=None, axes=None, row=None, col=None):
     # Get the CATE as a percentage for association and causation
-    ate = results_dict["causation"]["ate"]
-    association_ate = results_dict["association"]["ate"]
+    ate = results_dict["causation"]["ate"][0]
+    association_ate = results_dict["association"]["ate"][0]
     causation_df = results_dict["causation"]["df"]
     association_df = results_dict["association"]["df"]
 
@@ -288,11 +288,10 @@ def plot_doubling_beta_CATEs(results_dict, title, figure=None, axes=None, row=No
     # Get 95% confidence intervals for association and causation
     ate_cis = results_dict["causation"]["cis"]
    association_ate_cis = results_dict["association"]["cis"]
-    percentage_causal_ate_cis = [round(((ci / causation_df["cum_infections"].mean()) * 100), 3) for ci in ate_cis]
+    percentage_causal_ate_cis = [round(((ci[0] / causation_df["cum_infections"].mean()) * 100), 3) for ci in ate_cis]
     percentage_association_ate_cis = [
-        round(((ci / association_df["cum_infections"].mean()) * 100), 3) for ci in association_ate_cis
+        round(((ci[0] / association_df["cum_infections"].mean()) * 100), 3) for ci in association_ate_cis
     ]
-
     # Convert confidence intervals to errors for plotting
     percentage_causal_errs = [
         percentage_ate - percentage_causal_ate_cis[0],
@@ -314,9 +313,9 @@ def plot_doubling_beta_CATEs(results_dict, title, figure=None, axes=None, row=No
     if "counterfactual" in results_dict.keys():
         cf_ate = results_dict["counterfactual"]["ate"]
         cf_df = results_dict["counterfactual"]["df"]
-        percentage_cf_ate = round((cf_ate / cf_df["cum_infections"].mean()) * 100, 3)
+        percentage_cf_ate = round((cf_ate[0] / cf_df["cum_infections"].mean()) * 100, 3)
         cf_ate_cis = results_dict["counterfactual"]["cis"]
-        percentage_cf_cis = [round(((ci / cf_df["cum_infections"].mean()) * 100), 3) for ci in cf_ate_cis]
+        percentage_cf_cis = [round(((ci[0] / cf_df["cum_infections"].mean()) * 100), 3) for ci in cf_ate_cis]
         percentage_cf_errs = [percentage_cf_ate - percentage_cf_cis[0], percentage_cf_cis[1] - percentage_cf_ate]
         xs = [0.5, 1.5, 2.5]
         ys = [association_percentage_ate, percentage_ate, percentage_cf_ate]

From eb6bca6bf8d5b075d67d9225b9ff9c4a8d09d5b8 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 27 Feb 2024 14:53:49 +0000
Subject: [PATCH 31/36] Raise exception for Positive and Negative effect if multiple values passed in

---
 causal_testing/testing/causal_test_outcome.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
index 67f4d087..3c1d54dd 100644
--- a/causal_testing/testing/causal_test_outcome.py
+++ b/causal_testing/testing/causal_test_outcome.py
@@ -94,6 +94,8 @@ class Positive(SomeEffect):
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_valid() and not super().apply(res):
             return False
+        if len(res.test_value.value > 0):
+            raise ValueError("Positive Effects are currently only supported on single float datatypes")
         if res.test_value.type in {"ate", "coefficient"}:
             return bool(res.test_value.value[0] > 0)
         if res.test_value.type == "risk_ratio":
@@ -107,6 +109,8 @@ class Negative(SomeEffect):
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_valid() and not super().apply(res):
             return False
+        if len(res.test_value.value > 0):
+            raise ValueError("Negative Effects are currently only supported on single float datatypes")
         if res.test_value.type in {"ate", "coefficient"}:
             return bool(res.test_value.value[0] < 0)
         if res.test_value.type == "risk_ratio":

From 5265e9f45993f6604ebf9ee27ac9cc13daff5953 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 27 Feb 2024 16:02:20 +0000
Subject: [PATCH 32/36] Fix typo in check for value length

---
 causal_testing/testing/causal_test_outcome.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
index 3c1d54dd..15c95cb5 100644
--- a/causal_testing/testing/causal_test_outcome.py
+++ b/causal_testing/testing/causal_test_outcome.py
@@ -94,7 +94,7 @@ class Positive(SomeEffect):
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_valid() and not super().apply(res):
             return False
-        if len(res.test_value.value > 0):
+        if len(res.test_value.value) > 1:
             raise ValueError("Positive Effects are currently only supported on single float datatypes")
         if res.test_value.type in {"ate", "coefficient"}:
             return bool(res.test_value.value[0] > 0)
@@ -109,7 +109,7 @@ class Negative(SomeEffect):
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_valid() and not super().apply(res):
             return False
-        if len(res.test_value.value > 0):
+        if len(res.test_value.value) > 1:
             raise ValueError("Negative Effects are currently only supported on single float datatypes")
         if res.test_value.type in {"ate", "coefficient"}:
             return bool(res.test_value.value[0] < 0)

From fb287ec08a127ddee6d04b7236353efdf9de5e5d Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 27 Feb 2024 16:23:11 +0000
Subject: [PATCH 33/36] Add test for catching multiple value exception

---
 tests/testing_tests/test_causal_test_outcome.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py
index ac4a9716..235cc724 100644
--- a/tests/testing_tests/test_causal_test_outcome.py
+++ b/tests/testing_tests/test_causal_test_outcome.py
@@ -322,3 +322,16 @@ def test_negative_risk_ratio_e_value_using_ci(self):
         cv = CausalValidator()
         e_value = cv.estimate_e_value_using_ci(0.8, [0.2, 0.9])
         self.assertEqual(round(e_value, 4), 1.4625)
+
+    def test_multiple_value_exception_caught(self):
+        test_value = TestValue(type="ate", value=pd.Series([0, 1]))
+        ctr = CausalTestResult(
+            estimator=self.estimator,
+            test_value=test_value,
+            confidence_intervals=[None, None],
+            effect_modifier_configuration=None,
+        )
+        with self.assertRaises(ValueError):
+            Positive().apply(ctr)
+        with self.assertRaises(ValueError):
+            Negative().apply(ctr)

From 844849a63cb5a37879ecf58c452bcc09c93250dc Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Tue, 27 Feb 2024 16:54:46 +0000
Subject: [PATCH 34/36] Use pandas inbuilt assert_series_equal test instead of casting everything to single values

---
 tests/testing_tests/test_causal_test_case.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
index 92096251..433aeb64 100644
--- a/tests/testing_tests/test_causal_test_case.py
+++ b/tests/testing_tests/test_causal_test_case.py
@@ -118,7 +118,7 @@ def test_execute_test_observational_causal_forest_estimator(self):
             self.df,
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertAlmostEqual(causal_test_result.test_value.value[0], 4, delta=1)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1)
 
     def test_invalid_causal_effect(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -140,7 +140,7 @@ def test_execute_test_observational_linear_regression_estimator(self):
             self.df,
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertAlmostEqual(causal_test_result.test_value.value[0], 4, delta=1e-10)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10)
 
     def test_execute_test_observational_linear_regression_estimator_direct_effect(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -167,7 +167,7 @@ def test_execute_test_observational_linear_regression_estimator_direct_effect(se
             self.df,
         )
         causal_test_result = causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertAlmostEqual(causal_test_result.test_value.value[0], 4, delta=1e-10)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10)
 
     def test_execute_test_observational_linear_regression_estimator_coefficient(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -182,7 +182,7 @@ def test_execute_test_observational_linear_regression_estimator_coefficient(self
         )
         self.causal_test_case.estimate_type = "coefficient"
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertEqual(int(causal_test_result.test_value.value), 0)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series({'D': 0.0}), atol=1e-1)
 
     def test_execute_test_observational_linear_regression_estimator_risk_ratio(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -197,7 +197,7 @@ def test_execute_test_observational_linear_regression_estimator_risk_ratio(self)
         )
         self.causal_test_case.estimate_type = "risk_ratio"
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertEqual(int(causal_test_result.test_value.value), 0)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(0.0), atol=1)
 
     def test_invalid_estimate_type(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -227,7 +227,7 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel
             formula=f"C ~ A + {'+'.join(self.minimal_adjustment_set)} + (D ** 2)",
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertAlmostEqual(round(causal_test_result.test_value.value[0], 1), 4, delta=1)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1)
 
     def test_execute_observational_causal_forest_estimator_cates(self):
         """Check that executing the causal test case returns the correct conditional average treatment effects for

From 6029edb5169a2e5585905ce99a43e2b46dcc4981 Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Wed, 28 Feb 2024 08:55:05 +0000
Subject: [PATCH 35/36] Add limitation of single test_value to Effect docstrings

---
 causal_testing/testing/causal_test_outcome.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
index 15c95cb5..0c3ae1e4 100644
--- a/causal_testing/testing/causal_test_outcome.py
+++ b/causal_testing/testing/causal_test_outcome.py
@@ -89,7 +89,8 @@ def __str__(self):
 
 class Positive(SomeEffect):
-    """An extension of TestOutcome representing that the expected causal effect should be positive."""
+    """An extension of TestOutcome representing that the expected causal effect should be positive.
+    Currently only single values are supported for the test value"""
 
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_valid() and not super().apply(res):
             return False
@@ -104,7 +105,8 @@ def apply(self, res: CausalTestResult) -> bool:
 
 class Negative(SomeEffect):
-    """An extension of TestOutcome representing that the expected causal effect should be negative."""
+    """An extension of TestOutcome representing that the expected causal effect should be negative.
+    Currently only single values are supported for the test value"""
 
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_valid() and not super().apply(res):
             return False

From b8ad41958fd2aff034f741d310c4a85c595e62cb Mon Sep 17 00:00:00 2001
From: cwild-UoS <93984046+cwild-UoS@users.noreply.github.com>
Date: Wed, 28 Feb 2024 08:58:06 +0000
Subject: [PATCH 36/36] Ensure only single ate values are provided in surrogate_models

---
 .../surrogate/surrogate_search_algorithms.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/causal_testing/surrogate/surrogate_search_algorithms.py b/causal_testing/surrogate/surrogate_search_algorithms.py
index da1f0aeb..94984b6a 100644
--- a/causal_testing/surrogate/surrogate_search_algorithms.py
+++ b/causal_testing/surrogate/surrogate_search_algorithms.py
@@ -35,8 +35,8 @@ def search(
 
             # The GA fitness function after including required variables into the function's scope
             # Unused arguments are required for pygad's fitness function signature
-            #pylint: disable=cell-var-from-loop
-            def fitness_function(ga, solution, idx): # pylint: disable=unused-argument
+            # pylint: disable=cell-var-from-loop
+            def fitness_function(ga, solution, idx):  # pylint: disable=unused-argument
                 surrogate.control_value = solution[0] - self.delta
                 surrogate.treatment_value = solution[0] + self.delta
 
@@ -45,7 +45,9 @@ def fitness_function(ga, solution, idx): # pylint: disable=unused-argument
                     adjustment_dict[adjustment] = solution[i + 1]
 
                 ate = surrogate.estimate_ate_calculated(adjustment_dict)
-
+                if len(ate) > 1:
+                    raise ValueError(
+                        "Multiple ate values provided but currently only single values supported in this method")
                 return contradiction_function(ate[0])
 
             gene_types, gene_space = self.create_gene_types(surrogate, specification)
@@ -82,7 +84,7 @@ def fitness_function(ga, solution, idx): # pylint: disable=unused-argument
 
     @staticmethod
     def create_gene_types(
         surrogate_model: CubicSplineRegressionEstimator, specification: CausalSpecification
     ) -> tuple[list, list]:
         """Generate the gene_types and gene_space for a given fitness function and specification
         :param surrogate_model: Instance of a CubicSplineRegressionEstimator