Add test for Chapter 8 of the causal inference and discovery book

Signed-off-by: Rahul Shrestha <[email protected]>
py-why · Apr 28, 2024 · 8f93548 · 8f93548
1 parent 47eee77
commit 8f93548
Showing 1 changed file with 80 additions and 3 deletions.
diff --git a/tests/causal-inference-discovery-book/test_causal_inference_discovery_book.py b/tests/causal-inference-discovery-book/test_causal_inference_discovery_book.py
@@ -2,12 +2,13 @@
 import pandas as pd
 from pytest import mark
 from scipy import stats
-from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.linear_model import LassoCV, LinearRegression, LogisticRegression
+from tqdm import tqdm
 
 from dowhy import CausalModel
 
-# TODO: Add tests for Chapter 6, 7 (alreay below) + 9, 10, Extras_02
-
+# TODO: Add tests for Chapter 6, 7 (already below) + 9, 10, Extras_02
 
 class GPSMemorySCM:
     def __init__(self, random_seed=None):
@@ -44,6 +45,82 @@ def intervene(self, treatment_value, sample_size=100):
 
 @mark.usefixtures("fixed_seed")
 class TestCausalInferenceDiscoveryBook(object):
+    def test_dowhy_chapter_8(self):
+        """Smoke-run the Chapter 8 DML vs. linear-regression comparison; makes no assertions."""
+        # The causal graph is the same for every simulated dataset, so build it once.
+        node_names = ["S", "Q", "X", "Y", "P"]
+        edge_pairs = ["SQ", "SY", "QX", "QY", "XP", "YP", "XY"]
+
+        # Serialise to GML: header, one line per node, one line per edge, closing bracket.
+        node_lines = [f'\tnode [id "{name}" label "{name}"]\n' for name in node_names]
+        edge_lines = [f'\tedge [source "{src}" target "{dst}"]\n' for src, dst in edge_pairs]
+        gml_string = "".join(["graph [directed 1\n", *node_lines, *edge_lines, "]"])
+
+        # Coefficient of X in the structural equation for Y; estimates are compared against it.
+        TRUE_EFFECT = 0.7
+
+        # Experiment grid: every sample size x noise coefficient, repeated n_samples times.
+        sample_sizes = [30, 100, 1000, 10000]
+        noise_coefs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
+        n_samples = 20
+
+        # One record per simulated dataset.
+        results = []
+
+        def scaled_noise(scale, size):
+            # Fresh uniform draw, scaled by the given noise coefficient.
+            return scale * np.random.random(size)
+
+        for sample_size in tqdm(sample_sizes):
+            for noise_coef in noise_coefs:
+                for _ in range(n_samples):
+                    # Simulate the data in S, Q, X, Y, P order; each noise term is a fresh uniform draw.
+                    S = np.random.random(sample_size)
+                    Q = 0.2 * S + scaled_noise(noise_coef, sample_size)
+                    X = 0.14 * Q + scaled_noise(noise_coef, sample_size)
+                    Y = TRUE_EFFECT * X + 0.11 * Q + 0.32 * S + scaled_noise(noise_coef, sample_size)
+                    P = 0.43 * X + 0.21 * Y + scaled_noise(noise_coef, sample_size)
+
+                    # One column per variable, named after the graph nodes.
+                    stacked = np.vstack([S, Q, X, Y, P]).T
+                    df = pd.DataFrame(stacked, columns=node_names)
+
+                    # Bind data and graph, then identify the estimand for X -> Y.
+                    model = CausalModel(data=df, treatment="X", outcome="Y", graph=gml_string)
+                    estimand = model.identify_effect()
+
+                    # DML with gradient boosting for model_y/model_t and an intercept-free LassoCV as model_final.
+                    dml_params = {
+                        "init_params": {
+                            "model_y": GradientBoostingRegressor(),
+                            "model_t": GradientBoostingRegressor(),
+                            "model_final": LassoCV(fit_intercept=False),
+                        },
+                        "fit_params": {},
+                    }
+                    estimate_dml = model.estimate_effect(
+                        identified_estimand=estimand,
+                        method_name="backdoor.econml.dml.DML",
+                        method_params=dml_params,
+                    )
+
+                    # Linear-regression estimate of the same estimand.
+                    estimate_lr = model.estimate_effect(
+                        identified_estimand=estimand,
+                        method_name="backdoor.linear_regression",
+                    )
+
+                    dml_value = estimate_dml.value
+                    lr_value = estimate_lr.value
+                    record = {
+                        "sample_size": sample_size,
+                        "noise_coef": noise_coef,
+                        "estimate_dml": dml_value,
+                        "estimate_lr": lr_value,
+                        "error_dml": dml_value - TRUE_EFFECT,
+                        "error_lr": lr_value - TRUE_EFFECT,
+                    }
+                    results.append(record)
 
     def test_dowhy_chapter_7(self):
         # Instantiate the SCM