Skip to content

Commit

Permalink
add chapter 8
Browse files Browse the repository at this point in the history
Signed-off-by: Rahul Shrestha <[email protected]>
  • Loading branch information
rahulbshrestha committed Apr 28, 2024
1 parent 47eee77 commit 8f93548
Showing 1 changed file with 80 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import pandas as pd
from pytest import mark
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LassoCV, LinearRegression, LogisticRegression
from tqdm import tqdm

from dowhy import CausalModel

# TODO: Add tests for Chapter 6, 7 (alreay below) + 9, 10, Extras_02

# TODO: Add tests for Chapter 6, 7 (already below) + 9, 10, Extras_02

class GPSMemorySCM:
def __init__(self, random_seed=None):
Expand Down Expand Up @@ -44,6 +45,82 @@ def intervene(self, treatment_value, sample_size=100):

@mark.usefixtures("fixed_seed")
class TestCausalInferenceDiscoveryBook(object):
def test_dowhy_chapter_8(self):
    """Run the chapter 8 DML vs. linear-regression comparison and check it.

    Data are simulated from a fixed linear SCM over S, Q, X, Y and P for a
    grid of sample sizes and noise coefficients.  For every simulated
    dataset the effect of X on Y is estimated through DoWhy with both
    EconML's DML and plain linear regression, and the estimates (plus
    their deviation from the true coefficient) are recorded.

    The test fails if any estimation raises, if a run is missing from the
    recorded results, if any estimate is not finite, or if linear
    regression is far from the true effect at the largest sample size.
    """
    # Construct the graph (the graph is constant for all iterations)
    nodes = ["S", "Q", "X", "Y", "P"]
    edges = ["SQ", "SY", "QX", "QY", "XP", "YP", "XY"]

    # Generate the GML graph; each edge is a two-character string
    # "<source><target>", so it unpacks directly into its endpoints.
    node_lines = [f'\tnode [id "{node}" label "{node}"]\n' for node in nodes]
    edge_lines = [f'\tedge [source "{source}" target "{target}"]\n' for source, target in edges]
    gml_string = "".join(["graph [directed 1\n", *node_lines, *edge_lines, "]"])

    # Define the true effect (coefficient of X in the equation for Y)
    TRUE_EFFECT = 0.7

    # Define experiment params
    sample_sizes = [30, 100, 1000, 10000]
    noise_coefs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
    n_samples = 20

    # Record the results
    results = []

    # Run the experiment
    for sample_size in tqdm(sample_sizes):
        for noise_coef in noise_coefs:
            for _ in range(n_samples):
                # Generate the data.  The order of the random draws is kept
                # fixed so that results stay reproducible under a fixed seed.
                S = np.random.random(sample_size)
                Q = 0.2 * S + noise_coef * np.random.random(sample_size)
                X = 0.14 * Q + noise_coef * np.random.random(sample_size)
                Y = TRUE_EFFECT * X + 0.11 * Q + 0.32 * S + noise_coef * np.random.random(sample_size)
                P = 0.43 * X + 0.21 * Y + noise_coef * np.random.random(sample_size)

                # Encode as a pandas df
                df = pd.DataFrame(np.vstack([S, Q, X, Y, P]).T, columns=["S", "Q", "X", "Y", "P"])

                # Instantiate the CausalModel
                model = CausalModel(data=df, treatment="X", outcome="Y", graph=gml_string)

                # Get the estimand
                estimand = model.identify_effect()

                # Get estimate (DML)
                estimate_dml = model.estimate_effect(
                    identified_estimand=estimand,
                    method_name="backdoor.econml.dml.DML",
                    method_params={
                        "init_params": {
                            "model_y": GradientBoostingRegressor(),
                            "model_t": GradientBoostingRegressor(),
                            "model_final": LassoCV(fit_intercept=False),
                        },
                        "fit_params": {},
                    },
                )

                # Get estimate (Linear Regression)
                estimate_lr = model.estimate_effect(
                    identified_estimand=estimand, method_name="backdoor.linear_regression"
                )

                results.append(
                    {
                        "sample_size": sample_size,
                        "noise_coef": noise_coef,
                        "estimate_dml": estimate_dml.value,
                        "estimate_lr": estimate_lr.value,
                        "error_dml": estimate_dml.value - TRUE_EFFECT,
                        "error_lr": estimate_lr.value - TRUE_EFFECT,
                    }
                )

    # Every (sample size, noise coefficient, repetition) combination must
    # have produced exactly one record.
    assert len(results) == len(sample_sizes) * len(noise_coefs) * n_samples

    # Both estimators must return usable numbers for every dataset.
    for record in results:
        assert np.all(np.isfinite([record["estimate_dml"], record["estimate_lr"]]))

    # The data-generating equations are linear, so with the largest sample
    # the linear-regression estimate should sit close to the true effect.
    # The tolerance is deliberately loose to avoid seed-dependent flakiness.
    largest_sample_size = max(sample_sizes)
    lr_abs_errors = [abs(record["error_lr"]) for record in results if record["sample_size"] == largest_sample_size]
    assert np.mean(lr_abs_errors) < 0.1

def test_dowhy_chapter_7(self):
# Instantiate the SCM
Expand Down

0 comments on commit 8f93548

Please sign in to comment.