Skip to content

Commit

Permalink
Merge branch 'master' of github.com:DoubleML/doubleml-serverless into…
Browse files Browse the repository at this point in the history
… 0.0.X
  • Loading branch information
MalteKurz committed Jun 15, 2022
2 parents 2881ba4 + 7a95d0d commit 5e31300
Show file tree
Hide file tree
Showing 11 changed files with 115 additions and 99 deletions.
25 changes: 20 additions & 5 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,35 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.6', '3.7', '3.8', '3.9']
config:
- {python-version: '3.6', doubleml-version: 'release'}
- {python-version: '3.7', doubleml-version: 'release'}
- {python-version: '3.8', doubleml-version: 'release'}
- {python-version: '3.8', doubleml-version: 'dev'}
- {python-version: '3.9', doubleml-version: 'release'}

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python ${{ matrix.config.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
python-version: ${{ matrix.config.python-version }}
- uses: actions/checkout@v2
if: matrix.config.doubleml-version == 'dev'
with:
repository: DoubleML/doubleml-for-py
path: doubleml-for-py
- name: DoubleML dev version
if: matrix.config.doubleml-version == 'dev'
run: |
cd doubleml-for-py
pip install --editable .
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pytest
python -m pip install pytest xgboost
pip install -r requirements.txt
pip install .
- name: Test with pytest
run: |
pytest
pytest doubleml_serverless/
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2020 Malte S. Kurz
Copyright (c) 2020-2021 Malte S. Kurz

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ DoubleML-Serverless is an extension for serverless cloud computing of the Python
DoubleML is available via PyPI [https://pypi.org/project/DoubleML](https://pypi.org/project/DoubleML) and on GitHub [https://github.com/DoubleML/doubleml-for-py](https://github.com/DoubleML/doubleml-for-py).
The Python package DoubleML was introduced in
"DoubleML - An Object-Oriented Implementation of Double Machine Learning in Python"
([Bach et al., 2021](https://arxiv.org/abs/2104.03220))
([Bach et al., 2022](https://www.jmlr.org/papers/v23/21-0862.html))
and detailed documentation \& a user guide for the package are available at
[https://docs.doubleml.org](https://docs.doubleml.org).

Expand Down Expand Up @@ -149,9 +149,10 @@ Bibtex-entry:

## References

Bach, P., Chernozhukov, V., Kurz, M. S., and Spindler, M. (2021).
DoubleML - An Object-Oriented Implementation of Double Machine Learning in Python.
arXiv:[2104.03220](https://arxiv.org/abs/2104.03220).
Bach, P., Chernozhukov, V., Kurz, M. S., and Spindler, M. (2022), DoubleML - An
Object-Oriented Implementation of Double Machine Learning in Python,
Journal of Machine Learning Research, 23(53): 1-6,
[https://www.jmlr.org/papers/v23/21-0862.html](https://www.jmlr.org/papers/v23/21-0862.html).

Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018).
Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68.
Expand Down
30 changes: 15 additions & 15 deletions doubleml_serverless/double_ml_iivm_aws_lambda.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,22 @@ def __init__(self,
draw_sample_splitting=True,
apply_cross_fitting=True):
DoubleMLIIVM.__init__(self,
obj_dml_data,
ml_g,
ml_m,
ml_r,
n_folds,
n_rep,
score,
subgroups,
dml_procedure,
trimming_rule,
trimming_threshold,
draw_sample_splitting,
apply_cross_fitting)
obj_dml_data=obj_dml_data,
ml_g=ml_g,
ml_m=ml_m,
ml_r=ml_r,
n_folds=n_folds,
n_rep=n_rep,
score=score,
subgroups=subgroups,
dml_procedure=dml_procedure,
trimming_rule=trimming_rule,
trimming_threshold=trimming_threshold,
draw_sample_splitting=draw_sample_splitting,
apply_cross_fitting=apply_cross_fitting)
DoubleMLLambda.__init__(self,
lambda_function_name,
aws_region)
lambda_function_name=lambda_function_name,
aws_region=aws_region)

def _ml_nuisance_aws_lambda(self, cv_params):
assert self._dml_data.n_treat == 1
Expand Down
27 changes: 13 additions & 14 deletions doubleml_serverless/double_ml_irm_aws_lambda.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from doubleml import DoubleMLIRM
import numpy as np
from sklearn.utils import check_X_y

from ._helper import _get_cond_smpls
Expand All @@ -24,20 +23,20 @@ def __init__(self,
draw_sample_splitting=True,
apply_cross_fitting=True):
DoubleMLIRM.__init__(self,
obj_dml_data,
ml_g,
ml_m,
n_folds,
n_rep,
score,
dml_procedure,
trimming_rule,
trimming_threshold,
draw_sample_splitting,
apply_cross_fitting)
obj_dml_data=obj_dml_data,
ml_g=ml_g,
ml_m=ml_m,
n_folds=n_folds,
n_rep=n_rep,
score=score,
dml_procedure=dml_procedure,
trimming_rule=trimming_rule,
trimming_threshold=trimming_threshold,
draw_sample_splitting=draw_sample_splitting,
apply_cross_fitting=apply_cross_fitting)
DoubleMLLambda.__init__(self,
lambda_function_name,
aws_region)
lambda_function_name=lambda_function_name,
aws_region=aws_region)

def _ml_nuisance_aws_lambda(self, cv_params):
assert self._dml_data.n_treat == 1
Expand Down
37 changes: 19 additions & 18 deletions doubleml_serverless/double_ml_pliv_aws_lambda.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self,
lambda_function_name,
aws_region,
obj_dml_data,
ml_g,
ml_l,
ml_m,
ml_r,
n_folds=5,
Expand All @@ -21,19 +21,19 @@ def __init__(self,
draw_sample_splitting=True,
apply_cross_fitting=True):
DoubleMLPLIV.__init__(self,
obj_dml_data,
ml_g,
ml_m,
ml_r,
n_folds,
n_rep,
score,
dml_procedure,
draw_sample_splitting,
apply_cross_fitting)
obj_dml_data=obj_dml_data,
ml_l=ml_l,
ml_m=ml_m,
ml_r=ml_r,
n_folds=n_folds,
n_rep=n_rep,
score=score,
dml_procedure=dml_procedure,
draw_sample_splitting=draw_sample_splitting,
apply_cross_fitting=apply_cross_fitting)
DoubleMLLambda.__init__(self,
lambda_function_name,
aws_region)
lambda_function_name=lambda_function_name,
aws_region=aws_region)

def _ml_nuisance_aws_lambda(self, cv_params):
assert self._dml_data.n_treat == 1
Expand All @@ -47,12 +47,12 @@ def _ml_nuisance_aws_lambda(self, cv_params):

payload = self._dml_data.get_payload()

payload_ml_g = payload.copy()
payload_ml_l = payload.copy()
payload_ml_m = payload.copy()
payload_ml_r = payload.copy()

_attach_learner(payload_ml_g,
'ml_g', self.learner['ml_g'],
_attach_learner(payload_ml_l,
'ml_l', self.learner['ml_l'],
self._dml_data.y_col, self._dml_data.x_cols)

_attach_learner(payload_ml_m,
Expand All @@ -63,7 +63,7 @@ def _ml_nuisance_aws_lambda(self, cv_params):
'ml_r', self.learner['ml_r'],
self._dml_data.d_cols[0], self._dml_data.x_cols)

payloads = _attach_smpls([payload_ml_g, payload_ml_m, payload_ml_r],
payloads = _attach_smpls([payload_ml_l, payload_ml_m, payload_ml_r],
[self.smpls, self.smpls, self.smpls],
self.n_folds,
self.n_rep,
Expand All @@ -80,9 +80,10 @@ def _ml_nuisance_aws_lambda(self, cv_params):
# compute score elements
self._psi_a[:, i_rep, self._i_treat], self._psi_b[:, i_rep, self._i_treat] = \
self._score_elements(y, z, d,
preds['ml_g'][:, i_rep],
preds['ml_l'][:, i_rep],
preds['ml_m'][:, i_rep],
preds['ml_r'][:, i_rep],
None,
self.smpls[i_rep])

return
38 changes: 19 additions & 19 deletions doubleml_serverless/double_ml_plr_aws_lambda.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from doubleml import DoubleMLPLR
import numpy as np
from sklearn.utils import check_X_y

from .double_ml_aws_lambda import DoubleMLLambda
from ._helper import _attach_learner, _attach_smpls, _extract_preds
from ._helper import _attach_learner, _attach_smpls


class DoubleMLPLRServerless(DoubleMLPLR, DoubleMLLambda):
def __init__(self,
lambda_function_name,
aws_region,
obj_dml_data,
ml_g,
ml_l,
ml_m,
n_folds=5,
n_rep=1,
Expand All @@ -20,18 +19,18 @@ def __init__(self,
draw_sample_splitting=True,
apply_cross_fitting=True):
DoubleMLPLR.__init__(self,
obj_dml_data,
ml_g,
ml_m,
n_folds,
n_rep,
score,
dml_procedure,
draw_sample_splitting,
apply_cross_fitting)
obj_dml_data=obj_dml_data,
ml_l=ml_l,
ml_m=ml_m,
n_folds=n_folds,
n_rep=n_rep,
score=score,
dml_procedure=dml_procedure,
draw_sample_splitting=draw_sample_splitting,
apply_cross_fitting=apply_cross_fitting)
DoubleMLLambda.__init__(self,
lambda_function_name,
aws_region)
lambda_function_name=lambda_function_name,
aws_region=aws_region)

def _ml_nuisance_aws_lambda(self, cv_params):
assert self._dml_data.n_treat == 1
Expand All @@ -42,18 +41,18 @@ def _ml_nuisance_aws_lambda(self, cv_params):

payload = self._dml_data.get_payload()

payload_ml_g = payload.copy()
payload_ml_l = payload.copy()
payload_ml_m = payload.copy()

_attach_learner(payload_ml_g,
'ml_g', self.learner['ml_g'],
_attach_learner(payload_ml_l,
'ml_l', self.learner['ml_l'],
self._dml_data.y_col, self._dml_data.x_cols)

_attach_learner(payload_ml_m,
'ml_m', self.learner['ml_m'],
self._dml_data.d_cols[0], self._dml_data.x_cols)

payloads = _attach_smpls([payload_ml_g, payload_ml_m],
payloads = _attach_smpls([payload_ml_l, payload_ml_m],
[self.smpls, self.smpls],
self.n_folds,
self.n_rep,
Expand All @@ -70,8 +69,9 @@ def _ml_nuisance_aws_lambda(self, cv_params):
# compute score elements
self._psi_a[:, i_rep, self._i_treat], self._psi_b[:, i_rep, self._i_treat] = \
self._score_elements(y, d,
preds['ml_g'][:, i_rep],
preds['ml_l'][:, i_rep],
preds['ml_m'][:, i_rep],
None,
self.smpls[i_rep])

return
20 changes: 10 additions & 10 deletions doubleml_serverless/tests/test_pliv.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,16 @@ def dml_pliv_fixture(generate_data_pliv, idx, learner, score, dml_procedure):
x_cols = data.columns[data.columns.str.startswith('X')].tolist()

# Set machine learning methods for l, m & r
ml_g = clone(learner)
ml_l = clone(learner)
ml_m = clone(learner)
ml_r = clone(learner)

np.random.seed(3141)
dml_data_json = dml_lambda.DoubleMLDataJson(data, 'y', ['d'], x_cols, 'Z1')
dml_pliv_lambda = DoubleMLPLIVServerlessLocal('local', 'local',
dml_data_json,
ml_g, ml_m, ml_r,
n_folds,
ml_l, ml_m, ml_r,
n_folds=n_folds,
score=score,
dml_procedure=dml_procedure)

Expand All @@ -76,8 +76,8 @@ def dml_pliv_fixture(generate_data_pliv, idx, learner, score, dml_procedure):
np.random.seed(3141)
dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'Z1')
dml_pliv = dml.DoubleMLPLIV(dml_data,
ml_g, ml_m, ml_r,
n_folds,
ml_l, ml_m, ml_r,
n_folds=n_folds,
score=score,
dml_procedure=dml_procedure)

Expand Down Expand Up @@ -140,7 +140,7 @@ def dml_pliv_scaling_fixture(generate_data_pliv, idx, learner, score, dml_proced
x_cols = data.columns[data.columns.str.startswith('X')].tolist()

# Set machine learning methods for l, m & r
ml_g = clone(learner)
ml_l = clone(learner)
ml_m = clone(learner)
ml_r = clone(learner)

Expand All @@ -149,8 +149,8 @@ def dml_pliv_scaling_fixture(generate_data_pliv, idx, learner, score, dml_proced
np.random.seed(3141)
dml_pliv_folds = DoubleMLPLIVServerlessLocal('local', 'local',
dml_data_json,
ml_g, ml_m, ml_r,
n_folds,
ml_l, ml_m, ml_r,
n_folds=n_folds,
score=score,
dml_procedure=dml_procedure)

Expand All @@ -159,8 +159,8 @@ def dml_pliv_scaling_fixture(generate_data_pliv, idx, learner, score, dml_proced
np.random.seed(3141)
dml_pliv_reps = DoubleMLPLIVServerlessLocal('local', 'local',
dml_data_json,
ml_g, ml_m, ml_r,
n_folds,
ml_l, ml_m, ml_r,
n_folds=n_folds,
score=score,
dml_procedure=dml_procedure)

Expand Down
Loading

0 comments on commit 5e31300

Please sign in to comment.