diff --git a/.gitignore b/.gitignore
index 41a409eb..d9ffb93c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,4 @@ share/python-wheels/
 MANIFEST
 *.idea
 *.vscode
+.flake8
diff --git a/README.md b/README.md
index 4431b5ab..f91eff54 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 [![Conda Version](https://img.shields.io/conda/vn/conda-forge/doubleml.svg)](https://anaconda.org/conda-forge/doubleml)
 [![codecov](https://codecov.io/gh/DoubleML/doubleml-for-py/branch/main/graph/badge.svg?token=0BjlFPgdGk)](https://codecov.io/gh/DoubleML/doubleml-for-py)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/1c08ec7d782c451784293c996537de14)](https://www.codacy.com/gh/DoubleML/doubleml-for-py/dashboard?utm_source=github.com&utm_medium=referral&utm_content=DoubleML/doubleml-for-py&utm_campaign=Badge_Grade)
-[![Python version](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)](https://www.python.org/)
+[![Python version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)](https://www.python.org/)
 
 The Python package **DoubleML** provides an implementation of the double / debiased machine learning framework of
 [Chernozhukov et al. (2018)](https://doi.org/10.1111/ectj.12097).
diff --git a/doc/conf.py b/doc/conf.py
index e4fd9f18..0de66f38 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -22,7 +22,7 @@
 author = 'Bach, P., Chernozhukov, V., Kurz, M. S., and Spindler, M.'
 
 # The full version, including alpha/beta/rc tags
-release = '0.7.0'
+release = '0.7.1'
 
 # -- General configuration ---------------------------------------------------
diff --git a/doubleml/_utils.py b/doubleml/_utils.py
index 345f914f..c2ed9d4e 100644
--- a/doubleml/_utils.py
+++ b/doubleml/_utils.py
@@ -333,3 +333,9 @@ def _var_est(psi, psi_deriv, apply_cross_fitting, smpls, is_cluster_data,
         sigma2_hat = np.multiply(scaling, gamma_hat)
 
     return sigma2_hat, var_scaling_factor
+
+
+def _cond_targets(target, cond_sample):
+    cond_target = target.astype(float)
+    cond_target[np.invert(cond_sample)] = np.nan
+    return cond_target
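A minimal sketch of how the new `_cond_targets` helper behaves (it is a private utility; the import path follows the diff above and the toy arrays are purely illustrative):

```python
import numpy as np
from doubleml._utils import _cond_targets

# keep targets only on the conditioning subsample and mask the rest with NaN
target = np.array([1, 0, 1, 1])
cond_sample = np.array([True, False, True, False])
print(_cond_targets(target, cond_sample=cond_sample))  # [ 1. nan  1. nan]
```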
" + f"weights of type {str(type(weights))} was passed.") + + is_binary = np.all((np.power(weights, 2) - weights) == 0) + if not is_binary: + raise ValueError("weights must be binary for ATTE score.") + + # check general form for ATE score + if isinstance(weights, dict): + assert score == "ATE" + expected_keys = ["weights", "weights_bar"] + if not set(weights.keys()) == set(expected_keys): + raise ValueError(f"weights must have keys {expected_keys}. " + f"keys {str(weights.keys())} were passed.") + + expected_shapes = [(n_obs,), (n_obs, n_rep)] + if weights["weights"].shape != expected_shapes[0]: + raise ValueError(f"weights must have shape {expected_shapes[0]}. " + f"weights of shape {weights['weights'].shape} was passed.") + if weights["weights_bar"].shape != expected_shapes[1]: + raise ValueError(f"weights_bar must have shape {expected_shapes[1]}. " + f"weights_bar of shape {weights['weights_bar'].shape} was passed.") + if (not np.all(weights["weights"] >= 0)) or (not np.all(weights["weights_bar"] >= 0)): + raise ValueError("All weights values must be greater or equal 0.") + if (weights["weights"].sum() == 0) or (weights["weights_bar"].sum() == 0): + raise ValueError("At least one weight must be non-zero.") + return diff --git a/doubleml/_utils_resampling.py b/doubleml/_utils_resampling.py index a2ebdf31..93546b06 100644 --- a/doubleml/_utils_resampling.py +++ b/doubleml/_utils_resampling.py @@ -4,6 +4,15 @@ from sklearn.model_selection import KFold, RepeatedKFold, RepeatedStratifiedKFold +# Remove warnings in future versions +def deprication_apply_cross_fitting(): + warnings.warn('The apply_cross_fitting argument is deprecated and will be removed in future versions. ' + 'In the future, crossfitting is applied by default. ' + 'To rely on sample splitting please use external predictions.', + DeprecationWarning) + return + + class DoubleMLResampling: def __init__(self, n_folds, @@ -14,6 +23,8 @@ def __init__(self, self.n_folds = n_folds self.n_rep = n_rep self.n_obs = n_obs + if not apply_cross_fitting: + deprication_apply_cross_fitting() self.apply_cross_fitting = apply_cross_fitting self.stratify = stratify if (self.n_folds == 1) & self.apply_cross_fitting: diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 02bac5ea..f534147e 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1228,3 +1228,120 @@ def f_g(beta_a): 'oracle_values': oracle_values} return res_dict + + +def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False): + """ + Creates a simple synthetic example for heterogeneous treatment effects. + The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019). + + The data is generated as + + .. math:: + + Y_i & = \\theta_0(X_i)D_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i + + D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, + + where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i + \\sim\\mathcal{U}[-1,1]`. + If the treatment is set to be binary, the treatment is generated as + + .. math:: + D_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. + + The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have small random (identical) support + which values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. + Further, :math:`\\theta_0(x)` defines the conditional treatment effect, which is defined differently depending + on the dimension of :math:`x`. 
diff --git a/doubleml/datasets.py b/doubleml/datasets.py
index 02bac5ea..f534147e 100644
--- a/doubleml/datasets.py
+++ b/doubleml/datasets.py
@@ -1228,3 +1228,120 @@ def f_g(beta_a):
                    'oracle_values': oracle_values}
 
     return res_dict
+
+
+def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False):
+    """
+    Creates a simple synthetic example for heterogeneous treatment effects.
+    The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019).
+
+    The data is generated as
+
+    .. math::
+
+        Y_i & = \\theta_0(X_i)D_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i
+
+        D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i,
+
+    where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i
+    \\sim\\mathcal{U}[-1,1]`.
+    If the treatment is set to be binary, the treatment is generated as
+
+    .. math::
+        D_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}.
+
+    The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have a small random (identical) support,
+    whose values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`.
+    Further, :math:`\\theta_0(x)` defines the conditional treatment effect, which is defined differently depending
+    on the dimension of :math:`x`.
+
+    If the heterogeneity is univariate the conditional treatment effect takes the following form
+
+    .. math::
+        \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_0),
+
+    whereas for the two-dimensional case the conditional treatment effect is defined as
+
+    .. math::
+        \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_1).
+
+    Parameters
+    ----------
+    n_obs : int
+        Number of observations to simulate.
+        Default is ``200``.
+
+    p : int
+        Dimension of covariates.
+        Default is ``30``.
+
+    support_size : int
+        Number of relevant (confounding) covariates.
+        Default is ``5``.
+
+    n_x : int
+        Dimension of the heterogeneity. Can be either ``1`` or ``2``.
+        Default is ``1``.
+
+    binary_treatment : bool
+        Indicates whether the treatment is binary.
+        Default is ``False``.
+
+    Returns
+    -------
+    res_dict : dictionary
+       Dictionary with entries ``data``, ``effects``, ``treatment_effect``.
+
+    """
+    # simple input checks
+    assert n_x in [1, 2], 'n_x must be either 1 or 2.'
+    assert support_size <= p, 'support_size must be smaller than or equal to p.'
+    assert isinstance(binary_treatment, bool), 'binary_treatment must be a boolean.'
+
+    # define treatment effects
+    if n_x == 1:
+        def treatment_effect(x):
+            return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0])
+    else:
+        assert n_x == 2
+
+        # redefine treatment effect
+        def treatment_effect(x):
+            return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 1])
+
+    # Outcome support and coefficients
+    support_y = np.random.choice(np.arange(p), size=support_size, replace=False)
+    coefs_y = np.random.uniform(0, 1, size=support_size)
+    # treatment support and coefficients
+    support_d = support_y
+    coefs_d = np.random.uniform(0, 0.3, size=support_size)
+
+    # noise
+    epsilon = np.random.uniform(-1, 1, size=n_obs)
+    eta = np.random.uniform(-1, 1, size=n_obs)
+
+    # Generate controls, covariates, treatments and outcomes
+    x = np.random.uniform(0, 1, size=(n_obs, p))
+    # Heterogeneous treatment effects
+    te = treatment_effect(x)
+    if binary_treatment:
+        d = 1.0 * (np.dot(x[:, support_d], coefs_d) >= eta)
+    else:
+        d = np.dot(x[:, support_d], coefs_d) + eta
+    y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon
+
+    # Now we build the dataset
+    y_df = pd.DataFrame({'y': y})
+    d_df = pd.DataFrame({'d': d})
+    x_df = pd.DataFrame(
+        data=x,
+        index=np.arange(x.shape[0]),
+        columns=[f'X_{i}' for i in range(x.shape[1])]
+    )
+
+    data = pd.concat([y_df, d_df, x_df], axis=1)
+    res_dict = {
+        'data': data,
+        'effects': te,
+        'treatment_effect': treatment_effect}
+    return res_dict
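Usage sketch for the new data generator (the seed and parameter values are illustrative):

```python
import numpy as np
from doubleml.datasets import make_heterogeneous_data

np.random.seed(42)
res = make_heterogeneous_data(n_obs=500, p=10, support_size=5, n_x=1, binary_treatment=True)
data = res['data']                  # DataFrame with columns y, d, X_0, ..., X_9
ite = res['effects']                # individual effects theta_0(X_i)
theta_0 = res['treatment_effect']   # the callable theta_0 itself
print(data.shape, ite.mean())
```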
diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 082c6f3c..12050d6a 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -19,11 +19,26 @@
 from ._utils_checks import _check_in_zero_one, _check_integer, _check_float, _check_bool, _check_is_partition, \
     _check_all_smpls, _check_smpl_split, _check_smpl_split_tpl, _check_benchmarks
 from ._utils_plots import _sensitivity_contour_plot
-
+from .utils.gain_statistics import gain_statistics
 
 _implemented_data_backends = ['DoubleMLData', 'DoubleMLClusterData']
 
 
+# Remove warnings in future versions
+def deprecation_apply_cross_fitting():
+    warnings.warn('The apply_cross_fitting argument is deprecated and will be removed in future versions. '
+                  'In the future, cross-fitting is applied by default. '
+                  'To rely on sample splitting please use external predictions.',
+                  DeprecationWarning)
+    return
+
+
+def deprecation_dml_procedure():
+    warnings.warn('The dml_procedure argument is deprecated and will be removed in future versions. '
+                  'In the future, dml_procedure is always set to dml2.', DeprecationWarning)
+    return
+
+
 class DoubleML(ABC):
     """Double Machine Learning.
     """
@@ -64,6 +79,9 @@ def __init__(self,
         self._sensitivity_elements = None
         self._sensitivity_params = None
 
+        # initialize external predictions
+        self._external_predictions_implemented = False
+
         # check resampling specifications
         if not isinstance(n_folds, int):
             raise TypeError('The number of folds must be of int type. '
@@ -86,6 +104,9 @@ def __init__(self,
             raise TypeError('draw_sample_splitting must be True or False. '
                             f'Got {str(draw_sample_splitting)}.')
 
+        if not apply_cross_fitting:
+            deprecation_apply_cross_fitting()
+
         # set resampling specifications
         if self._is_cluster_data:
             if (n_folds == 1) or (not apply_cross_fitting):
@@ -100,11 +121,15 @@ def __init__(self,
             # default is no stratification
             self._strata = None
 
-        # check and set dml_procedure and score
         if (not isinstance(dml_procedure, str)) | (dml_procedure not in ['dml1', 'dml2']):
             raise ValueError('dml_procedure must be "dml1" or "dml2". '
                              f'Got {str(dml_procedure)}.')
         self._dml_procedure = dml_procedure
+
+        if dml_procedure == 'dml1':
+            deprecation_dml_procedure()
+            self._dml_procedure = dml_procedure
+
         self._score = score
 
         if (self.n_folds == 1) & self.apply_cross_fitting:
@@ -124,7 +149,7 @@ def __init__(self,
             self.draw_sample_splitting()
 
         # initialize arrays according to obj_dml_data and the resampling settings
-        self._psi, self._psi_deriv, self._psi_elements,\
+        self._psi, self._psi_deriv, self._psi_elements, \
             self._coef, self._se, self._all_coef, self._all_se, self._all_dml1_coef = self._initialize_arrays()
 
         # also initialize bootstrap arrays with the default number of bootstrap replications
@@ -247,7 +272,8 @@ def params_names(self):
     @property
     def predictions(self):
         """
-        The predictions of the nuisance models.
+        The predictions of the nuisance models in the form of a dictionary.
+        Each key refers to a nuisance element with an array of values of shape ``(n_obs, n_rep, n_coefs)``.
         """
         return self._predictions
@@ -329,6 +355,7 @@ def psi(self):
         Values of the score function after calling :meth:`fit`;
         For models (e.g., PLR, IRM, PLIV, IIVM) with linear score (in the parameter)
         :math:`\\psi(W; \\theta, \\eta) = \\psi_a(W; \\eta) \\theta + \\psi_b(W; \\eta)`.
+        The shape is ``(n_obs, n_rep, n_coefs)``.
         """
         return self._psi
 
@@ -339,6 +366,7 @@ def psi_deriv(self):
         after calling :meth:`fit`;
         For models (e.g., PLR, IRM, PLIV, IIVM) with linear score (in the parameter)
         :math:`\\psi_a(W; \\eta)`.
+        The shape is ``(n_obs, n_rep, n_coefs)``.
         """
         return self._psi_deriv
 
@@ -486,7 +514,7 @@ def __psi_deriv(self):
     def __all_se(self):
         return self._all_se[self._i_treat, self._i_rep]
 
-    def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False):
+    def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, store_models=False):
         """
         Estimate DoubleML models.
 
@@ -498,13 +526,20 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False):
 
         store_predictions : bool
             Indicates whether the predictions for the nuisance functions should be stored in ``predictions``.
-            Default is ``False``.
+            Default is ``True``.
 
         store_models : bool
             Indicates whether the fitted models for the nuisance functions should be stored in ``models``.
             This allows to analyze the fitted models or extract information like variable importance.
             Default is ``False``.
 
+        external_predictions : None or dict
+            If ``None``, all models for the learners are fitted and evaluated. If a dictionary containing predictions
+            for a specific learner is supplied, the model uses the supplied nuisance predictions instead. Has to
+            be a nested dictionary where the keys refer to the treatment variables and the keys of the nested
+            dictionaries refer to the corresponding learners.
+            Default is ``None``.
+
         Returns
         -------
         self : object
@@ -523,6 +558,13 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False):
             raise TypeError('store_models must be True or False. '
                             f'Got {str(store_models)}.')
 
+        # check if external predictions are implemented
+        if self._external_predictions_implemented:
+            # check prediction format
+            self._check_external_predictions(external_predictions)
+        elif not self._external_predictions_implemented and external_predictions is not None:
+            raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.")
+
         # initialize rmse arrays for nuisance functions evaluation
         self._initialize_rmses()
 
@@ -546,8 +588,24 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False):
                 if self._dml_data.n_treat > 1:
                     self._dml_data.set_x_d(self._dml_data.d_cols[i_d])
 
+                # set the supplied predictions for the treatment and each learner (including None)
+                ext_prediction_dict = {}
+                for learner in self.params_names:
+                    if external_predictions is None:
+                        ext_prediction_dict[learner] = None
+                    elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys():
+                        if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray):
+                            ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][:, i_rep]
+                        else:
+                            ext_prediction_dict[learner] = None
+                    else:
+                        ext_prediction_dict[learner] = None
+
                 # ml estimation of nuisance models and computation of score elements
-                score_elements, preds = self._nuisance_est(self.__smpls, n_jobs_cv, return_models=store_models)
+                score_elements, preds = self._nuisance_est(self.__smpls, n_jobs_cv,
+                                                           external_predictions=ext_prediction_dict,
+                                                           return_models=store_models)
+
                 self._set_score_elements(score_elements, self._i_rep, self._i_treat)
 
                 # calculate rmses and store predictions and targets of the nuisance models
@@ -985,7 +1043,7 @@ def _initialize_ml_nuisance_params(self):
         pass
 
     @abstractmethod
-    def _nuisance_est(self, smpls, n_jobs_cv, return_models):
+    def _nuisance_est(self, smpls, n_jobs_cv, return_models, external_predictions):
         pass
 
     @abstractmethod
@@ -1037,6 +1095,48 @@ def _check_learner(learner, learner_name, regressor, classifier):
 
         return learner_is_classifier
 
+    def _check_external_predictions(self, external_predictions):
+        if external_predictions is not None:
+            if not isinstance(external_predictions, dict):
+                raise TypeError('external_predictions must be a dictionary. '
+                                f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.')
+
+            supplied_treatments = list(external_predictions.keys())
+            valid_treatments = self._dml_data.d_cols
+            if not set(supplied_treatments).issubset(valid_treatments):
+                raise ValueError('Invalid external_predictions. '
+                                 f'Invalid treatment variable in {str(supplied_treatments)}. '
+                                 'Valid treatment variables ' + ' or '.join(valid_treatments) + '.')
+
+            for treatment in supplied_treatments:
+                if not isinstance(external_predictions[treatment], dict):
+                    raise TypeError('external_predictions must be a nested dictionary. '
+                                    f'For treatment {str(treatment)} a value of type '
+                                    f'{str(type(external_predictions[treatment]))} was passed.')
+
+                supplied_learners = list(external_predictions[treatment].keys())
+                valid_learners = self.params_names
+                if not set(supplied_learners).issubset(valid_learners):
+                    raise ValueError('Invalid external_predictions. '
+                                     f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. '
+                                     'Valid nuisance learners ' + ' or '.join(valid_learners) + '.')
+
+                for learner in supplied_learners:
+                    if not isinstance(external_predictions[treatment][learner], np.ndarray):
+                        raise TypeError('Invalid external_predictions. '
+                                        'The values of the nested list must be a numpy array. '
+                                        'Invalid predictions for treatment ' + str(treatment) +
+                                        ' and learner ' + str(learner) + '. ' +
+                                        f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.')
+
+                    expected_shape = (self._dml_data.n_obs, self.n_rep)
+                    if external_predictions[treatment][learner].shape != expected_shape:
+                        raise ValueError('Invalid external_predictions. '
+                                         f'The supplied predictions have to be of shape {str(expected_shape)}. '
+                                         'Invalid predictions for treatment ' + str(treatment) +
+                                         ' and learner ' + str(learner) + '. ' +
+                                         f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.')
+
     def _initialize_arrays(self):
         # scores
         psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan)
@@ -1898,45 +1998,6 @@ def sensitivity_benchmark(self, benchmarking_set):
         dml_short._dml_data.x_cols = x_list_short
         dml_short.fit()
 
-        # save elements for readability
-        var_y = np.var(self._dml_data.y)
-        var_y_residuals_long = np.squeeze(self.sensitivity_elements['sigma2'], axis=0)
-        nu2_long = np.squeeze(self.sensitivity_elements['nu2'], axis=0)
-        var_y_residuals_short = np.squeeze(dml_short.sensitivity_elements['sigma2'], axis=0)
-        nu2_short = np.squeeze(dml_short.sensitivity_elements['nu2'], axis=0)
-
-        # compute nonparametric R2
-        R2_y_long = 1.0 - np.divide(var_y_residuals_long, var_y)
-        R2_y_short = 1.0 - np.divide(var_y_residuals_short, var_y)
-        R2_riesz = np.divide(nu2_short, nu2_long)
-
-        # Gain statistics
-        all_cf_y_benchmark = np.clip(np.divide((R2_y_long - R2_y_short), (1.0 - R2_y_long)), 0, 1)
-        all_cf_d_benchmark = np.clip(np.divide((1.0 - R2_riesz), R2_riesz), 0, 1)
-        cf_y_benchmark = np.median(all_cf_y_benchmark, axis=0)
-        cf_d_benchmark = np.median(all_cf_d_benchmark, axis=0)
-
-        # change in estimates (slightly different to paper)
-        all_delta_theta = np.transpose(dml_short.all_coef - self.all_coef)
-        delta_theta = np.median(all_delta_theta, axis=0)
-
-        # degree of adversity
-        var_g = var_y_residuals_short - var_y_residuals_long
-        var_riesz = nu2_long - nu2_short
-        denom = np.sqrt(np.multiply(var_g, var_riesz), out=np.zeros_like(var_g), where=(var_g > 0) & (var_riesz > 0))
-        rho_sign = np.sign(all_delta_theta)
-        rho_values = np.clip(np.divide(np.absolute(all_delta_theta),
-                                       denom,
-                                       out=np.ones_like(all_delta_theta),
-                                       where=denom != 0),
-                             0.0, 1.0)
-        all_rho_benchmark = np.multiply(rho_values, rho_sign)
-        rho_benchmark = np.median(all_rho_benchmark, axis=0)
-        benchmark_dict = {
-            "cf_y": cf_y_benchmark,
-            "cf_d": cf_d_benchmark,
-            "rho": rho_benchmark,
-            "delta_theta": delta_theta,
-        }
+        benchmark_dict = gain_statistics(dml_long=self, dml_short=dml_short)
         df_benchmark = pd.DataFrame(benchmark_dict, index=self._dml_data.d_cols)
         return df_benchmark
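A sketch of the resulting `fit(external_predictions=...)` API, shown here for `DoubleMLIRM` (which implements it further below); the constant propensities are purely illustrative:

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from doubleml import DoubleMLIRM
from doubleml.datasets import make_irm_data

dml_data = make_irm_data(n_obs=500)
dml_irm = DoubleMLIRM(dml_data, ml_g=LinearRegression(), ml_m=LogisticRegression())

# nested dict: treatment variable -> nuisance learner -> array of shape (n_obs, n_rep)
ext_preds = {'d': {'ml_m': np.full((dml_data.n_obs, 1), 0.5)}}
dml_irm.fit(external_predictions=ext_preds)  # ml_g0 and ml_g1 are still estimated internally
print(dml_irm.summary)
```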
diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index 3bc44a73..bfdf7671 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -1,6 +1,7 @@
 import statsmodels.api as sm
 import numpy as np
 import pandas as pd
+import warnings
 
 from scipy.stats import norm
 from scipy.linalg import sqrtm
@@ -132,12 +133,13 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500):
         ----------
         basis : :class:`pandas.DataFrame`
             The basis for constructing the confidence interval. Has to have the same form as the basis from
-            the construction. If ``None`` the basis for the construction of the model is used.
-            Default is ``None``
+            the construction. If ``None`` is passed and the basis was constructed for GATEs, the GATEs are returned.
+            Otherwise, confidence intervals for the basis coefficients are returned (with pointwise confidence intervals).
+            Default is ``None``.
 
         joint : bool
             Indicates whether joint confidence intervals are computed.
-            Default is ``False``
+            Default is ``False``.
 
         level : float
             The confidence level.
@@ -182,7 +184,20 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500):
                 basis = pd.DataFrame(np.diag(v=np.full((self._basis.shape[1]), True)))
                 gate_names = list(self._basis.columns.values)
             else:
-                basis = self._basis
+                if joint:
+                    warnings.warn('Returning pointwise confidence intervals for basis coefficients.', UserWarning)
+                # return the confidence intervals for the basis coefficients
+                ci = np.vstack((
+                    self.blp_model.conf_int(alpha=alpha/2)[0],
+                    self.blp_model.params,
+                    self.blp_model.conf_int(alpha=alpha/2)[1])
+                ).T
+                df_ci = pd.DataFrame(
+                    ci,
+                    columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)],
+                    index=self._basis.columns)
+                return df_ci
+
         elif not (basis.shape[1] == self._basis.shape[1]):
             raise ValueError('Invalid basis: DataFrame has to have the exact same number and ordering of columns.')
         elif not list(basis.columns.values) == list(self._basis.columns.values):
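A short sketch of the changed `confint` behavior for a plain (non-GATE) BLP model; the random signal and basis are illustrative stand-ins for the `psi_b` score elements:

```python
import numpy as np
import pandas as pd
from doubleml.double_ml_blp import DoubleMLBLP

np.random.seed(0)
n = 200
orth_signal = np.random.normal(size=n)
basis = pd.DataFrame({'const': np.ones(n), 'x': np.random.uniform(size=n)})

blp = DoubleMLBLP(orth_signal, basis=basis)
blp.fit()
print(blp.confint())            # basis=None: pointwise CIs for the basis coefficients
print(blp.confint(joint=True))  # warns and also falls back to pointwise CIs
```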
diff --git a/doubleml/double_ml_cvar.py b/doubleml/double_ml_cvar.py
index f76c353c..7bc3fc7b 100644
--- a/doubleml/double_ml_cvar.py
+++ b/doubleml/double_ml_cvar.py
@@ -7,7 +7,7 @@
 from .double_ml import DoubleML
 from .double_ml_score_mixins import LinearScoreMixin
 from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, \
-    _normalize_ipw, _dml_tune, _get_bracket_guess, _solve_ipw_score
+    _normalize_ipw, _dml_tune, _get_bracket_guess, _solve_ipw_score, _cond_targets
 from .double_ml_data import DoubleMLData
 from ._utils_resampling import DoubleMLResampling
 from ._utils_checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_treatment, \
@@ -207,7 +207,7 @@ def _initialize_ml_nuisance_params(self):
         self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
                         for learner in ['ml_g', 'ml_m']}
 
-    def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
+    def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y,
                          force_all_finite=False)
         x, d = check_X_y(x, self._dml_data.d,
@@ -296,8 +296,7 @@ def ipw_score(theta):
         m_hat['targets'] = d
 
         # set the target for g to be a float and only relevant values
-        g_hat['targets'] = g_hat['targets'].astype(float)
-        g_hat['targets'][d != self.treatment] = np.nan
+        g_hat['targets'] = _cond_targets(g_hat['targets'], cond_sample=(d == self.treatment))
         if return_models:
             g_hat['models'] = fitted_models['ml_g']
diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py
index 6d469bd3..77bface4 100644
--- a/doubleml/double_ml_did.py
+++ b/doubleml/double_ml_did.py
@@ -146,8 +146,8 @@ def __init__(self,
         self._trimming_rule = trimming_rule
         self._trimming_threshold = trimming_threshold
         _check_trimming(self._trimming_rule, self._trimming_threshold)
-
         self._sensitivity_implemented = True
+        self._external_predictions_implemented = True
 
     @property
     def in_sample_normalization(self):
@@ -194,7 +194,7 @@ def _check_data(self, obj_dml_data):
                              'needs to be specified as treatment variable.')
         return
 
-    def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
+    def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y,
                          force_all_finite=False)
         x, d = check_X_y(x, self._dml_data.d,
@@ -203,31 +203,49 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
         # nuisance g
         # get train indices for d == 0
         smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)
-        g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv,
-                                 est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'],
-                                 return_models=return_models)
-
-        _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls)
-        # adjust target values to consider only compatible subsamples
-        g_hat0['targets'] = g_hat0['targets'].astype(float)
-        g_hat0['targets'][d == 1] = np.nan
 
-        g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv,
-                                 est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'],
-                                 return_models=return_models)
+        # nuisance g for d==0
+        if external_predictions['ml_g0'] is not None:
+            g_hat0 = {'preds': external_predictions['ml_g0'],
+                      'targets': None,
+                      'models': None}
+        else:
+            g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv,
+                                     est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'],
+                                     return_models=return_models)
+
+            _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls)
+            # adjust target values to consider only compatible subsamples
+            g_hat0['targets'] = g_hat0['targets'].astype(float)
+            g_hat0['targets'][d == 1] = np.nan
+
+        # nuisance g for d==1
+        if external_predictions['ml_g1'] is not None:
+            g_hat1 = {'preds': external_predictions['ml_g1'],
+                      'targets': None,
+                      'models': None}
+        else:
+            g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv,
+                                     est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'],
+                                     return_models=return_models)
 
-        _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls)
-        # adjust target values to consider only compatible subsamples
-        g_hat1['targets'] = g_hat1['targets'].astype(float)
-        g_hat1['targets'][d == 0] = np.nan
+            _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls)
+            # adjust target values to consider only compatible subsamples
+            g_hat1['targets'] = g_hat1['targets'].astype(float)
+            g_hat1['targets'][d == 0] = np.nan
 
         # only relevant for observational setting
         m_hat = {'preds': None, 'targets': None, 'models': None}
         if self.score == 'observational':
             # nuisance m
-            m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
-                                    est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
-                                    return_models=return_models)
+            if external_predictions['ml_m'] is not None:
+                m_hat = {'preds': external_predictions['ml_m'],
+                         'targets': None,
+                         'models': None}
+            else:
+                m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
+                                        est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
+                                        return_models=return_models)
             _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
             _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
             m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
diff --git a/doubleml/double_ml_did_cs.py b/doubleml/double_ml_did_cs.py
index 5591632b..55b5e32e 100644
--- a/doubleml/double_ml_did_cs.py
+++ b/doubleml/double_ml_did_cs.py
@@ -148,6 +148,7 @@ def __init__(self,
         _check_trimming(self._trimming_rule, self._trimming_threshold)
 
         self._sensitivity_implemented = True
+        self._external_predictions_implemented = True
 
     @property
     def in_sample_normalization(self):
@@ -207,7 +208,7 @@ def _check_data(self, obj_dml_data):
 
         return
 
-    def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
+    def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y,
                          force_all_finite=False)
         x, d = check_X_y(x, self._dml_data.d,
@@ -228,40 +229,62 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
 
         # nuisance g
         smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t)
-
-        g_hat_d0_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t0, n_jobs=n_jobs_cv,
-                                      est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'],
-                                      return_models=return_models)
-        g_hat_d0_t0['targets'] = g_hat_d0_t0['targets'].astype(float)
-        g_hat_d0_t0['targets'][np.invert((d == 0) & (t == 0))] = np.nan
-
-        g_hat_d0_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t1, n_jobs=n_jobs_cv,
-                                      est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'],
-                                      return_models=return_models)
-        g_hat_d0_t1['targets'] = g_hat_d0_t1['targets'].astype(float)
-        g_hat_d0_t1['targets'][np.invert((d == 0) & (t == 1))] = np.nan
-
-        g_hat_d1_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t0, n_jobs=n_jobs_cv,
-                                      est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'],
-                                      return_models=return_models)
-        g_hat_d1_t0['targets'] = g_hat_d1_t0['targets'].astype(float)
-        g_hat_d1_t0['targets'][np.invert((d == 1) & (t == 0))] = np.nan
-
-        g_hat_d1_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t1, n_jobs=n_jobs_cv,
-                                      est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'],
-                                      return_models=return_models)
-        g_hat_d1_t1['targets'] = g_hat_d1_t1['targets'].astype(float)
-        g_hat_d1_t1['targets'][np.invert((d == 1) & (t == 1))] = np.nan
+        if external_predictions['ml_g_d0_t0'] is not None:
+            g_hat_d0_t0 = {'preds': external_predictions['ml_g_d0_t0'],
+                           'targets': None,
+                           'models': None}
+        else:
+            g_hat_d0_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t0, n_jobs=n_jobs_cv,
+                                          est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'],
+                                          return_models=return_models)
+
+            g_hat_d0_t0['targets'] = g_hat_d0_t0['targets'].astype(float)
+            g_hat_d0_t0['targets'][np.invert((d == 0) & (t == 0))] = np.nan
+        if external_predictions['ml_g_d0_t1'] is not None:
+            g_hat_d0_t1 = {'preds': external_predictions['ml_g_d0_t1'],
+                           'targets': None,
+                           'models': None}
+        else:
+            g_hat_d0_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t1, n_jobs=n_jobs_cv,
+                                          est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'],
+                                          return_models=return_models)
+            g_hat_d0_t1['targets'] = g_hat_d0_t1['targets'].astype(float)
+            g_hat_d0_t1['targets'][np.invert((d == 0) & (t == 1))] = np.nan
+        if external_predictions['ml_g_d1_t0'] is not None:
+            g_hat_d1_t0 = {'preds': external_predictions['ml_g_d1_t0'],
+                           'targets': None,
+                           'models': None}
+        else:
+            g_hat_d1_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t0, n_jobs=n_jobs_cv,
+                                          est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'],
+                                          return_models=return_models)
+            g_hat_d1_t0['targets'] = g_hat_d1_t0['targets'].astype(float)
+            g_hat_d1_t0['targets'][np.invert((d == 1) & (t == 0))] = np.nan
+        if external_predictions['ml_g_d1_t1'] is not None:
+            g_hat_d1_t1 = {'preds': external_predictions['ml_g_d1_t1'],
+                           'targets': None,
+                           'models': None}
+        else:
+            g_hat_d1_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t1, n_jobs=n_jobs_cv,
+                                          est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'],
+                                          return_models=return_models)
+            g_hat_d1_t1['targets'] = g_hat_d1_t1['targets'].astype(float)
+            g_hat_d1_t1['targets'][np.invert((d == 1) & (t == 1))] = np.nan
 
         # only relevant for observational or experimental setting
         m_hat = {'preds': None, 'targets': None, 'models': None}
         if self.score == 'observational':
             # nuisance m
-            m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
-                                    est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
-                                    return_models=return_models)
-            _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
-            _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
+            if external_predictions['ml_m'] is not None:
+                m_hat = {'preds': external_predictions['ml_m'],
+                         'targets': None,
+                         'models': None}
+            else:
+                m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
+                                        est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
+                                        return_models=return_models)
+                _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+                _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
             m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
 
         psi_a, psi_b = self._score_elements(y, d, t,
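The nested-dictionary keys have to match each model's nuisance names (`params_names`); a hypothetical sketch for `DoubleMLDIDCS` with placeholder arrays (any learner left out of the dictionary is still estimated internally, as in the branches above):

```python
import numpy as np

n_obs, n_rep = 500, 1
ext_preds = {'d': {
    'ml_g_d0_t0': np.zeros((n_obs, n_rep)),
    'ml_g_d0_t1': np.zeros((n_obs, n_rep)),
    'ml_g_d1_t0': np.zeros((n_obs, n_rep)),
    'ml_g_d1_t1': np.zeros((n_obs, n_rep)),
    'ml_m': np.full((n_obs, n_rep), 0.5),
}}
# dml_did_cs.fit(external_predictions=ext_preds)  # assuming a fitted-ready DoubleMLDIDCS object
```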
" f'Got {str(subgroups["never_takers"])}.') self.subgroups = subgroups + self._external_predictions_implemented = True @property def normalize_ipw(self): @@ -246,7 +247,7 @@ def _check_data(self, obj_dml_data): raise ValueError(err_msg) return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, z = check_X_y(x, np.ravel(self._dml_data.z), @@ -258,13 +259,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): smpls_z0, smpls_z1 = _get_cond_smpls(smpls, z) # nuisance g - g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat0['targets'] = g_hat0['targets'].astype(float) - g_hat0['targets'][z == 1] = np.nan + if external_predictions['ml_g0'] is not None: + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + else: + g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) + # adjust target values to consider only compatible subsamples + g_hat0['targets'] = g_hat0['targets'].astype(float) + g_hat0['targets'][z == 1] = np.nan if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat0['preds']) == 'binary') @@ -276,14 +282,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): 'probabilities and not labels are predicted.') _check_is_propensity(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12) - - g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat1['targets'] = g_hat1['targets'].astype(float) - g_hat1['targets'][z == 0] = np.nan + if external_predictions['ml_g1'] is not None: + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} + else: + g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) + # adjust target values to consider only compatible subsamples + g_hat1['targets'] = g_hat1['targets'].astype(float) + g_hat1['targets'][z == 0] = np.nan if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat1['preds']) == 'binary') @@ -297,34 +307,53 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): _check_is_propensity(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12) # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], 
 
         if self._dml_data.binary_outcome:
             binary_preds = (type_of_target(g_hat1['preds']) == 'binary')
@@ -297,34 +307,53 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
             _check_is_propensity(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12)
 
         # nuisance m
-        m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv,
-                                est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
-                                return_models=return_models)
-        _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
-        _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
+        if external_predictions['ml_m'] is not None:
+            m_hat = {'preds': external_predictions['ml_m'],
+                     'targets': None,
+                     'models': None}
+        else:
+            m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv,
+                                    est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
+                                    return_models=return_models)
+            _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+            _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
 
         # nuisance r
+        r0 = external_predictions['ml_r0'] is not None
         if self.subgroups['always_takers']:
-            r_hat0 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z0, n_jobs=n_jobs_cv,
-                                     est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'],
-                                     return_models=return_models)
+            if r0:
+                r_hat0 = {'preds': external_predictions['ml_r0'],
+                          'targets': None,
+                          'models': None}
+            else:
+                r_hat0 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z0, n_jobs=n_jobs_cv,
+                                         est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'],
+                                         return_models=return_models)
         else:
             r_hat0 = {'preds': np.zeros_like(d), 'targets': np.zeros_like(d), 'models': None}
-        _check_finite_predictions(r_hat0['preds'], self._learner['ml_r'], 'ml_r', smpls)
-        # adjust target values to consider only compatible subsamples
-        r_hat0['targets'] = r_hat0['targets'].astype(float)
-        r_hat0['targets'][z == 1] = np.nan
+        if not r0:
+            _check_finite_predictions(r_hat0['preds'], self._learner['ml_r'], 'ml_r', smpls)
+            # adjust target values to consider only compatible subsamples
+            r_hat0['targets'] = r_hat0['targets'].astype(float)
+            r_hat0['targets'][z == 1] = np.nan
 
+        r1 = external_predictions['ml_r1'] is not None
         if self.subgroups['never_takers']:
-            r_hat1 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z1, n_jobs=n_jobs_cv,
-                                     est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'],
-                                     return_models=return_models)
+            if r1:
+                r_hat1 = {'preds': external_predictions['ml_r1'],
+                          'targets': None,
+                          'models': None}
+            else:
+                r_hat1 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z1, n_jobs=n_jobs_cv,
+                                         est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'],
+                                         return_models=return_models)
        else:
             r_hat1 = {'preds': np.ones_like(d), 'targets': np.ones_like(d), 'models': None}
-        _check_finite_predictions(r_hat1['preds'], self._learner['ml_r'], 'ml_r', smpls)
-        # adjust target values to consider only compatible subsamples
-        r_hat1['targets'] = r_hat1['targets'].astype(float)
-        r_hat1['targets'][z == 0] = np.nan
+        if not r1:
+            _check_finite_predictions(r_hat1['preds'], self._learner['ml_r'], 'ml_r', smpls)
+            # adjust target values to consider only compatible subsamples
+            r_hat1['targets'] = r_hat1['targets'].astype(float)
+            r_hat1['targets'][z == 0] = np.nan
 
         psi_a, psi_b = self._score_elements(y, z, d,
                                             g_hat0['preds'], g_hat1['preds'], m_hat['preds'],
diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 4bbe42f7..84009533 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -11,8 +11,9 @@
 from .double_ml_data import DoubleMLData
 from .double_ml_score_mixins import LinearScoreMixin
 
-from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm, _normalize_ipw
-from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, _check_integer
+from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm, _normalize_ipw, _cond_targets
+from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, _check_integer, \
+    _check_weights
 
 
 class DoubleMLIRM(LinearScoreMixin, DoubleML):
@@ -47,6 +48,15 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML):
         or a callable object / function with signature ``psi_a, psi_b = score(y, d, g_hat0, g_hat1, m_hat, smpls)``.
         Default is ``'ATE'``.
 
+    weights : array, dict or None
+        A numpy array of weights for each individual observation. If ``None``, then the ``'ATE'`` score
+        is applied (corresponds to weights equal to 1). Can only be used with ``score = 'ATE'``.
+        An array has to be of shape ``(n,)``, where ``n`` is the number of observations.
+        A dictionary can be used to specify weights which depend on the treatment variable.
+        In this case, the dictionary has to contain two keys ``weights`` and ``weights_bar``, where the values
+        have to be arrays of shape ``(n,)`` and ``(n, n_rep)``.
+        Default is ``None``.
+
     dml_procedure : str
         A str (``'dml1'`` or ``'dml2'``) specifying the double machine learning algorithm.
         Default is ``'dml2'``.
@@ -118,6 +128,7 @@ def __init__(self,
                  n_folds=5,
                  n_rep=1,
                  score='ATE',
+                 weights=None,
                  dml_procedure='dml2',
                  normalize_ipw=False,
                  trimming_rule='truncate',
@@ -160,6 +171,10 @@ def __init__(self,
         _check_trimming(self._trimming_rule, self._trimming_threshold)
 
         self._sensitivity_implemented = True
+        self._external_predictions_implemented = True
+
+        _check_weights(weights, score, obj_dml_data.n_obs, self.n_rep)
+        self._initialize_weights(weights)
 
     @property
     def normalize_ipw(self):
@@ -182,11 +197,49 @@ def trimming_threshold(self):
         """
         return self._trimming_threshold
 
+    @property
+    def weights(self):
+        """
+        Specifies the weights for a weighted ATE.
+        """
+        return self._weights
+
     def _initialize_ml_nuisance_params(self):
         valid_learner = ['ml_g0', 'ml_g1', 'ml_m']
         self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
                         for learner in valid_learner}
 
+    def _initialize_weights(self, weights):
+        if weights is None:
+            weights = np.ones(self._dml_data.n_obs)
+        if isinstance(weights, np.ndarray):
+            self._weights = {'weights': weights}
+        else:
+            assert isinstance(weights, dict)
+            self._weights = weights
+
+    def _get_weights(self, m_hat=None):
+        # standard case for ATE
+        if self.score == 'ATE':
+            weights = self._weights['weights']
+            if 'weights_bar' not in self._weights.keys():
+                weights_bar = self._weights['weights']
+            else:
+                weights_bar = self._weights['weights_bar'][:, self._i_rep]
+        else:
+            # special case for ATTE
+            assert self.score == 'ATTE'
+            assert m_hat is not None
+            subgroup = self._weights['weights'] * self._dml_data.d
+            subgroup_probability = np.mean(subgroup)
+            weights = np.divide(subgroup, subgroup_probability)
+
+            weights_bar = np.divide(
+                np.multiply(m_hat, self._weights['weights']),
+                subgroup_probability)
+
+        return weights, weights_bar
+
     def _check_data(self, obj_dml_data):
         if not isinstance(obj_dml_data, DoubleMLData):
             raise TypeError('The data must be of DoubleMLData type. '
@@ -206,41 +259,58 @@ def _check_data(self, obj_dml_data):
-    def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
+    def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y,
                          force_all_finite=False)
         x, d = check_X_y(x, self._dml_data.d,
                          force_all_finite=False)
         # get train indices for d == 0 and d == 1
         smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)
+        g0_external = external_predictions['ml_g0'] is not None
+        g1_external = external_predictions['ml_g1'] is not None
+        m_external = external_predictions['ml_m'] is not None
 
         # nuisance g
-        g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv,
-                                 est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'],
-                                 return_models=return_models)
-        _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls)
-        # adjust target values to consider only compatible subsamples
-        g_hat0['targets'] = g_hat0['targets'].astype(float)
-        g_hat0['targets'][d == 1] = np.nan
-
-        if self._dml_data.binary_outcome:
-            binary_preds = (type_of_target(g_hat0['preds']) == 'binary')
-            zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0)
-            if binary_preds & zero_one_preds:
-                raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, '
-                                 f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also '
-                                 'observed to be binary with values 0 and 1. Make sure that for classifiers '
-                                 'probabilities and not labels are predicted.')
-
-        g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv,
-                                 est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'],
-                                 return_models=return_models)
-        _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls)
-        # adjust target values to consider only compatible subsamples
-        g_hat1['targets'] = g_hat1['targets'].astype(float)
-        g_hat1['targets'][d == 0] = np.nan
-
-        if self._dml_data.binary_outcome:
+        if g0_external:
+            # use external predictions
+            g_hat0 = {'preds': external_predictions['ml_g0'],
+                      'targets': None,
+                      'models': None}
+        else:
+            g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv,
+                                     est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'],
+                                     return_models=return_models)
+            _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls)
+            g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0))
+
+            if self._dml_data.binary_outcome:
+                binary_preds = (type_of_target(g_hat0['preds']) == 'binary')
+                zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0)
+                if binary_preds & zero_one_preds:
+                    raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, '
+                                     f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also '
+                                     'observed to be binary with values 0 and 1. Make sure that for classifiers '
+                                     'probabilities and not labels are predicted.')
+        if self.score == 'ATTE':
+            # skip g_hat1 estimation
+            g_hat1 = {'preds': None,
+                      'targets': None,
+                      'models': None}
+
+        elif g1_external:
+            # use external predictions
+            g_hat1 = {'preds': external_predictions['ml_g1'],
+                      'targets': None,
+                      'models': None}
+        else:
+            g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv,
+                                     est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'],
+                                     return_models=return_models)
+            _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls)
+            # adjust target values to consider only compatible subsamples
+            g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1))
+
+        if self._dml_data.binary_outcome & (self.score != 'ATTE'):
             binary_preds = (type_of_target(g_hat1['preds']) == 'binary')
             zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0)
             if binary_preds & zero_one_preds:
@@ -250,12 +320,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
                 'probabilities and not labels are predicted.')
 
         # nuisance m
-        m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
-                                est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
-                                return_models=return_models)
-        _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
-        _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
-        m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
+        if m_external:
+            # use external predictions
+            m_hat = {'preds': external_predictions['ml_m'],
+                     'targets': None,
+                     'models': None}
+        else:
+            m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
+                                    est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
+                                    return_models=return_models)
+            _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+            _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
+            m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
 
         psi_a, psi_b = self._score_elements(y, d,
                                             g_hat0['preds'], g_hat1['preds'], m_hat['preds'],
@@ -277,42 +353,37 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
 
     def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls):
-        # fraction of treated for ATTE
-        p_hat = None
-        if self.score == 'ATTE':
-            p_hat = np.full_like(d, np.nan, dtype='float64')
-            for _, test_index in smpls:
-                p_hat[test_index] = np.mean(d[test_index])
-
+        m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64')
         if self.normalize_ipw:
             if self.dml_procedure == 'dml1':
                 for _, test_index in smpls:
-                    m_hat[test_index] = _normalize_ipw(m_hat[test_index], d[test_index])
+                    m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index])
             else:
-                m_hat = _normalize_ipw(m_hat, d)
+                m_hat_adj = _normalize_ipw(m_hat, d)
+        else:
+            m_hat_adj = m_hat
 
         # compute residuals
         u_hat0 = y - g_hat0
-        u_hat1 = None
-        if self.score == 'ATE':
-            u_hat1 = y - g_hat1
-
-        if isinstance(self.score, str):
+        if self.score == 'ATTE':
+            g_hat1 = y
+        u_hat1 = y - g_hat1
+
+        if (self.score == 'ATE') or (self.score == 'ATTE'):
+            weights, weights_bar = self._get_weights(m_hat=m_hat_adj)
+            psi_b = weights * (g_hat1 - g_hat0) \
+                + weights_bar * (
+                    np.divide(np.multiply(d, u_hat1), m_hat_adj)
+                    - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat_adj))
             if self.score == 'ATE':
-                psi_b = g_hat1 - g_hat0 \
-                    + np.divide(np.multiply(d, u_hat1), m_hat) \
-                    - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat)
-                psi_a = np.full_like(m_hat, -1.0)
+                psi_a = np.full_like(m_hat_adj, -1.0)
             else:
                 assert self.score == 'ATTE'
-                psi_b = np.divide(np.multiply(d, u_hat0), p_hat) \
-                    - np.divide(np.multiply(m_hat, np.multiply(1.0-d, u_hat0)),
-                                np.multiply(p_hat, (1.0 - m_hat)))
-                psi_a = - np.divide(d, p_hat)
+                psi_a = -1.0 * weights
         else:
             assert callable(self.score)
             psi_a, psi_b = self.score(y=y, d=d,
-                                      g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat,
+                                      g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat_adj,
                                       smpls=smpls)
 
         return psi_a, psi_b
@@ -324,16 +395,14 @@ def _sensitivity_element_est(self, preds):
 
         m_hat = preds['predictions']['ml_m']
         g_hat0 = preds['predictions']['ml_g0']
-        g_hat1 = preds['predictions']['ml_g1']
-
-        # use weights to make this extendable
         if self.score == 'ATE':
-            weights = np.ones_like(d)
-            weights_bar = np.ones_like(d)
+            g_hat1 = preds['predictions']['ml_g1']
         else:
             assert self.score == 'ATTE'
-            weights = np.divide(d, np.mean(d))
-            weights_bar = np.divide(m_hat, np.mean(d))
+            g_hat1 = y
+
+        # use weights to make this extendable
+        weights, weights_bar = self._get_weights(m_hat=m_hat)
 
         sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0-d, g_hat0))
         sigma2 = np.mean(sigma2_score_element)
@@ -397,7 +466,7 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
 
         return res
 
-    def cate(self, basis):
+    def cate(self, basis, is_gate=False):
         """
         Calculate conditional average treatment effects (CATE) for a given basis.
 
@@ -406,6 +475,9 @@ def cate(self, basis):
         basis : :class:`pandas.DataFrame`
             The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``,
             where ``n_obs`` is the number of observations and ``d`` is the number of predictors.
+        is_gate : bool
+            Indicates whether the basis is constructed for GATEs (dummy-basis).
+            Default is ``False``.
 
         Returns
         -------
@@ -424,35 +496,26 @@ def cate(self, basis):
         # define the orthogonal signal
         orth_signal = self.psi_elements['psi_b'].reshape(-1)
         # fit the best linear predictor
-        model = DoubleMLBLP(orth_signal, basis=basis).fit()
-
+        model = DoubleMLBLP(orth_signal, basis=basis, is_gate=is_gate)
+        model.fit()
         return model
 
     def gate(self, groups):
         """
-        Calculate group average treatment effects (GATE) for mutually exclusive groups.
+        Calculate group average treatment effects (GATE) for groups.
 
         Parameters
         ----------
         groups : :class:`pandas.DataFrame`
-            The group indicator for estimating the best linear predictor.
+            The group indicator for estimating the best linear predictor. Groups should be mutually exclusive.
             Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations
             and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str).
 
         Returns
         -------
-        model : :class:`doubleML.DoubleMLBLPGATE`
+        model : :class:`doubleML.DoubleMLBLP`
            Best linear predictor model for group effects.
         """
-        valid_score = ['ATE']
-        if self.score not in valid_score:
-            raise ValueError('Invalid score ' + self.score + '. ' +
-                             'Valid score ' + ' or '.join(valid_score) + '.')
-
-        if self.n_rep != 1:
-            raise NotImplementedError('Only implemented for one repetition. ' +
-                                      f'Number of repetitions is {str(self.n_rep)}.')
-
         if not isinstance(groups, pd.DataFrame):
             raise TypeError('Groups must be of DataFrame type. '
                             f'Groups of type {str(type(groups))} was passed.')
@@ -467,11 +530,7 @@ def gate(self, groups):
         if any(groups.sum(0) <= 5):
             warnings.warn('At least one group effect is estimated with less than 6 observations.')
 
-        # define the orthogonal signal
-        orth_signal = self.psi_elements['psi_b'].reshape(-1)
-        # fit the best linear predictor for GATE (different confint() method)
-        model = DoubleMLBLP(orth_signal, basis=groups, is_gate=True).fit()
-
+        model = self.cate(groups, is_gate=True)
         return model
 
     def policy_tree(self, features, depth=2, **tree_params):
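A usage sketch combining the new `weights` argument with the rewritten `gate()` (data, learners, and the group split are illustrative):

```python
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from doubleml import DoubleMLIRM
from doubleml.datasets import make_irm_data

np.random.seed(123)
dml_data = make_irm_data(n_obs=500)
weights = np.random.uniform(0.5, 1.5, size=500)  # per-observation weights, score='ATE' only
dml_irm = DoubleMLIRM(dml_data, ml_g=RandomForestRegressor(), ml_m=RandomForestClassifier(),
                      score='ATE', weights=weights)
dml_irm.fit()

# mutually exclusive, dummy-coded groups; gate() now delegates to cate(..., is_gate=True)
groups = pd.DataFrame({'group_0': dml_data.data['X1'] <= 0,
                       'group_1': dml_data.data['X1'] > 0})
gate_model = dml_irm.gate(groups)
print(gate_model.confint())
```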
+ ) # initialize starting values and bounds self._coef_bounds = (self._dml_data.y.min(), self._dml_data.y.max()) @@ -155,25 +158,36 @@ def __init__(self, self._trimming_threshold = trimming_threshold _check_trimming(self._trimming_rule, self._trimming_threshold) - _ = self._check_learner(ml_g, 'ml_g', regressor=False, classifier=True) - _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) - self._learner = {'ml_m_z': clone(ml_m), - 'ml_g_du_z0': clone(ml_g), 'ml_g_du_z1': clone(ml_g), - 'ml_m_d_z0': clone(ml_m), 'ml_m_d_z1': clone(ml_m)} - self._predict_method = {'ml_m_z': 'predict_proba', - 'ml_g_du_z0': 'predict_proba', 'ml_g_du_z1': 'predict_proba', - 'ml_m_d_z0': 'predict_proba', 'ml_m_d_z1': 'predict_proba'} + _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) + _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) + self._learner = { + "ml_m_z": clone(ml_m), + "ml_g_du_z0": clone(ml_g), + "ml_g_du_z1": clone(ml_g), + "ml_m_d_z0": clone(ml_m), + "ml_m_d_z1": clone(ml_m), + } + self._predict_method = { + "ml_m_z": "predict_proba", + "ml_g_du_z0": "predict_proba", + "ml_g_du_z1": "predict_proba", + "ml_m_d_z0": "predict_proba", + "ml_m_d_z1": "predict_proba", + } self._initialize_ml_nuisance_params() if draw_sample_splitting: strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.z.reshape(-1, 1) - obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, - n_rep=self.n_rep, - n_obs=self._dml_data.n_obs, - apply_cross_fitting=self.apply_cross_fitting, - stratify=strata) + obj_dml_resampling = DoubleMLResampling( + n_folds=self.n_folds, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + apply_cross_fitting=self.apply_cross_fitting, + stratify=strata, + ) self._smpls = obj_dml_resampling.split_samples() + self._external_predictions_implemented = True @property def quantile(self): @@ -219,33 +233,33 @@ def trimming_threshold(self): @property def _score_element_names(self): - return ['ind_d', 'm_z', 'g_du_z0', 'g_du_z1', 'y', 'z', 'comp_prob'] + return ["ind_d", "m_z", "g_du_z0", "g_du_z1", "y", "z", "comp_prob"] def _compute_ipw_score(self, theta, d, y, prop, z, comp_prob): sign = 2 * self.treatment - 1.0 weights = sign * (z / prop - (1 - z) / (1 - prop)) / comp_prob u = (d == self._treatment) * (y <= theta) - v = -1. 
* self.quantile
+        v = -1.0 * self.quantile
         score = weights * u + v
         return score

     def _compute_score(self, psi_elements, coef, inds=None):
         sign = 2 * self.treatment - 1.0
-        ind_d = psi_elements['ind_d']
-        m_z = psi_elements['m_z']
-        g_du_z0 = psi_elements['g_du_z0']
-        g_du_z1 = psi_elements['g_du_z1']
-        y = psi_elements['y']
-        z = psi_elements['z']
-        comp_prob = psi_elements['comp_prob']
+        ind_d = psi_elements["ind_d"]
+        m_z = psi_elements["m_z"]
+        g_du_z0 = psi_elements["g_du_z0"]
+        g_du_z1 = psi_elements["g_du_z1"]
+        y = psi_elements["y"]
+        z = psi_elements["z"]
+        comp_prob = psi_elements["comp_prob"]
         if inds is not None:
-            ind_d = psi_elements['ind_d'][inds]
-            m_z = psi_elements['m_z'][inds]
-            g_du_z0 = psi_elements['g_du_z0'][inds]
-            g_du_z1 = psi_elements['g_du_z1'][inds]
-            y = psi_elements['y'][inds]
-            z = psi_elements['z'][inds]
+            ind_d = psi_elements["ind_d"][inds]
+            m_z = psi_elements["m_z"][inds]
+            g_du_z0 = psi_elements["g_du_z0"][inds]
+            g_du_z1 = psi_elements["g_du_z1"][inds]
+            y = psi_elements["y"][inds]
+            z = psi_elements["z"][inds]

         score1 = g_du_z1 - g_du_z0
         score2 = (z / m_z) * (ind_d * (y <= coef) - g_du_z1)
@@ -255,17 +269,17 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None):
         sign = 2 * self.treatment - 1.0
-        ind_d = psi_elements['ind_d']
-        y = psi_elements['y']
-        m_z = psi_elements['m_z']
-        z = psi_elements['z']
-        comp_prob = psi_elements['comp_prob']
+        ind_d = psi_elements["ind_d"]
+        y = psi_elements["y"]
+        m_z = psi_elements["m_z"]
+        z = psi_elements["z"]
+        comp_prob = psi_elements["comp_prob"]
         if inds is not None:
-            ind_d = psi_elements['ind_d'][inds]
-            y = psi_elements['y'][inds]
-            m_z = psi_elements['m_z'][inds]
-            z = psi_elements['z'][inds]
+            ind_d = psi_elements["ind_d"][inds]
+            y = psi_elements["y"][inds]
+            m_z = psi_elements["m_z"][inds]
+            z = psi_elements["z"][inds]

         score_weights = sign * ((z / m_z) - (1 - z) / (1 - m_z)) * ind_d / comp_prob
         u = (y - coef).reshape(-1, 1)
@@ -274,179 +288,242 @@
         return deriv

     def _initialize_ml_nuisance_params(self):
-        self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
-                        for learner in ['ml_m_z', 'ml_g_du_z0', 'ml_g_du_z1',
-                                        'ml_m_d_z0', 'ml_m_d_z1']}
-
-    def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
-        x, y = check_X_y(self._dml_data.x, self._dml_data.y,
-                         force_all_finite=False)
-        x, d = check_X_y(x, self._dml_data.d,
-                         force_all_finite=False)
-        x, z = check_X_y(x, np.ravel(self._dml_data.z),
-                         force_all_finite=False)
+        self._params = {
+            learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
+            for learner in ["ml_m_z", "ml_g_du_z0", "ml_g_du_z1", "ml_m_d_z0", "ml_m_d_z1"]
+        }
+
+    def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
+        x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+        x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
+        x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False)
+
+        m_z = external_predictions["ml_m_z"] is not None
+        m_d_d0 = external_predictions["ml_m_d_z0"] is not None
+        m_d_d1 = external_predictions["ml_m_d_z1"] is not None
+        g_du_z0 = external_predictions["ml_g_du_z0"] is not None
+        g_du_z1 = external_predictions["ml_g_du_z1"] is not None
+        ext_preds = [m_z, m_d_d0, m_d_d1, g_du_z0, g_du_z1]

         # create strata for splitting
         strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.z.reshape(-1, 1)

         # initialize nuisance predictions, targets and models
-        m_z_hat = {'models': None,
-                   'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
-                   'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan)
-                   }
-        m_d_z0_hat = copy.deepcopy(m_z_hat)
-        m_d_z1_hat = copy.deepcopy(m_z_hat)
-        g_du_z0_hat = copy.deepcopy(m_z_hat)
-        g_du_z1_hat = copy.deepcopy(m_z_hat)
-
-        # initialize models
-        fitted_models = {}
-        for learner in self.params_names:
-            # set nuisance model parameters
-            est_params = self._get_params(learner)
-            if est_params is not None:
-                fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold])
-                                          for i_fold in range(self.n_folds)]
-            else:
-                fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)]
+        if not any(ext_preds):
+            m_z_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+            }
+            m_d_z0_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+            }
+            m_d_z1_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+            }
+            g_du_z0_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+            }
+            g_du_z1_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+            }
+
+            # initialize models
+            fitted_models = {}
+            for learner in self.params_names:
+                # set nuisance model parameters
+                est_params = self._get_params(learner)
+                if est_params is not None:
+                    fitted_models[learner] = [
+                        clone(self._learner[learner]).set_params(**est_params[i_fold]) for i_fold in range(self.n_folds)
+                    ]
+                else:
+                    fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)]
+            ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan)
+        elif any(ext_preds) and not all(ext_preds):
+            raise ValueError("External predictions for all estimations or for none are required.")
+        else:
+            m_z_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": external_predictions["ml_m_z"],
+            }
+            m_d_z0_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": external_predictions["ml_m_d_z0"],
+            }
+            m_d_z1_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": external_predictions["ml_m_d_z1"],
+            }
+            g_du_z0_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": external_predictions["ml_g_du_z0"],
+            }
+            g_du_z1_hat = {
+                "models": None,
+                "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+                "preds": external_predictions["ml_g_du_z1"],
+            }

-        ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan)
         # calculate nuisance functions over different folds
-        for i_fold in range(self.n_folds):
-            train_inds = smpls[i_fold][0]
-            test_inds = smpls[i_fold][1]
-
-            # start nested crossfitting
-            train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5,
-                                                          random_state=42, stratify=strata[train_inds])
-            smpls_prelim = [(train, test) for train, test in
-                            StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1,
y=strata[train_inds_1])] - - d_train_1 = d[train_inds_1] - y_train_1 = y[train_inds_1] - x_train_1 = x[train_inds_1, :] - z_train_1 = z[train_inds_1] - - # preliminary propensity for z - ml_m_z_prelim = clone(fitted_models['ml_m_z'][i_fold]) - m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, - method='predict_proba', smpls=smpls_prelim)['preds'] - - m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold) - if self._normalize_ipw: - m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) - - # propensity for d == 1 cond. on z == 0 (training set 1) - z0_train_1 = z_train_1 == 0 - x_z0_train_1 = x_train_1[z0_train_1, :] - d_z0_train_1 = d_train_1[z0_train_1] - ml_m_d_z0_prelim = clone(fitted_models['ml_m_d_z0'][i_fold]) - ml_m_d_z0_prelim.fit(x_z0_train_1, d_z0_train_1) - m_d_z0_hat_prelim = _predict_zero_one_propensity(ml_m_d_z0_prelim, x_train_1) - - # propensity for d == 1 cond. on z == 1 (training set 1) - z1_train_1 = z_train_1 == 1 - x_z1_train_1 = x_train_1[z1_train_1, :] - d_z1_train_1 = d_train_1[z1_train_1] - ml_m_d_z1_prelim = clone(fitted_models['ml_m_d_z1'][i_fold]) - ml_m_d_z1_prelim.fit(x_z1_train_1, d_z1_train_1) - m_d_z1_hat_prelim = _predict_zero_one_propensity(ml_m_d_z1_prelim, x_train_1) - - # preliminary estimate of theta_2_aux - comp_prob_prelim = np.mean(m_d_z1_hat_prelim - m_d_z0_hat_prelim - + z_train_1 / m_z_hat_prelim * (d_train_1 - m_d_z1_hat_prelim) - - (1 - z_train_1) / (1 - m_z_hat_prelim) * (d_train_1 - m_d_z0_hat_prelim)) - - # preliminary ipw estimate - def ipw_score(theta): - res = np.mean(self._compute_ipw_score(theta, d_train_1, y_train_1, m_z_hat_prelim, - z_train_1, comp_prob_prelim)) - return res - - _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) - ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) - ipw_vec[i_fold] = ipw_est - - # use the preliminary estimates to fit the nuisance parameters on train_2 - d_train_2 = d[train_inds_2] - y_train_2 = y[train_inds_2] - x_train_2 = x[train_inds_2, :] - z_train_2 = z[train_inds_2] - - # define test observations - d_test = d[test_inds] - y_test = y[test_inds] - x_test = x[test_inds, :] - z_test = z[test_inds] - - # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. on z == 0 - z0_train_2 = z_train_2 == 0 - x_z0_train_2 = x_train_2[z0_train_2, :] - du_z0_train_2 = (d_train_2[z0_train_2] == self._treatment) * (y_train_2[z0_train_2] <= ipw_est) - fitted_models['ml_g_du_z0'][i_fold].fit(x_z0_train_2, du_z0_train_2) - g_du_z0_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g_du_z0'][i_fold], x_test) - - # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. 
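
# Conceptual sketch, not part of the patch: _get_bracket_guess and
# _solve_ipw_score are internal helpers; at their core they bracket and solve
# a monotone empirical score, e.g. with scipy's brentq (weights omitted here):
import numpy as np
from scipy.optimize import brentq

rng = np.random.default_rng(42)
y_sim = rng.normal(size=1000)
tau = 0.5

def toy_ipw_score(theta):
    # empirical analogue of E[1{Y <= theta}] - tau
    return np.mean(y_sim <= theta) - tau

theta_hat = brentq(toy_ipw_score, y_sim.min(), y_sim.max())
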
on z == 1 - z1_train_2 = z_train_2 == 1 - x_z1_train_2 = x_train_2[z1_train_2, :] - du_z1_train_2 = (d_train_2[z1_train_2] == self._treatment) * (y_train_2[z1_train_2] <= ipw_est) - fitted_models['ml_g_du_z1'][i_fold].fit(x_z1_train_2, du_z1_train_2) - g_du_z1_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g_du_z1'][i_fold], x_test) - - # the predictions of both should only be evaluated conditional on z == 0 or z == 1 - test_inds_z0 = test_inds[z_test == 0] - test_inds_z1 = test_inds[z_test == 1] - g_du_z0_hat['targets'][test_inds_z0] = (1.0 * (d_test[z_test == 0] == self._treatment) * - (y_test[z_test == 0] <= ipw_est)) - g_du_z1_hat['targets'][test_inds_z1] = (1.0 * (d_test[z_test == 1] == self._treatment) * - (y_test[z_test == 1] <= ipw_est)) - - # refit nuisance elements for the local potential quantile - z_train = z[train_inds] - x_train = x[train_inds] - d_train = d[train_inds] - - # refit propensity for z (whole training set) - fitted_models['ml_m_z'][i_fold].fit(x_train, z_train) - m_z_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m_z'][i_fold], x_test) - - # refit propensity for d == 1 cond. on z == 0 (whole training set) - z0_train = z_train == 0 - x_z0_train = x_train[z0_train, :] - d_z0_train = d_train[z0_train] - fitted_models['ml_m_d_z0'][i_fold].fit(x_z0_train, d_z0_train) - m_d_z0_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m_d_z0'][i_fold], x_test) - - # propensity for d == 1 cond. on z == 1 (whole training set) - x_z1_train = x_train[z_train == 1, :] - d_z1_train = d_train[z_train == 1] - fitted_models['ml_m_d_z1'][i_fold].fit(x_z1_train, d_z1_train) - m_d_z1_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m_d_z1'][i_fold], x_test) + if not all(ext_preds): + for i_fold in range(self.n_folds): + train_inds = smpls[i_fold][0] + test_inds = smpls[i_fold][1] + + # start nested crossfitting + train_inds_1, train_inds_2 = train_test_split( + train_inds, test_size=0.5, random_state=42, stratify=strata[train_inds] + ) + smpls_prelim = [ + (train, test) + for train, test in StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=strata[train_inds_1]) + ] + + d_train_1 = d[train_inds_1] + y_train_1 = y[train_inds_1] + x_train_1 = x[train_inds_1, :] + z_train_1 = z[train_inds_1] + + # preliminary propensity for z + ml_m_z_prelim = clone(fitted_models["ml_m_z"][i_fold]) + m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, + method="predict_proba", smpls=smpls_prelim)[ + "preds" + ] + + m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold) + if self._normalize_ipw: + m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) + + # propensity for d == 1 cond. on z == 0 (training set 1) + z0_train_1 = z_train_1 == 0 + x_z0_train_1 = x_train_1[z0_train_1, :] + d_z0_train_1 = d_train_1[z0_train_1] + ml_m_d_z0_prelim = clone(fitted_models["ml_m_d_z0"][i_fold]) + ml_m_d_z0_prelim.fit(x_z0_train_1, d_z0_train_1) + m_d_z0_hat_prelim = _predict_zero_one_propensity(ml_m_d_z0_prelim, x_train_1) + + # propensity for d == 1 cond. 
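
# Conceptual sketch, not part of the patch: the "truncate" trimming rule
# applied via _trimm above amounts to clipping estimated propensities away
# from 0 and 1 (trimming_threshold defaults to 1e-2 in this class):
import numpy as np

def truncate_propensity(preds, threshold=1e-2):
    # clip propensity predictions into [threshold, 1 - threshold]
    return np.clip(preds, threshold, 1 - threshold)

print(truncate_propensity(np.array([0.001, 0.4, 0.9999])))  # [0.01 0.4  0.99]
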
on z == 1 (training set 1) + z1_train_1 = z_train_1 == 1 + x_z1_train_1 = x_train_1[z1_train_1, :] + d_z1_train_1 = d_train_1[z1_train_1] + ml_m_d_z1_prelim = clone(fitted_models["ml_m_d_z1"][i_fold]) + ml_m_d_z1_prelim.fit(x_z1_train_1, d_z1_train_1) + m_d_z1_hat_prelim = _predict_zero_one_propensity(ml_m_d_z1_prelim, x_train_1) + + # preliminary estimate of theta_2_aux + comp_prob_prelim = np.mean( + m_d_z1_hat_prelim + - m_d_z0_hat_prelim + + z_train_1 / m_z_hat_prelim * (d_train_1 - m_d_z1_hat_prelim) + - (1 - z_train_1) / (1 - m_z_hat_prelim) * (d_train_1 - m_d_z0_hat_prelim) + ) + + # preliminary ipw estimate + def ipw_score(theta): + res = np.mean( + self._compute_ipw_score(theta, d_train_1, y_train_1, m_z_hat_prelim, z_train_1, comp_prob_prelim) + ) + return res + + _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) + ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) + ipw_vec[i_fold] = ipw_est + + # use the preliminary estimates to fit the nuisance parameters on train_2 + d_train_2 = d[train_inds_2] + y_train_2 = y[train_inds_2] + x_train_2 = x[train_inds_2, :] + z_train_2 = z[train_inds_2] + + # define test observations + d_test = d[test_inds] + y_test = y[test_inds] + x_test = x[test_inds, :] + z_test = z[test_inds] + + # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. on z == 0 + z0_train_2 = z_train_2 == 0 + x_z0_train_2 = x_train_2[z0_train_2, :] + du_z0_train_2 = (d_train_2[z0_train_2] == self._treatment) * (y_train_2[z0_train_2] <= ipw_est) + fitted_models["ml_g_du_z0"][i_fold].fit(x_z0_train_2, du_z0_train_2) + g_du_z0_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_g_du_z0"][i_fold], x_test) + + # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. on z == 1 + z1_train_2 = z_train_2 == 1 + x_z1_train_2 = x_train_2[z1_train_2, :] + du_z1_train_2 = (d_train_2[z1_train_2] == self._treatment) * (y_train_2[z1_train_2] <= ipw_est) + fitted_models["ml_g_du_z1"][i_fold].fit(x_z1_train_2, du_z1_train_2) + g_du_z1_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_g_du_z1"][i_fold], x_test) + + # the predictions of both should only be evaluated conditional on z == 0 or z == 1 + test_inds_z0 = test_inds[z_test == 0] + test_inds_z1 = test_inds[z_test == 1] + g_du_z0_hat["targets"][test_inds_z0] = ( + 1.0 * (d_test[z_test == 0] == self._treatment) * (y_test[z_test == 0] <= ipw_est) + ) + g_du_z1_hat["targets"][test_inds_z1] = ( + 1.0 * (d_test[z_test == 1] == self._treatment) * (y_test[z_test == 1] <= ipw_est) + ) + + # refit nuisance elements for the local potential quantile + z_train = z[train_inds] + x_train = x[train_inds] + d_train = d[train_inds] + + # refit propensity for z (whole training set) + fitted_models["ml_m_z"][i_fold].fit(x_train, z_train) + m_z_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m_z"][i_fold], x_test) + + # refit propensity for d == 1 cond. on z == 0 (whole training set) + z0_train = z_train == 0 + x_z0_train = x_train[z0_train, :] + d_z0_train = d_train[z0_train] + fitted_models["ml_m_d_z0"][i_fold].fit(x_z0_train, d_z0_train) + m_d_z0_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m_d_z0"][i_fold], x_test) + + # propensity for d == 1 cond. 
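
# Numeric sketch of the compliance-probability estimate (comp_prob_prelim)
# computed just above; plain numpy with hypothetical small arrays:
import numpy as np

z = np.array([0, 1, 1, 0, 1])      # binary instrument
d = np.array([0, 1, 0, 0, 1])      # binary treatment
m_z = np.full(5, 0.6)              # estimates of P(Z=1|X)
m_d_z0 = np.full(5, 0.1)           # estimates of P(D=1|X, Z=0)
m_d_z1 = np.full(5, 0.7)           # estimates of P(D=1|X, Z=1)

comp_prob = np.mean(
    m_d_z1 - m_d_z0
    + z / m_z * (d - m_d_z1)
    - (1 - z) / (1 - m_z) * (d - m_d_z0)
)
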
on z == 1 (whole training set) + x_z1_train = x_train[z_train == 1, :] + d_z1_train = d_train[z_train == 1] + fitted_models["ml_m_d_z1"][i_fold].fit(x_z1_train, d_z1_train) + m_d_z1_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m_d_z1"][i_fold], x_test) # save targets and models - m_z_hat['targets'] = z + m_z_hat["targets"] = z + # set targets to relevant subsample - g_du_z0_hat['targets'][z == 1] = np.nan - g_du_z1_hat['targets'][z == 0] = np.nan + g_du_z0_hat["targets"] = _cond_targets(g_du_z0_hat["targets"], cond_sample=(z == 0)) + g_du_z1_hat["targets"] = _cond_targets(g_du_z1_hat["targets"], cond_sample=(z == 1)) # the predictions of both should only be evaluated conditional on z == 0 or z == 1 - m_d_z0_hat['targets'][z == 0] = d[z == 0] - m_d_z0_hat['targets'][z == 1] = np.nan - m_d_z1_hat['targets'][z == 1] = d[z == 1] - m_d_z1_hat['targets'][z == 0] = np.nan + m_d_z0_hat["targets"] = _cond_targets(d, cond_sample=(z == 0)) + m_d_z1_hat["targets"] = _cond_targets(d, cond_sample=(z == 1)) if return_models: - m_z_hat['models'] = fitted_models['ml_m_z'] - m_d_z0_hat['models'] = fitted_models['ml_m_d_z0'] - m_d_z1_hat['models'] = fitted_models['ml_m_d_z1'] - g_du_z0_hat['models'] = fitted_models['ml_g_du_z0'] - g_du_z1_hat['models'] = fitted_models['ml_g_du_z1'] + m_z_hat["models"] = fitted_models["ml_m_z"] + m_d_z0_hat["models"] = fitted_models["ml_m_d_z0"] + m_d_z1_hat["models"] = fitted_models["ml_m_d_z1"] + g_du_z0_hat["models"] = fitted_models["ml_g_du_z0"] + g_du_z1_hat["models"] = fitted_models["ml_g_du_z1"] # clip propensities - m_z_hat_adj = _trimm(m_z_hat['preds'], self.trimming_rule, self.trimming_threshold) + m_z_hat_adj = _trimm(m_z_hat["preds"], self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - if self.dml_procedure == 'dml1': + if self.dml_procedure == "dml1": for _, test_index in smpls: m_z_hat_adj[test_index] = _normalize_ipw(m_z_hat_adj[test_index], z[test_index]) else: @@ -454,49 +531,60 @@ def ipw_score(theta): # this could be adjusted to be compatible with dml1 # estimate final nuisance parameter - comp_prob_hat = np.mean(m_d_z1_hat['preds'] - m_d_z0_hat['preds'] - + z / m_z_hat_adj * (d - m_d_z1_hat['preds']) - - (1 - z) / (1 - m_z_hat_adj) * (d - m_d_z0_hat['preds'])) - - # readjust start value for minimization - self._coef_start_val = np.mean(ipw_vec) - - psi_elements = {'ind_d': d == self._treatment, 'm_z': m_z_hat_adj, - 'g_du_z0': g_du_z0_hat['preds'], 'g_du_z1': g_du_z1_hat['preds'], - 'y': y, 'z': z, 'comp_prob': comp_prob_hat} - preds = {'predictions': {'ml_m_z': m_z_hat['preds'], - 'ml_m_d_z0': m_d_z0_hat['preds'], - 'ml_m_d_z1': m_d_z1_hat['preds'], - 'ml_g_du_z0': g_du_z0_hat['preds'], - 'ml_g_du_z1': g_du_z1_hat['preds']}, - 'targets': {'ml_m_z': m_z_hat['targets'], - 'ml_m_d_z0': m_d_z0_hat['targets'], - 'ml_m_d_z1': m_d_z1_hat['targets'], - 'ml_g_du_z0': g_du_z0_hat['targets'], - 'ml_g_du_z1': g_du_z1_hat['targets']}, - 'models': {'ml_m_z': m_z_hat['models'], - 'ml_m_d_z0': m_d_z0_hat['models'], - 'ml_m_d_z1': m_d_z1_hat['models'], - 'ml_g_du_z0': g_du_z0_hat['models'], - 'ml_g_du_z1': g_du_z1_hat['models']} - } + comp_prob_hat = np.mean( + m_d_z1_hat["preds"] + - m_d_z0_hat["preds"] + + z / m_z_hat_adj * (d - m_d_z1_hat["preds"]) + - (1 - z) / (1 - m_z_hat_adj) * (d - m_d_z0_hat["preds"]) + ) + + if not all(ext_preds): + # readjust start value for minimization + self._coef_start_val = np.mean(ipw_vec) + + psi_elements = { + "ind_d": d == self._treatment, + "m_z": m_z_hat_adj, + "g_du_z0": 
g_du_z0_hat["preds"], + "g_du_z1": g_du_z1_hat["preds"], + "y": y, + "z": z, + "comp_prob": comp_prob_hat, + } + preds = { + "predictions": { + "ml_m_z": m_z_hat["preds"], + "ml_m_d_z0": m_d_z0_hat["preds"], + "ml_m_d_z1": m_d_z1_hat["preds"], + "ml_g_du_z0": g_du_z0_hat["preds"], + "ml_g_du_z1": g_du_z1_hat["preds"], + }, + "targets": { + "ml_m_z": m_z_hat["targets"], + "ml_m_d_z0": m_d_z0_hat["targets"], + "ml_m_d_z1": m_d_z1_hat["targets"], + "ml_g_du_z0": g_du_z0_hat["targets"], + "ml_g_du_z1": g_du_z1_hat["targets"], + }, + "models": { + "ml_m_z": m_z_hat["models"], + "ml_m_d_z0": m_d_z0_hat["models"], + "ml_m_d_z1": m_d_z1_hat["models"], + "ml_g_du_z0": g_du_z0_hat["models"], + "ml_g_du_z1": g_du_z1_hat["models"], + }, + } return psi_elements, preds - def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, - search_mode, n_iter_randomized_search): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) - x, z = check_X_y(x, np.ravel(self._dml_data.z), - force_all_finite=False) + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) if scoring_methods is None: - scoring_methods = {'ml_m_z': None, - 'ml_m_d_z0': None, - 'ml_m_d_z1': None, - 'ml_g_du_z0': None, - 'ml_g_du_z1': None} + scoring_methods = {"ml_m_z": None, "ml_m_d_z0": None, "ml_m_d_z1": None, "ml_g_du_z0": None, "ml_g_du_z1": None} train_inds = [train_index for (train_index, _) in smpls] train_inds_z0 = [np.intersect1d(np.where(z == 0)[0], train) for train, _ in smpls] @@ -505,21 +593,66 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ approx_quant = np.quantile(y[d == self.treatment], self.quantile) du = (d == self.treatment) * (y <= approx_quant) - m_z_tune_res = _dml_tune(z, x, train_inds, - self._learner['ml_m_z'], param_grids['ml_m_z'], scoring_methods['ml_m_z'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - m_d_z0_tune_res = _dml_tune(d, x, train_inds_z0, - self._learner['ml_m_d_z0'], param_grids['ml_m_d_z0'], scoring_methods['ml_m_d_z0'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - m_d_z1_tune_res = _dml_tune(d, x, train_inds_z1, - self._learner['ml_m_d_z1'], param_grids['ml_m_d_z1'], scoring_methods['ml_m_d_z1'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g_du_z0_tune_res = _dml_tune(du, x, train_inds_z0, - self._learner['ml_g_du_z0'], param_grids['ml_g_du_z0'], scoring_methods['ml_g_du_z0'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g_du_z1_tune_res = _dml_tune(du, x, train_inds_z1, - self._learner['ml_g_du_z1'], param_grids['ml_g_du_z1'], scoring_methods['ml_g_du_z1'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + m_z_tune_res = _dml_tune( + z, + x, + train_inds, + self._learner["ml_m_z"], + param_grids["ml_m_z"], + scoring_methods["ml_m_z"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + m_d_z0_tune_res = _dml_tune( + d, + x, + train_inds_z0, + self._learner["ml_m_d_z0"], + param_grids["ml_m_d_z0"], + scoring_methods["ml_m_d_z0"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + 
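
# Usage sketch for the tuning routine above (assumption: the param_grids dict
# passed to tune() must be keyed by the five LPQ learner names used here):
param_grids = {
    "ml_m_z": {"n_estimators": [50, 100]},
    "ml_m_d_z0": {"n_estimators": [50, 100]},
    "ml_m_d_z1": {"n_estimators": [50, 100]},
    "ml_g_du_z0": {"n_estimators": [50, 100]},
    "ml_g_du_z1": {"n_estimators": [50, 100]},
}
# dml_lpq.tune(param_grids, search_mode="grid_search")  # dml_lpq as constructed earlier
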
m_d_z1_tune_res = _dml_tune( + d, + x, + train_inds_z1, + self._learner["ml_m_d_z1"], + param_grids["ml_m_d_z1"], + scoring_methods["ml_m_d_z1"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + g_du_z0_tune_res = _dml_tune( + du, + x, + train_inds_z0, + self._learner["ml_g_du_z0"], + param_grids["ml_g_du_z0"], + scoring_methods["ml_g_du_z0"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + g_du_z1_tune_res = _dml_tune( + du, + x, + train_inds_z1, + self._learner["ml_g_du_z1"], + param_grids["ml_g_du_z1"], + scoring_methods["ml_g_du_z1"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) m_z_best_params = [xx.best_params_ for xx in m_z_tune_res] m_d_z0_best_params = [xx.best_params_ for xx in m_d_z0_tune_res] @@ -527,34 +660,40 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ g_du_z0_best_params = [xx.best_params_ for xx in g_du_z0_tune_res] g_du_z1_best_params = [xx.best_params_ for xx in g_du_z1_tune_res] - params = {'ml_m_z': m_z_best_params, - 'ml_m_d_z0': m_d_z0_best_params, - 'ml_m_d_z1': m_d_z1_best_params, - 'ml_g_du_z0': g_du_z0_best_params, - 'ml_g_du_z1': g_du_z1_best_params} - tune_res = {'ml_m_z': m_z_tune_res, - 'ml_m_d_z0': m_d_z0_tune_res, - 'ml_m_d_z1': m_d_z1_tune_res, - 'ml_g_du_z0': g_du_z0_tune_res, - 'ml_g_du_z1': g_du_z1_tune_res} - - res = {'params': params, - 'tune_res': tune_res} + params = { + "ml_m_z": m_z_best_params, + "ml_m_d_z0": m_d_z0_best_params, + "ml_m_d_z1": m_d_z1_best_params, + "ml_g_du_z0": g_du_z0_best_params, + "ml_g_du_z1": g_du_z1_best_params, + } + tune_res = { + "ml_m_z": m_z_tune_res, + "ml_m_d_z0": m_d_z0_tune_res, + "ml_m_d_z1": m_d_z1_tune_res, + "ml_g_du_z0": g_du_z0_tune_res, + "ml_g_du_z1": g_du_z1_tune_res, + } + + res = {"params": params, "tune_res": tune_res} return res def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError('The data must be of DoubleMLData type. ' - f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') + raise TypeError( + "The data must be of DoubleMLData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." + ) _check_zero_one_treatment(self) - one_instr = (obj_dml_data.n_instr == 1) - err_msg = ('Incompatible data. ' - 'To fit an LPQ model with DML ' - 'exactly one binary variable with values 0 and 1 ' - 'needs to be specified as instrumental variable.') + one_instr = obj_dml_data.n_instr == 1 + err_msg = ( + "Incompatible data. " + "To fit an LPQ model with DML " + "exactly one binary variable with values 0 and 1 " + "needs to be specified as instrumental variable." 
+ ) if one_instr: - binary_instr = (type_of_target(obj_dml_data.z) == 'binary') + binary_instr = type_of_target(obj_dml_data.z) == "binary" zero_one_instr = np.all((np.power(obj_dml_data.z, 2) - obj_dml_data.z) == 0) if not (one_instr & binary_instr & zero_one_instr): raise ValueError(err_msg) diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index 4c70c495..6725e925 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -145,6 +145,7 @@ def __init__(self, if 'ml_g' in self._learner: self._predict_method['ml_g'] = 'predict' self._initialize_ml_nuisance_params() + self._external_predictions_implemented = True @classmethod def _partialX(cls, @@ -283,9 +284,9 @@ def _check_data(self, obj_dml_data): 'use DoubleMLPLR instead of DoubleMLPLIV.') return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): if self.partialX & (not self.partialZ): - psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, return_models) + psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, external_predictions, return_models) elif (not self.partialX) & self.partialZ: psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, return_models) else: @@ -309,16 +310,21 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ return res - def _nuisance_est_partial_x(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) # nuisance l - l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], - return_models=return_models) + if external_predictions['ml_l'] is not None: + l_hat = {'preds': external_predictions['ml_l'], + 'targets': None, + 'models': None} + else: + l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], + return_models=return_models) _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) predictions = {'ml_l': l_hat['preds']} @@ -329,36 +335,54 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, return_models=False): # one instrument: just identified x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) - m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) + if external_predictions['ml_m'] is not None: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) predictions['ml_m'] = m_hat['preds'] targets['ml_m'] = m_hat['targets'] models['ml_m'] = m_hat['models'] else: # several instruments: 2SLS m_hat = {'preds': np.full((self._dml_data.n_obs, self._dml_data.n_instr), np.nan), + 'targets': [None] * self._dml_data.n_instr, 'models': [None] * self._dml_data.n_instr} - z = self._dml_data.z for i_instr in range(self._dml_data.n_instr): + z = self._dml_data.z x, this_z = 
check_X_y(x, z[:, i_instr], force_all_finite=False) - res_cv_predict = _dml_cv_predict(self._learner['ml_m'], x, this_z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), - method=self._predict_method['ml_m'], return_models=return_models) + if external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] is not None: + m_hat['preds'][:, i_instr] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] + predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = external_predictions[ + 'ml_m_' + self._dml_data.z_cols[i_instr]] + targets['ml_m_' + self._dml_data.z_cols[i_instr]] = None + models['ml_m_' + self._dml_data.z_cols[i_instr]] = None + else: + res_cv_predict = _dml_cv_predict(self._learner['ml_m'], x, this_z, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), + method=self._predict_method['ml_m'], return_models=return_models) - m_hat['preds'][:, i_instr] = res_cv_predict['preds'] + m_hat['preds'][:, i_instr] = res_cv_predict['preds'] - predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['preds'] - targets['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['targets'] - models['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['models'] + predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['preds'] + targets['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['targets'] + models['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['models'] _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) # nuisance r - r_hat = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r'), method=self._predict_method['ml_r'], - return_models=return_models) + if external_predictions['ml_r'] is not None: + r_hat = {'preds': external_predictions['ml_r'], + 'targets': None, + 'models': None} + else: + r_hat = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_r'), method=self._predict_method['ml_r'], + return_models=return_models) _check_finite_predictions(r_hat['preds'], self._learner['ml_r'], 'ml_r', smpls) predictions['ml_r'] = r_hat['preds'] targets['ml_r'] = r_hat['targets'] @@ -368,13 +392,18 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, return_models=False): if (self._dml_data.n_instr == 1) & ('ml_g' in self._learner): # an estimate of g is obtained for the IV-type score and callable scores # get an initial estimate for theta using the partialling out score - psi_a = -np.multiply(d - r_hat['preds'], z - m_hat['preds']) - psi_b = np.multiply(z - m_hat['preds'], y - l_hat['preds']) - theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) - # nuisance g - g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial * d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], - return_models=return_models) + if external_predictions['ml_g'] is not None: + g_hat = {'preds': external_predictions['ml_g'], + 'targets': None, + 'models': None} + else: + psi_a = -np.multiply(d - r_hat['preds'], z - m_hat['preds']) + psi_b = np.multiply(z - m_hat['preds'], y - l_hat['preds']) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + # nuisance g + g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial * d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], + 
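
# Illustrative sketch, not part of the patch: supplying pre-computed nuisance
# predictions to a PLIV model. The nested dict keyed by treatment variable and
# learner name, and the (n_obs, n_rep) shape, are assumptions about the new
# fit(external_predictions=...) argument; with several instruments the keys
# 'ml_m_<z_col>' seen above would replace 'ml_m'.
import numpy as np
import doubleml as dml
from doubleml.datasets import make_pliv_CHS2015
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict

np.random.seed(3141)
obj_dml_data = make_pliv_CHS2015(n_obs=500, dim_x=20, dim_z=1, alpha=0.5)
x, y = obj_dml_data.x, obj_dml_data.y
d, z = obj_dml_data.d, np.ravel(obj_dml_data.z)

external_predictions = {"d": {
    "ml_l": cross_val_predict(LinearRegression(), x, y, cv=5).reshape(-1, 1),
    "ml_m": cross_val_predict(LinearRegression(), x, z, cv=5).reshape(-1, 1),
    "ml_r": cross_val_predict(LinearRegression(), x, d, cv=5).reshape(-1, 1),
}}
dml_pliv = dml.DoubleMLPLIV(obj_dml_data, LinearRegression(), LinearRegression(), LinearRegression())
dml_pliv.fit(external_predictions=external_predictions)
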
return_models=return_models) _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls) predictions['ml_g'] = g_hat['preds'] diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index d75cbcf8..cd81bd1f 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target from sklearn.base import clone @@ -8,6 +9,7 @@ from .double_ml import DoubleML from .double_ml_data import DoubleMLData from .double_ml_score_mixins import LinearScoreMixin +from .double_ml_blp import DoubleMLBLP from ._utils import _dml_cv_predict, _dml_tune from ._utils_checks import _check_score, _check_finite_predictions, _check_is_propensity @@ -150,6 +152,7 @@ def __init__(self, self._initialize_ml_nuisance_params() self._sensitivity_implemented = True + self._external_predictions_implemented = True def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} @@ -166,23 +169,43 @@ def _check_data(self, obj_dml_data): 'To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR.') return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + m_external = external_predictions['ml_m'] is not None + l_external = external_predictions['ml_l'] is not None + if 'ml_g' in self._learner: + g_external = external_predictions['ml_g'] is not None + else: + g_external = False # nuisance l - l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], - return_models=return_models) - _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) + if l_external: + l_hat = {'preds': external_predictions['ml_l'], + 'targets': None, + 'models': None} + elif self._score == "IV-type" and g_external: + l_hat = {'preds': None, + 'targets': None, + 'models': None} + else: + l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], + return_models=return_models) + _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + if m_external: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) if self._check_learner(self._learner['ml_m'], 'ml_m', regressor=True, classifier=True): _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) @@ -198,15 +221,20 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): # an estimate of g is obtained for the IV-type score and callable 
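
# Illustrative sketch, not part of the patch: the analogous PLR case with the
# 'partialling out' score, reusing cross-fitted sklearn predictions. The dict
# format and (n_obs, n_rep) shape are again assumptions about fit():
import numpy as np
import doubleml as dml
from doubleml.datasets import make_plr_CCDDHNR2018
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_predict

np.random.seed(3141)
obj_dml_data = make_plr_CCDDHNR2018(n_obs=500, dim_x=20)
x, y, d = obj_dml_data.x, obj_dml_data.y, obj_dml_data.d

pred_l = cross_val_predict(RandomForestRegressor(), x, y, cv=5).reshape(-1, 1)
pred_m = cross_val_predict(RandomForestRegressor(), x, d, cv=5).reshape(-1, 1)

dml_plr = dml.DoubleMLPLR(obj_dml_data, RandomForestRegressor(), RandomForestRegressor())
dml_plr.fit(external_predictions={"d": {"ml_l": pred_l, "ml_m": pred_m}})
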
scores g_hat = {'preds': None, 'targets': None, 'models': None} if 'ml_g' in self._learner: - # get an initial estimate for theta using the partialling out score - psi_a = -np.multiply(d - m_hat['preds'], d - m_hat['preds']) - psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds']) - theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) # nuisance g - g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls) + if g_external: + g_hat = {'preds': external_predictions['ml_g'], + 'targets': None, + 'models': None} + else: + # get an initial estimate for theta using the partialling out score + psi_a = -np.multiply(d - m_hat['preds'], d - m_hat['preds']) + psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds']) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls) psi_a, psi_b = self._score_elements(y, d, l_hat['preds'], m_hat['preds'], g_hat['preds'], smpls) psi_elements = {'psi_a': psi_a, @@ -224,8 +252,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): return psi_elements, preds def _score_elements(self, y, d, l_hat, m_hat, g_hat, smpls): - # compute residuals - u_hat = y - l_hat + # compute residual v_hat = d - m_hat if isinstance(self.score, str): @@ -234,6 +261,7 @@ def _score_elements(self, y, d, l_hat, m_hat, g_hat, smpls): psi_b = np.multiply(v_hat, y - g_hat) else: assert self.score == 'partialling out' + u_hat = y - l_hat psi_a = -np.multiply(v_hat, v_hat) psi_b = np.multiply(v_hat, u_hat) else: @@ -327,3 +355,103 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ 'tune_res': tune_res} return res + + def cate(self, basis, is_gate=False): + """ + Calculate conditional average treatment effects (CATE) for a given basis. + + Parameters + ---------- + basis : :class:`pandas.DataFrame` + The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``, + where ``n_obs`` is the number of observations and ``d`` is the number of predictors. + is_gate : bool + Indicates whether the basis is constructed for GATEs (dummy-basis). + Default is ``False``. + + Returns + ------- + model : :class:`doubleML.DoubleMLBLP` + Best linear Predictor model. + """ + if self._dml_data.n_treat > 1: + raise NotImplementedError('Only implemented for single treatment. ' + + f'Number of treatments is {str(self._dml_data.n_treat)}.') + if self.n_rep != 1: + raise NotImplementedError('Only implemented for one repetition. ' + + f'Number of repetitions is {str(self.n_rep)}.') + + Y_tilde, D_tilde = self._partial_out() + + D_basis = basis * D_tilde + model = DoubleMLBLP( + orth_signal=Y_tilde.reshape(-1), + basis=D_basis, + is_gate=is_gate, + ) + model.fit() + return model + + def gate(self, groups): + """ + Calculate group average treatment effects (GATE) for groups. + + Parameters + ---------- + groups : :class:`pandas.DataFrame` + The group indicator for estimating the best linear predictor. Groups should be mutually exclusive. 
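
# Usage sketch for the new PLR cate() method defined above (assumption: a
# simple polynomial basis in the first covariate; any DataFrame basis works):
import numpy as np
import pandas as pd
import doubleml as dml
from doubleml.datasets import make_plr_CCDDHNR2018
from sklearn.ensemble import RandomForestRegressor

np.random.seed(3141)
data = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, return_type='DataFrame')
obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
dml_plr = dml.DoubleMLPLR(obj_dml_data, RandomForestRegressor(), RandomForestRegressor())
dml_plr.fit(store_predictions=True)  # stored predictions are needed by _partial_out()

basis = pd.DataFrame({'const': 1.0, 'x1': data['X1'], 'x1_sq': data['X1'] ** 2})
cate_model = dml_plr.cate(basis)
print(cate_model.summary)
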
+ Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations + and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str). + + Returns + ------- + model : :class:`doubleML.DoubleMLBLP` + Best linear Predictor model for Group Effects. + """ + + if not isinstance(groups, pd.DataFrame): + raise TypeError('Groups must be of DataFrame type. ' + f'Groups of type {str(type(groups))} was passed.') + if not all(groups.dtypes == bool) or all(groups.dtypes == int): + if groups.shape[1] == 1: + groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_') + else: + raise TypeError('Columns of groups must be of bool type or int type (dummy coded). ' + 'Alternatively, groups should only contain one column.') + + if any(groups.sum(0) <= 5): + warnings.warn('At least one group effect is estimated with less than 6 observations.') + + model = self.cate(groups, is_gate=True) + return model + + def _partial_out(self): + """ + Helper function. Returns the partialled out quantities of Y and D. + Works with multiple repetitions. + + Returns + ------- + Y_tilde : :class:`numpy.ndarray` + The residual of the regression of Y on X. + D_tilde : :class:`numpy.ndarray` + The residual of the regression of D on X. + """ + if self.predictions is None: + raise ValueError('predictions are None. Call .fit(store_predictions=True) to store the predictions.') + + y = self._dml_data.y.reshape(-1, 1) + d = self._dml_data.d.reshape(-1, 1) + ml_m = self.predictions["ml_m"].squeeze(axis=2) + + if self.score == "partialling out": + ml_l = self.predictions["ml_l"].squeeze(axis=2) + Y_tilde = y - ml_l + D_tilde = d - ml_m + else: + assert self.score == "IV-type" + ml_g = self.predictions["ml_g"].squeeze(axis=2) + Y_tilde = y - (self.coef * ml_m) - ml_g + D_tilde = d - ml_m + + return Y_tilde, D_tilde diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index d9a36d41..dee3dc12 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -1,5 +1,4 @@ import numpy as np -import copy from sklearn.base import clone from sklearn.utils import check_X_y from sklearn.model_selection import StratifiedKFold, train_test_split @@ -8,11 +7,26 @@ from .double_ml_score_mixins import NonLinearScoreMixin from .double_ml_data import DoubleMLData -from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _get_bracket_guess, \ - _default_kde, _normalize_ipw, _dml_tune, _solve_ipw_score +from ._utils import ( + _dml_cv_predict, + _trimm, + _predict_zero_one_propensity, + _get_bracket_guess, + _default_kde, + _normalize_ipw, + _dml_tune, + _solve_ipw_score, + _cond_targets, +) from ._utils_resampling import DoubleMLResampling -from ._utils_checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_treatment, \ - _check_contains_iv, _check_quantile +from ._utils_checks import ( + _check_score, + _check_trimming, + _check_zero_one_treatment, + _check_treatment, + _check_contains_iv, + _check_quantile, +) class DoubleMLPQ(NonLinearScoreMixin, DoubleML): @@ -130,21 +144,22 @@ def __init__(self, self._kde = _default_kde else: if not callable(kde): - raise TypeError('kde should be either a callable or None. ' - '%r was passed.' % kde) + raise TypeError("kde should be either a callable or None. " + "%r was passed." 
% kde) self._kde = kde self._normalize_ipw = normalize_ipw self._check_data(self._dml_data) - valid_score = ['PQ'] + valid_score = ["PQ"] _check_score(self.score, valid_score, allow_callable=False) _check_quantile(self.quantile) _check_treatment(self.treatment) if not isinstance(self.normalize_ipw, bool): - raise TypeError('Normalization indicator has to be boolean. ' + - f'Object of type {str(type(self.normalize_ipw))} passed.') + raise TypeError( + "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." + ) # initialize starting values and bounds self._coef_bounds = (self._dml_data.y.min(), self._dml_data.y.max()) @@ -155,20 +170,23 @@ def __init__(self, self._trimming_threshold = trimming_threshold _check_trimming(self._trimming_rule, self._trimming_threshold) - _ = self._check_learner(ml_g, 'ml_g', regressor=False, classifier=True) - _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) - self._learner = {'ml_g': ml_g, 'ml_m': ml_m} - self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'} + _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) + _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) + self._learner = {"ml_g": ml_g, "ml_m": ml_m} + self._predict_method = {"ml_g": "predict_proba", "ml_m": "predict_proba"} self._initialize_ml_nuisance_params() if draw_sample_splitting: - obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, - n_rep=self.n_rep, - n_obs=self._dml_data.n_obs, - apply_cross_fitting=self.apply_cross_fitting, - stratify=self._dml_data.d) + obj_dml_resampling = DoubleMLResampling( + n_folds=self.n_folds, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + apply_cross_fitting=self.apply_cross_fitting, + stratify=self._dml_data.d, + ) self._smpls = obj_dml_resampling.split_samples() + self._external_predictions_implemented = True @property def quantile(self): @@ -214,36 +232,36 @@ def trimming_threshold(self): @property def _score_element_names(self): - return ['ind_d', 'g', 'm', 'y'] + return ["ind_d", "g", "m", "y"] def _compute_ipw_score(self, theta, d, y, prop): score = (d == self.treatment) / prop * (y <= theta) - self.quantile return score def _compute_score(self, psi_elements, coef, inds=None): - ind_d = psi_elements['ind_d'] - g = psi_elements['g'] - m = psi_elements['m'] - y = psi_elements['y'] + ind_d = psi_elements["ind_d"] + g = psi_elements["g"] + m = psi_elements["m"] + y = psi_elements["y"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - g = psi_elements['g'][inds] - m = psi_elements['m'][inds] - y = psi_elements['y'][inds] + ind_d = psi_elements["ind_d"][inds] + g = psi_elements["g"][inds] + m = psi_elements["m"][inds] + y = psi_elements["y"][inds] score = ind_d * ((y <= coef) - g) / m + g - self.quantile return score def _compute_score_deriv(self, psi_elements, coef, inds=None): - ind_d = psi_elements['ind_d'] - m = psi_elements['m'] - y = psi_elements['y'] + ind_d = psi_elements["ind_d"] + m = psi_elements["m"] + y = psi_elements["y"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - m = psi_elements['m'][inds] - y = psi_elements['y'][inds] + ind_d = psi_elements["ind_d"][inds] + m = psi_elements["m"][inds] + y = psi_elements["y"][inds] score_weights = ind_d / m @@ -253,105 +271,133 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): return deriv def _initialize_ml_nuisance_params(self): - self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} - for 
learner in ['ml_g', 'ml_m']} + self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ["ml_g", "ml_m"]} - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + + g_external = external_predictions["ml_g"] is not None + m_external = external_predictions["ml_m"] is not None # initialize nuisance predictions, targets and models - g_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan) - } - m_hat = copy.deepcopy(g_hat) + if not g_external: + g_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + if not m_external: + m_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) # initialize models fitted_models = {} for learner in self.params_names: # set nuisance model parameters - est_params = self._get_params(learner) - if est_params is not None: - fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold]) - for i_fold in range(self.n_folds)] - else: - fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + if (learner == "ml_g" and not g_external) or (learner == "ml_m" and not m_external): + est_params = self._get_params(learner) + if est_params is not None: + fitted_models[learner] = [ + clone(self._learner[learner]).set_params(**est_params[i_fold]) for i_fold in range(self.n_folds) + ] + else: + fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + if g_external: + g_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_g"], + } + if m_external: + m_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_m"], + } # caculate nuisance functions over different folds - for i_fold in range(self.n_folds): - train_inds = smpls[i_fold][0] - test_inds = smpls[i_fold][1] - - # start nested crossfitting - train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, - random_state=42, stratify=d[train_inds]) - smpls_prelim = [(train, test) for train, test in - StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1])] - - d_train_1 = d[train_inds_1] - y_train_1 = y[train_inds_1] - x_train_1 = x[train_inds_1, :] - - # get a copy of ml_m as a preliminary learner - ml_m_prelim = clone(fitted_models['ml_m'][i_fold]) - m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, - method='predict_proba', smpls=smpls_prelim)['preds'] - - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) - - if self._normalize_ipw: - m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) - if self.treatment == 0: - m_hat_prelim = 1 - m_hat_prelim - - # preliminary ipw estimate - def ipw_score(theta): 
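
# Numeric sketch of the PQ moment condition coded in _compute_score above:
# psi(W; theta, eta) = 1{D = d} * (1{Y <= theta} - g) / m + g - tau.
# At the true quantile the score averages to roughly zero; hypothetical arrays:
import numpy as np

rng = np.random.default_rng(0)
n = 10_000
y = rng.normal(size=n)
d = np.ones(n, dtype=int)       # everyone treated, so m = 1 here
g = np.full(n, 0.5)             # P(Y <= theta | D = 1, X) at the median
m = np.ones(n)
tau = 0.5
theta = 0.0                     # true median of N(0, 1)

score = (d == 1) * ((y <= theta) - g) / m + g - tau
print(abs(score.mean()) < 0.02)  # close to zero at the true quantile
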
- res = np.mean(self._compute_ipw_score(theta, d_train_1, y_train_1, m_hat_prelim)) - return res - - _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) - ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) - ipw_vec[i_fold] = ipw_est - - # use the preliminary estimates to fit the nuisance parameters on train_2 - d_train_2 = d[train_inds_2] - y_train_2 = y[train_inds_2] - x_train_2 = x[train_inds_2, :] - - dx_treat_train_2 = x_train_2[d_train_2 == self.treatment, :] - y_treat_train_2 = y_train_2[d_train_2 == self.treatment] - - fitted_models['ml_g'][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) - - # predict nuisance values on the test data and the corresponding targets - g_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g'][i_fold], x[test_inds, :]) - g_hat['targets'][test_inds] = y[test_inds] <= ipw_est - - # refit the propensity score on the whole training set - fitted_models['ml_m'][i_fold].fit(x[train_inds, :], d[train_inds]) - m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], x[test_inds, :]) + if not all([g_external, m_external]): + for i_fold in range(self.n_folds): + train_inds = smpls[i_fold][0] + test_inds = smpls[i_fold][1] + + # start nested crossfitting + train_inds_1, train_inds_2 = train_test_split( + train_inds, test_size=0.5, random_state=42, stratify=d[train_inds] + ) + smpls_prelim = [ + (train, test) + for train, test in StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1]) + ] + + d_train_1 = d[train_inds_1] + y_train_1 = y[train_inds_1] + x_train_1 = x[train_inds_1, :] + + if not m_external: + # get a copy of ml_m as a preliminary learner + ml_m_prelim = clone(fitted_models["ml_m"][i_fold]) + m_hat_prelim = _dml_cv_predict( + ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim + )["preds"] + else: + m_hat_prelim = m_hat["preds"][np.concatenate([test for _, test in smpls_prelim])] + m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) + if self._normalize_ipw: + m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) + if self.treatment == 0: + m_hat_prelim = 1 - m_hat_prelim + + # preliminary ipw estimate + def ipw_score(theta): + res = np.mean(self._compute_ipw_score(theta, d_train_1, y_train_1, m_hat_prelim)) + return res + + _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) + ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) + ipw_vec[i_fold] = ipw_est + + # use the preliminary estimates to fit the nuisance parameters on train_2 + d_train_2 = d[train_inds_2] + y_train_2 = y[train_inds_2] + x_train_2 = x[train_inds_2, :] + + dx_treat_train_2 = x_train_2[d_train_2 == self.treatment, :] + y_treat_train_2 = y_train_2[d_train_2 == self.treatment] + + if not g_external: + fitted_models["ml_g"][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) + + # predict nuisance values on the test data and the corresponding targets + g_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_g"][i_fold], x[test_inds, :]) + g_hat["targets"][test_inds] = y[test_inds] <= ipw_est + if not m_external: + # refit the propensity score on the whole training set + fitted_models["ml_m"][i_fold].fit(x[train_inds, :], d[train_inds]) + m_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m"][i_fold], x[test_inds, :]) # set target for propensity score - m_hat['targets'] = d 
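
# Minimal end-to-end sketch for DoubleMLPQ, not part of the patch (assumption:
# default settings; both nuisances must be classifiers, as checked above):
import numpy as np
import doubleml as dml
from doubleml.datasets import make_irm_data
from sklearn.ensemble import RandomForestClassifier

np.random.seed(3141)
obj_dml_data = make_irm_data(n_obs=1000, dim_x=10)
dml_pq = dml.DoubleMLPQ(obj_dml_data,
                        ml_g=RandomForestClassifier(),
                        ml_m=RandomForestClassifier(),
                        treatment=1, quantile=0.5)
dml_pq.fit()
print(dml_pq.confint())
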
+ m_hat["targets"] = d # set the target for g to be a float and only relevant values - g_hat['targets'] = g_hat['targets'].astype(float) - g_hat['targets'][d != self.treatment] = np.nan + g_hat["targets"] = _cond_targets(g_hat["targets"], cond_sample=(d == self.treatment)) if return_models: - g_hat['models'] = fitted_models['ml_g'] - m_hat['models'] = fitted_models['ml_m'] + g_hat["models"] = fitted_models["ml_g"] + m_hat["models"] = fitted_models["ml_m"] # clip propensities and normalize ipw weights # this is not done in the score to save computation due to multiple score evaluations # to be able to evaluate the raw models the m_hat['preds'] are not changed - m_hat_adj = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) + + m_hat_adj = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - if self.dml_procedure == 'dml1': + if self.dml_procedure == "dml1": for _, test_index in smpls: m_hat_adj[test_index] = _normalize_ipw(m_hat_adj[test_index], d[test_index]) else: @@ -359,63 +405,74 @@ def ipw_score(theta): if self.treatment == 0: m_hat_adj = 1 - m_hat_adj - # readjust start value for minimization - self._coef_start_val = np.mean(ipw_vec) - - psi_elements = {'ind_d': d == self.treatment, 'g': g_hat['preds'], - 'm': m_hat_adj, 'y': y} - - preds = {'predictions': {'ml_g': g_hat['preds'], - 'ml_m': m_hat['preds']}, - 'targets': {'ml_g': g_hat['targets'], - 'ml_m': m_hat['targets']}, - 'models': {'ml_g': g_hat['models'], - 'ml_m': m_hat['models']} - } + if not g_external or not m_external: + self._coef_start_val = np.mean(ipw_vec) + + psi_elements = {"ind_d": d == self.treatment, "g": g_hat["preds"], "m": m_hat_adj, "y": y} + + preds = { + "predictions": {"ml_g": g_hat["preds"], "ml_m": m_hat["preds"]}, + "targets": {"ml_g": g_hat["targets"], "ml_m": m_hat["targets"]}, + "models": {"ml_g": g_hat["models"], "ml_m": m_hat["models"]}, + } return psi_elements, preds - def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, - search_mode, n_iter_randomized_search): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) if scoring_methods is None: - scoring_methods = {'ml_g': None, - 'ml_m': None} + scoring_methods = {"ml_g": None, "ml_m": None} train_inds = [train_index for (train_index, _) in smpls] train_inds_treat = [np.intersect1d(np.where(d == self.treatment)[0], train) for train, _ in smpls] # use self._coef_start_val as a very crude approximation of ipw_est approx_goal = y <= np.quantile(y[d == self.treatment], self.quantile) - g_tune_res = _dml_tune(approx_goal, x, train_inds_treat, - self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - - m_tune_res = _dml_tune(d, x, train_inds, - self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + g_tune_res = _dml_tune( + approx_goal, + x, + train_inds_treat, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + + m_tune_res = _dml_tune( 
+ d, + x, + train_inds, + self._learner["ml_m"], + param_grids["ml_m"], + scoring_methods["ml_m"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) g_best_params = [xx.best_params_ for xx in g_tune_res] m_best_params = [xx.best_params_ for xx in m_tune_res] - params = {'ml_g': g_best_params, - 'ml_m': m_best_params} - tune_res = {'g_tune': g_tune_res, - 'm_tune': m_tune_res} + params = {"ml_g": g_best_params, "ml_m": m_best_params} + tune_res = {"g_tune": g_tune_res, "m_tune": m_tune_res} - res = {'params': params, - 'tune_res': tune_res} + res = {"params": params, "tune_res": tune_res} return res def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError('The data must be of DoubleMLData type. ' - f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') + raise TypeError( + "The data must be of DoubleMLData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." + ) _check_contains_iv(obj_dml_data) _check_zero_one_treatment(self) return diff --git a/doubleml/double_ml_qte.py b/doubleml/double_ml_qte.py index c640abf7..9633434f 100644 --- a/doubleml/double_ml_qte.py +++ b/doubleml/double_ml_qte.py @@ -161,7 +161,7 @@ def __init__(self, self._modellist_0, self._modellist_1 = self._initialize_models() # initialize arrays according to obj_dml_data and the resampling settings - self._psi0, self._psi1, self._psi0_deriv, self._psi1_deriv,\ + self._psi0, self._psi1, self._psi0_deriv, self._psi1_deriv, \ self._coef, self._se, self._all_coef, self._all_se, self._all_dml1_coef = self._initialize_arrays() # also initialize bootstrap arrays with the default number of bootstrap replications @@ -386,7 +386,7 @@ def __psi1_deriv(self): def __all_se(self): return self._all_se[self._i_quant, self._i_rep] - def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False): + def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions=None): """ Estimate DoubleMLQTE models. 
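A call that supplies ``external_predictions`` to a QTE model currently fails fast, as the next hunk shows. A minimal sketch of the resulting behaviour (the data setup is illustrative only; learner choice mirrors the exception tests added elsewhere in this changeset):

import numpy as np
from sklearn.linear_model import LogisticRegression
from doubleml import DoubleMLQTE, DoubleMLData
from doubleml.datasets import make_irm_data

df = make_irm_data(n_obs=100, dim_x=5, theta=0.5, return_type="DataFrame")
qte = DoubleMLQTE(DoubleMLData(df, "y", "d"), LogisticRegression(), LogisticRegression())

try:
    qte.fit(external_predictions={"d": {}})  # any non-None value triggers the guard
except NotImplementedError as err:
    print(err)  # External predictions not implemented for DoubleMLQTE.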
@@ -415,6 +415,9 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ self : object """ + if external_predictions is not None: + raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") + # parallel estimation of the quantiles parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch='2*n_jobs') fitted_models = parallel(delayed(self._fit_quantile)(i_quant, n_jobs_cv, store_predictions, store_models) diff --git a/doubleml/tests/_utils_dml_cv_predict.py b/doubleml/tests/_utils_dml_cv_predict.py index 8ec1d95d..2225cfe3 100644 --- a/doubleml/tests/_utils_dml_cv_predict.py +++ b/doubleml/tests/_utils_dml_cv_predict.py @@ -8,6 +8,21 @@ from sklearn.preprocessing import LabelEncoder from sklearn.model_selection._validation import _fit_and_predict, _check_is_permutation +# Adapt _fit_and_predict for earlier sklearn versions +from distutils.version import LooseVersion +from sklearn import __version__ as sklearn_version + +if LooseVersion(sklearn_version) < LooseVersion("1.4.0"): + def _fit_and_predict_adapted(estimator, x, y, train, test, fit_params, method): + res = _fit_and_predict(estimator, x, y, train, test, + verbose=0, + fit_params=fit_params, + method=method) + return res +else: + def _fit_and_predict_adapted(estimator, x, y, train, test, fit_params, method): + return _fit_and_predict(estimator, x, y, train, test, fit_params, method) + def _dml_cv_predict_ut_version(estimator, x, y, smpls=None, n_jobs=None, est_params=None, method='predict'): @@ -22,18 +37,19 @@ def _dml_cv_predict_ut_version(estimator, x, y, smpls=None, train_index, test_index = smpls[0] # set some defaults aligned with cross_val_predict fit_params = None - verbose = 0 if method == 'predict_proba': predictions = np.full((len(y), 2), np.nan) else: predictions = np.full(len(y), np.nan) if est_params is None: - xx = _fit_and_predict(clone(estimator), - x, y, train_index, test_index, verbose, fit_params, method) + xx = _fit_and_predict_adapted( + clone(estimator), + x, y, train_index, test_index, fit_params, method) else: assert isinstance(est_params, dict) - xx = _fit_and_predict(clone(estimator).set_params(**est_params), - x, y, train_index, test_index, verbose, fit_params, method) + xx = _fit_and_predict_adapted( + clone(estimator).set_params(**est_params), + x, y, train_index, test_index, fit_params, method) # implementation is (also at other parts) restricted to a sorted set of test_indices, but this could be fixed # inv_test_indices = np.argsort(test_indices) @@ -61,22 +77,22 @@ def _dml_cv_predict_ut_version(estimator, x, y, smpls=None, pre_dispatch=pre_dispatch) # FixMe: Find a better way to handle the different combinations of paramters and smpls_is_partition if est_params is None: - prediction_blocks = parallel(delayed(_fit_and_predict)( + prediction_blocks = parallel(delayed(_fit_and_predict_adapted)( estimator, - x, y, train_index, test_index, verbose, fit_params, method) + x, y, train_index, test_index, fit_params, method) for idx, (train_index, test_index) in enumerate(smpls)) elif isinstance(est_params, dict): # if no fold-specific parameters we redirect to the standard method # warnings.warn("Using the same (hyper-)parameters for all folds") - prediction_blocks = parallel(delayed(_fit_and_predict)( + prediction_blocks = parallel(delayed(_fit_and_predict_adapted)( clone(estimator).set_params(**est_params), - x, y, train_index, test_index, verbose, fit_params, method) + x, y, train_index, test_index, fit_params, method) for idx, 
(train_index, test_index) in enumerate(smpls)) else: assert len(est_params) == len(smpls), 'provide one parameter setting per fold' - prediction_blocks = parallel(delayed(_fit_and_predict)( + prediction_blocks = parallel(delayed(_fit_and_predict_adapted)( clone(estimator).set_params(**est_params[idx]), - x, y, train_index, test_index, verbose, fit_params, method) + x, y, train_index, test_index, fit_params, method) for idx, (train_index, test_index) in enumerate(smpls)) # Concatenate the predictions diff --git a/doubleml/tests/_utils_irm_manual.py b/doubleml/tests/_utils_irm_manual.py index 5f162bee..3333550f 100644 --- a/doubleml/tests/_utils_irm_manual.py +++ b/doubleml/tests/_utils_irm_manual.py @@ -81,7 +81,7 @@ def fit_nuisance_irm(y, x, d, learner_g, learner_m, smpls, score, p_hat_list = [] for (_, test_index) in smpls: - p_hat_list.append(np.mean(d[test_index])) + p_hat_list.append(np.mean(d)) return g_hat0_list, g_hat1_list, m_hat_list, p_hat_list @@ -130,20 +130,23 @@ def irm_dml1(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if normalize_ipw: for _, test_index in smpls: - m_hat[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + else: + m_hat_adj = m_hat for idx, (_, test_index) in enumerate(smpls): thetas[idx] = irm_orth(g_hat0[test_index], g_hat1[test_index], - m_hat[test_index], p_hat[test_index], + m_hat_adj[test_index], p_hat[test_index], u_hat0[test_index], u_hat1[test_index], d[test_index], score) theta_hat = np.mean(thetas) if len(smpls) > 1: se = np.sqrt(var_irm(theta_hat, g_hat0, g_hat1, - m_hat, p_hat, + m_hat_adj, p_hat, u_hat0, u_hat1, d, score, n_obs)) else: @@ -151,7 +154,7 @@ def irm_dml1(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s test_index = smpls[0][1] n_obs = len(test_index) se = np.sqrt(var_irm(theta_hat, g_hat0[test_index], g_hat1[test_index], - m_hat[test_index], p_hat[test_index], + m_hat_adj[test_index], p_hat[test_index], u_hat0[test_index], u_hat1[test_index], d[test_index], score, n_obs)) @@ -164,12 +167,14 @@ def irm_dml2(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) if normalize_ipw: - m_hat = _normalize_ipw(m_hat, d) + m_hat_adj = _normalize_ipw(m_hat, d) + else: + m_hat_adj = m_hat - theta_hat = irm_orth(g_hat0, g_hat1, m_hat, p_hat, + theta_hat = irm_orth(g_hat0, g_hat1, m_hat_adj, p_hat, u_hat0, u_hat1, d, score) se = np.sqrt(var_irm(theta_hat, g_hat0, g_hat1, - m_hat, p_hat, + m_hat_adj, p_hat, u_hat0, u_hat1, d, score, n_obs)) @@ -240,12 +245,15 @@ def boot_irm_single_split(theta, y, d, g_hat0_list, g_hat1_list, m_hat_list, p_h u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if normalize_ipw: if dml_procedure == 'dml1': for _, test_index in smpls: - m_hat[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) else: - m_hat = _normalize_ipw(m_hat, d) + m_hat_adj = _normalize_ipw(m_hat, d) + else: + m_hat_adj = m_hat if apply_cross_fitting: if score == 'ATE': @@ -263,13 +271,13 @@ def boot_irm_single_split(theta, y, d, 
g_hat0_list, g_hat1_list, m_hat_list, p_h if score == 'ATE': psi = g_hat1 - g_hat0 \ - + np.divide(np.multiply(d, u_hat1), m_hat) \ - - np.divide(np.multiply(1.-d, u_hat0), 1.-m_hat) - theta + + np.divide(np.multiply(d, u_hat1), m_hat_adj) \ + - np.divide(np.multiply(1.-d, u_hat0), 1.-m_hat_adj) - theta else: assert score == 'ATTE' psi = np.divide(np.multiply(d, u_hat0), p_hat) \ - - np.divide(np.multiply(m_hat, np.multiply(1.-d, u_hat0)), - np.multiply(p_hat, (1.-m_hat))) \ + - np.divide(np.multiply(m_hat_adj, np.multiply(1.-d, u_hat0)), + np.multiply(p_hat, (1.-m_hat_adj))) \ - theta * np.divide(d, p_hat) boot_theta, boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting) @@ -290,7 +298,11 @@ def fit_sensitivity_elements_irm(y, d, all_coef, predictions, score, n_rep): m_hat = predictions['ml_m'][:, i_rep, 0] g_hat0 = predictions['ml_g0'][:, i_rep, 0] - g_hat1 = predictions['ml_g1'][:, i_rep, 0] + if score == 'ATE': + g_hat1 = predictions['ml_g1'][:, i_rep, 0] + else: + assert score == 'ATTE' + g_hat1 = y if score == 'ATE': weights = np.ones_like(d) diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py index e20b0722..b1618ca9 100644 --- a/doubleml/tests/test_blp.py +++ b/doubleml/tests/test_blp.py @@ -37,6 +37,11 @@ def dml_blp_fixture(ci_joint, ci_level): ci_1 = blp.confint(random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000) np.random.seed(42) ci_2 = blp.confint(joint=ci_joint, level=ci_level, n_rep_boot=1000) + expected_ci_2 = np.vstack(( + blp.blp_model.conf_int(alpha=(1-ci_level)/2)[0], + blp.blp_model.params, + blp.blp_model.conf_int(alpha=(1-ci_level)/2)[1])).T + np.random.seed(42) ci_manual = blp_confint(blp_manual, random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000) @@ -50,6 +55,7 @@ def dml_blp_fixture(ci_joint, ci_level): 'signal': blp.orth_signal, 'ci_1': ci_1, 'ci_2': ci_2, + 'expected_ci_2': expected_ci_2, 'ci_manual': ci_manual, 'blp_model': blp, 'unfitted_blp_model': blp_obj} @@ -79,14 +85,14 @@ def test_dml_blp_omega(dml_blp_fixture): @pytest.mark.ci -def test_dml_blp_ci_1(dml_blp_fixture): - assert np.allclose(dml_blp_fixture['ci_1'], +def test_dml_blp_ci_2(dml_blp_fixture): + assert np.allclose(dml_blp_fixture['expected_ci_2'], dml_blp_fixture['ci_2'], rtol=1e-9, atol=1e-4) @pytest.mark.ci -def test_dml_blp_ci_2(dml_blp_fixture): +def test_dml_blp_ci_1(dml_blp_fixture): assert np.allclose(dml_blp_fixture['ci_1'], dml_blp_fixture['ci_manual'], rtol=1e-9, atol=1e-4) diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index cb39e8f0..2d414798 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -5,7 +5,7 @@ from doubleml import DoubleMLData, DoubleMLClusterData from doubleml.datasets import fetch_401K, fetch_bonus, make_plr_CCDDHNR2018, make_plr_turrell2018, \ make_irm_data, make_iivm_data, _make_pliv_data, make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, \ - make_did_SZ2020, make_confounded_irm_data, make_confounded_plr_data + make_did_SZ2020, make_confounded_irm_data, make_confounded_plr_data, make_heterogeneous_data msg_inv_return_type = 'Invalid return_type.' 
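For orientation, a minimal sketch of how the new generator is consumed; the return keys are exactly those exercised by the hunk below, while the column layout of the data frame is left unspecified here:

import numpy as np
from doubleml.datasets import make_heterogeneous_data

np.random.seed(3141)
res = make_heterogeneous_data(n_obs=100, n_x=1, binary_treatment=True)
print(res['data'].head())                  # pandas.DataFrame with outcome, treatment and covariates
print(res['effects'][:5])                  # np.ndarray of individual treatment effects
print(callable(res['treatment_effect']))   # True: callable evaluating the effect on new points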
@@ -227,3 +227,36 @@ def test_make_confounded_plr_data_return_types(): assert isinstance(res['oracle_values']['beta_a'], float) assert isinstance(res['oracle_values']['a'], np.ndarray) assert isinstance(res['oracle_values']['z'], np.ndarray) + + +@pytest.fixture(scope='function', + params=[False, True]) +def binary_treatment(request): + return request.param + + +@pytest.fixture(scope='function', + params=[1, 2]) +def n_x(request): + return request.param + + +@pytest.mark.ci +def test_make_heterogeneous_data_return_types(binary_treatment, n_x): + np.random.seed(3141) + res = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=binary_treatment) + assert isinstance(res, dict) + assert isinstance(res['data'], pd.DataFrame) + assert isinstance(res['effects'], np.ndarray) + assert callable(res['treatment_effect']) + + # test input checks + msg = 'n_x must be either 1 or 2.' + with pytest.raises(AssertionError, match=msg): + _ = make_heterogeneous_data(n_obs=100, n_x=0, binary_treatment=binary_treatment) + msg = 'support_size must be smaller than p.' + with pytest.raises(AssertionError, match=msg): + _ = make_heterogeneous_data(n_obs=100, n_x=n_x, support_size=31, binary_treatment=binary_treatment) + msg = 'binary_treatment must be a boolean.' + with pytest.raises(AssertionError, match=msg): + _ = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=2) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py new file mode 100644 index 00000000..89f437d3 --- /dev/null +++ b/doubleml/tests/test_did_external_predictions.py @@ -0,0 +1,59 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml import DoubleMLDID +from doubleml.datasets import make_did_SZ2020 +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier +from ._utils import draw_smpls + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_fixture(did_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + kwargs = { + "obj_dml_data": dml_data, + "score": did_score, + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "draw_sample_splitting": False + } + dml_did = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_did.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = dml_did.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_did.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + + dml_did_ext = DoubleMLDID(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": dml_did.coef[0], "coef_ext": dml_did_ext.coef[0]} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_did_coef(doubleml_did_fixture): + assert math.isclose(doubleml_did_fixture["coef_normal"], 
doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py new file mode 100644 index 00000000..631143ab --- /dev/null +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -0,0 +1,62 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml import DoubleMLDIDCS +from doubleml.datasets import make_did_SZ2020 +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier +from ._utils import draw_smpls + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData") + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + kwargs = { + "obj_dml_data": dml_data, + "score": did_score, + "n_rep": n_rep, + "n_folds": 5, + "dml_procedure": dml_procedure, + "draw_sample_splitting": False + } + dml_did_cs = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_did_cs.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_cs.fit(store_predictions=True) + + ext_predictions["d"]["ml_g_d0_t0"] = dml_did_cs.predictions["ml_g_d0_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d0_t1"] = dml_did_cs.predictions["ml_g_d0_t1"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t0"] = dml_did_cs.predictions["ml_g_d1_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t1"] = dml_did_cs.predictions["ml_g_d1_t1"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_did_cs.predictions["ml_m"][:, :, 0] + + dml_did_cs_ext = DoubleMLDIDCS(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_cs_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_cs_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": dml_did_cs.coef[0], "coef_ext": dml_did_cs_ext.coef[0]} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_didcs_coef(doubleml_didcs_fixture): + assert math.isclose(doubleml_didcs_fixture["coef_normal"], doubleml_didcs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index bb5a75bd..0c705d3c 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -2,8 +2,9 @@ import pandas as pd import numpy as np -from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData,\ - DoubleMLClusterData, DoubleMLPQ, DoubleMLLPQ, DoubleMLCVAR, DoubleMLQTE, DoubleMLDID, DoubleMLDIDCS +from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, \ + DoubleMLClusterData, DoubleMLPQ, DoubleMLLPQ, DoubleMLCVAR, DoubleMLQTE, DoubleMLDID, \ + DoubleMLDIDCS, DoubleMLBLP from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, \ make_pliv_multiway_cluster_CKMS2021, make_did_SZ2020 from doubleml.double_ml_data import DoubleMLBaseData @@ -425,6 +426,72 @@ def test_doubleml_exception_trimming_rule(): 
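# The test added below exercises the new `weights` validation for DoubleMLIRM.
# For contrast, a sketch of specifications that pass those checks (n is an
# illustrative sample size; the (n, 1) second axis of weights_bar matches n_rep=1):
import numpy as np
n = 100
ate_weights = np.ones(n)                            # ATE: nonnegative array of shape (n,)
ate_weights_dict = {'weights': np.ones(n),          # ATE: dict form with both components
                    'weights_bar': np.ones((n, 1))}
atte_weights = np.random.binomial(1, 0.5, size=n)   # ATTE: weights must be binary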
trimming_rule='truncate', trimming_threshold=0.6) +@pytest.mark.ci +def test_doubleml_exception_weights(): + + msg = "weights must be a numpy array or dictionary. weights of type was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=1) + msg = r"weights must have keys \['weights', 'weights_bar'\]. keys dict_keys\(\['d'\]\) were passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights={'d': [1, 2, 3]}) + msg = "weights must be a numpy array for ATTE score. weights of type was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + score='ATTE', weights={'weights': np.ones_like(dml_data_irm.d)}) + + # shape checks + msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones(1)) + msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones((n, 2))) + + msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones(1), 'weights_bar': np.ones(1)}) + msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones((n, 2)), 'weights_bar': np.ones((n, 2))}) + msg = rf"weights_bar must have shape \({n}, 1\). weights_bar of shape \({n}, 2\) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones(n), 'weights_bar': np.ones((n, 2))}) + + # value checks + msg = "All weights values must be greater or equal 0." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights=-1*np.ones(n,)) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': -1*np.ones(n,), 'weights_bar': np.ones((n, 1))}) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones(n,), 'weights_bar': -1*np.ones((n, 1))}) + + msg = "At least one weight must be non-zero." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights=np.zeros((dml_data_irm.d.shape[0], ))) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.zeros((dml_data_irm.d.shape[0], )), + 'weights_bar': np.ones((dml_data_irm.d.shape[0], 1))}) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones((dml_data_irm.d.shape[0], )), + 'weights_bar': np.zeros((dml_data_irm.d.shape[0], 1))}) + + msg = "weights must be binary for ATTE score." 
+ with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + score='ATTE', weights=np.random.choice([0, 0.2], dml_data_irm.d.shape[0])) + + @pytest.mark.ci def test_doubleml_exception_quantiles(): msg = "Quantile has to be a float. Object of type passed." @@ -1256,6 +1323,20 @@ def test_doubleml_nan_prediction(): _ = DoubleMLPLR(dml_data, ml_l, LassoWithInfPred()).fit() +@pytest.mark.ci +def test_doubleml_warning_blp(): + n = 5 + np.random.seed(42) + random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 3))) + random_signal = np.random.normal(0, 1, size=(n, )) + blp = DoubleMLBLP(random_signal, random_basis) + blp.fit() + + msg = 'Returning pointwise confidence intervals for basis coefficients.' + with pytest.warns(UserWarning, match=msg): + _ = blp.confint(joint=True) + + @pytest.mark.ci def test_doubleml_exception_gate(): dml_irm_obj = DoubleMLIRM(dml_data_irm, @@ -1268,10 +1349,11 @@ def test_doubleml_exception_gate(): msg = "Groups must be of DataFrame type. Groups of type was passed." with pytest.raises(TypeError, match=msg): dml_irm_obj.gate(groups=2) + groups = pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))) msg = (r'Columns of groups must be of bool type or int type \(dummy coded\). ' 'Alternatively, groups should only contain one column.') with pytest.raises(TypeError, match=msg): - dml_irm_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3)))) + dml_irm_obj.gate(groups=groups) dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), @@ -1280,10 +1362,10 @@ def test_doubleml_exception_gate(): n_folds=5, score='ATTE') dml_irm_obj.fit() - + groups = pd.DataFrame(np.random.choice([True, False], size=dml_data_irm.n_obs)) msg = 'Invalid score ATTE. Valid score ATE.' with pytest.raises(ValueError, match=msg): - dml_irm_obj.gate(groups=2) + dml_irm_obj.gate(groups=groups) dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), @@ -1296,7 +1378,7 @@ def test_doubleml_exception_gate(): msg = 'Only implemented for one repetition. Number of repetitions is 2.' with pytest.raises(NotImplementedError, match=msg): - dml_irm_obj.gate(groups=2) + dml_irm_obj.gate(groups=groups) @pytest.mark.ci @@ -1326,6 +1408,55 @@ def test_doubleml_exception_cate(): dml_irm_obj.cate(basis=2) +@pytest.mark.ci +def test_doubleml_exception_plr_cate(): + dml_plr_obj = DoubleMLPLR(dml_data, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2, + n_rep=2) + dml_plr_obj.fit() + msg = 'Only implemented for one repetition. Number of repetitions is 2.' + with pytest.raises(NotImplementedError, match=msg): + dml_plr_obj.cate(basis=2) + + dml_plr_obj = DoubleMLPLR(dml_data, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2) + dml_plr_obj.fit(store_predictions=False) + msg = r'predictions are None. Call .fit\(store_predictions=True\) to store the predictions.' + with pytest.raises(ValueError, match=msg): + dml_plr_obj.cate(basis=2) + + dml_data_multiple_treat = DoubleMLData(dml_data.data, y_col="y", d_cols=['d', 'X1']) + dml_plr_obj_multiple = DoubleMLPLR(dml_data_multiple_treat, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2) + dml_plr_obj_multiple.fit() + msg = 'Only implemented for single treatment. Number of treatments is 2.' + with pytest.raises(NotImplementedError, match=msg): + dml_plr_obj_multiple.cate(basis=2) + + +@pytest.mark.ci +def test_doubleml_exception_plr_gate(): + dml_plr_obj = DoubleMLPLR(dml_data, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2, + n_rep=1) + dml_plr_obj.fit() + msg = "Groups must be of DataFrame type. 
Groups of type was passed." + with pytest.raises(TypeError, match=msg): + dml_plr_obj.gate(groups=2) + msg = (r'Columns of groups must be of bool type or int type \(dummy coded\). ' + 'Alternatively, groups should only contain one column.') + with pytest.raises(TypeError, match=msg): + dml_plr_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(dml_data.n_obs, 3)))) + + @pytest.mark.ci def test_double_ml_exception_evaluate_learner(): dml_irm_obj = DoubleMLIRM(dml_data_irm, @@ -1402,3 +1533,84 @@ def test_doubleml_exception_policytree(): msg = 'Only implemented for one repetition. Number of repetitions is 2.' with pytest.raises(NotImplementedError, match=msg): dml_irm_obj.policy_tree(features=2, depth=1) + + +@pytest.mark.ci +def test_double_ml_external_predictions(): + dml_irm_obj = DoubleMLIRM(dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + trimming_threshold=0.05, + n_folds=5, + score='ATE', + n_rep=2) + + msg = "external_predictions must be a dictionary. ml_m of type was passed." + with pytest.raises(TypeError, match=msg): + dml_irm_obj.fit(external_predictions="ml_m") + + dml_irm_obj = DoubleMLIRM(dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + trimming_threshold=0.05, + n_folds=5, + score='ATE', + n_rep=1) + + predictions = {'d': 'test', 'd_f': 'test'} + msg = (r"Invalid external_predictions. Invalid treatment variable in \['d', 'd_f'\]. " + "Valid treatment variables d.") + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) + + predictions = {'d': 'test'} + msg = ("external_predictions must be a nested dictionary. " + "For treatment d a value of type was passed.") + with pytest.raises(TypeError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) + + predictions = {'d': {'ml_f': 'test'}} + msg = ("Invalid external_predictions. " + r"Invalid nuisance learner for treatment d in \['ml_f'\]. " + "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) + + predictions = {'d': {'ml_m': 'test', 'ml_f': 'test'}} + msg = ("Invalid external_predictions. " + r"Invalid nuisance learner for treatment d in \['ml_m', 'ml_f'\]. " + "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) + + predictions = {'d': {'ml_m': 'test'}} + msg = ("Invalid external_predictions. " + "The values of the nested list must be a numpy array. " + "Invalid predictions for treatment d and learner ml_m. " + "Object of type was passed.") + with pytest.raises(TypeError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) + + predictions = {'d': {'ml_m': np.array([0])}} + msg = ('Invalid external_predictions. ' + r'The supplied predictions have to be of shape \(100, 1\). ' + 'Invalid predictions for treatment d and learner ml_m. ' + r'Predictions of shape \(1,\) passed.') + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) + + predictions = {'d': {'ml_m': np.zeros(100)}} + msg = ('Invalid external_predictions. ' + r'The supplied predictions have to be of shape \(100, 1\). ' + 'Invalid predictions for treatment d and learner ml_m. ' + r'Predictions of shape \(100,\) passed.') + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) + + predictions = {'d': {'ml_m': np.ones(shape=(5, 3))}} + msg = ('Invalid external_predictions. 
' + r'The supplied predictions have to be of shape \(100, 1\). ' + 'Invalid predictions for treatment d and learner ml_m. ' + r'Predictions of shape \(5, 3\) passed.') + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) diff --git a/doubleml/tests/test_doubleml_exceptions_ext_preds.py b/doubleml/tests/test_doubleml_exceptions_ext_preds.py new file mode 100644 index 00000000..395d8bf5 --- /dev/null +++ b/doubleml/tests/test_doubleml_exceptions_ext_preds.py @@ -0,0 +1,23 @@ +import pytest +from doubleml import DoubleMLCVAR, DoubleMLQTE, DoubleMLData +from doubleml.datasets import make_irm_data +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier + +df_irm = make_irm_data(n_obs=10, dim_x=2, theta=0.5, return_type="DataFrame") +ext_predictions = {"d": {}} + + +@pytest.mark.ci +def test_cvar_external_prediction_exception(): + msg = "External predictions not implemented for DoubleMLCVAR." + with pytest.raises(NotImplementedError, match=msg): + cvar = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), DMLDummyRegressor(), DMLDummyClassifier(), treatment=1) + cvar.fit(external_predictions=ext_predictions) + + +@pytest.mark.ci +def test_qte_external_prediction_exception(): + msg = "External predictions not implemented for DoubleMLQTE." + with pytest.raises(NotImplementedError, match=msg): + qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), DMLDummyClassifier(), DMLDummyClassifier()) + qte.fit(external_predictions=ext_predictions) diff --git a/doubleml/tests/test_doubleml_model_defaults.py b/doubleml/tests/test_doubleml_model_defaults.py index 9298dba3..7c2d7b38 100644 --- a/doubleml/tests/test_doubleml_model_defaults.py +++ b/doubleml/tests/test_doubleml_model_defaults.py @@ -102,6 +102,8 @@ def test_irm_defaults(): assert dml_irm.trimming_rule == 'truncate' assert dml_irm.trimming_threshold == 1e-2 assert not dml_irm.normalize_ipw + assert set(dml_irm.weights.keys()) == set(['weights']) + assert np.array_equal(dml_irm.weights['weights'], np.ones((dml_irm._dml_data.n_obs,))) @pytest.mark.ci diff --git a/doubleml/tests/test_dummy_learners.py b/doubleml/tests/test_dummy_learners.py new file mode 100644 index 00000000..c23088fa --- /dev/null +++ b/doubleml/tests/test_dummy_learners.py @@ -0,0 +1,46 @@ +import pytest +import numpy as np +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier +from sklearn.base import clone + + +@pytest.fixture(scope="module") +def dl_fixture(): + fixture = { + "DMLDummyRegressor": DMLDummyRegressor(), + "DMLDummyClassifier": DMLDummyClassifier(), + "X": np.random.normal(0, 1, size=(100, 10)), + "y_con": np.random.normal(0, 1, size=(100, 1)), + "y_cat": np.random.binomial(1, 0.5, size=(100, 1)), + } + + return fixture + + +@pytest.mark.ci +def test_fit(dl_fixture): + msg = "Accessed fit method of DMLDummyRegressor!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["DMLDummyRegressor"].fit(dl_fixture["X"], dl_fixture["y_con"]) + msg = "Accessed fit method of DMLDummyClassifier!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["DMLDummyClassifier"].fit(dl_fixture["X"], dl_fixture["y_cat"]) + + +@pytest.mark.ci +def test_predict(dl_fixture): + msg = "Accessed predict method of DMLDummyRegressor!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["DMLDummyRegressor"].predict(dl_fixture["X"]) + msg = "Accessed predict method of DMLDummyClassifier!" 
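# The DMLDummy* learners are deliberate placeholders: any call to fit or predict
# raises an AttributeError, as asserted here. The external-predictions tests in
# this suite therefore demonstrate that supplied predictions fully bypass
# nuisance estimation, since a dummy learner would blow up if it were touched.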
+ with pytest.raises(AttributeError, match=msg): + dl_fixture["DMLDummyClassifier"].predict(dl_fixture["X"]) + + +@pytest.mark.ci +def test_clone(dl_fixture): + try: + _ = clone(dl_fixture["DMLDummyRegressor"]) + _ = clone(dl_fixture["DMLDummyClassifier"]) + except Exception as e: + pytest.fail(f"clone() raised an exception:\n{str(e)}\n") diff --git a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py new file mode 100644 index 00000000..96d59018 --- /dev/null +++ b/doubleml/tests/test_iivm_external_predictions.py @@ -0,0 +1,74 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml import DoubleMLIIVM, DoubleMLData +from doubleml.datasets import make_iivm_data +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def adapted_doubleml_fixture(dml_procedure, n_rep): + ext_predictions = {"d": {}} + + data = make_iivm_data( + n_obs=500, dim_x=20, theta=0.5, alpha_x=1.0, return_type="DataFrame" + ) + + np.random.seed(3141) + + dml_data = DoubleMLData(data, "y", "d", z_cols="z") + + kwargs = { + "obj_dml_data": dml_data, + "score": "LATE", + "n_rep": n_rep, + "dml_procedure": dml_procedure, + } + + dml_iivm = DoubleMLIIVM( + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ml_r=LogisticRegression(), + **kwargs, + ) + np.random.seed(3141) + + dml_iivm.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = dml_iivm.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_iivm.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_iivm.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_r0"] = dml_iivm.predictions["ml_r0"][:, :, 0] + ext_predictions["d"]["ml_r1"] = dml_iivm.predictions["ml_r1"][:, :, 0] + + dml_iivm_ext = DoubleMLIIVM( + ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), ml_r=DMLDummyClassifier(), **kwargs + ) + + np.random.seed(3141) + dml_iivm_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": dml_iivm.coef[0], "coef_ext": dml_iivm_ext.coef[0]} + + return res_dict + + +@pytest.mark.ci +def test_adapted_doubleml_coef(adapted_doubleml_fixture): + assert math.isclose( + adapted_doubleml_fixture["coef_normal"], + adapted_doubleml_fixture["coef_ext"], + rel_tol=1e-9, + abs_tol=1e-4, + ) diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index e4842ffa..e56eea65 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -10,6 +10,7 @@ import doubleml as dml from doubleml.datasets import make_irm_data +from doubleml._utils_resampling import DoubleMLResampling from ._utils import draw_smpls from ._utils_irm_manual import fit_irm, boot_irm, fit_sensitivity_elements_irm @@ -87,10 +88,31 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold) + np.random.seed(3141) + # test with external nuisance predictions + dml_irm_obj_ext = dml.DoubleMLIRM(obj_dml_data, + ml_g, ml_m, + n_folds, + score=score, + dml_procedure=dml_procedure, + normalize_ipw=normalize_ipw, + draw_sample_splitting=False, + trimming_threshold=trimming_threshold) + + # synchronize the sample splitting + 
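# Identical folds matter here: with external predictions fixed per observation,
# the dml1 aggregation and the bootstrap still depend on the fold assignment, so
# the boot_* comparisons below are only meaningful on synchronized splits.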
dml_irm_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1, 1), + 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1, 1), + 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1, 1)}} + dml_irm_obj_ext.fit(external_predictions=prediction_dict) + res_dict = {'coef': dml_irm_obj.coef, 'coef_manual': res_manual['theta'], + 'coef_ext': dml_irm_obj_ext.coef, 'se': dml_irm_obj.se, 'se_manual': res_manual['se'], + 'se_ext': dml_irm_obj_ext.se, 'boot_methods': boot_methods} for bootstrap in boot_methods: @@ -104,10 +126,14 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ np.random.seed(3141) dml_irm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_irm_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) res_dict['boot_coef' + bootstrap] = dml_irm_obj.boot_coef res_dict['boot_t_stat' + bootstrap] = dml_irm_obj.boot_t_stat res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat + res_dict['boot_coef' + bootstrap + '_ext'] = dml_irm_obj_ext.boot_coef + res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_irm_obj_ext.boot_t_stat # sensitivity tests res_dict['sensitivity_elements'] = dml_irm_obj.sensitivity_elements @@ -125,16 +151,22 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ @pytest.mark.ci def test_dml_irm_coef(dml_irm_fixture): - assert math.isclose(dml_irm_fixture['coef'], + assert math.isclose(dml_irm_fixture['coef'][0], dml_irm_fixture['coef_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_irm_fixture['coef'][0], + dml_irm_fixture['coef_ext'][0], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci def test_dml_irm_se(dml_irm_fixture): - assert math.isclose(dml_irm_fixture['se'], + assert math.isclose(dml_irm_fixture['se'][0], dml_irm_fixture['se_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_irm_fixture['se'][0], + dml_irm_fixture['se_ext'][0], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci @@ -143,9 +175,15 @@ def test_dml_irm_boot(dml_irm_fixture): assert np.allclose(dml_irm_fixture['boot_coef' + bootstrap], dml_irm_fixture['boot_coef' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_irm_fixture['boot_coef' + bootstrap], + dml_irm_fixture['boot_coef' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap], dml_irm_fixture['boot_t_stat' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap], + dml_irm_fixture['boot_t_stat' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) @pytest.mark.ci @@ -199,7 +237,7 @@ def test_dml_irm_cate_gate(): gate_1 = dml_irm_obj.gate(groups_1) assert isinstance(gate_1, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_1.confint(), pd.DataFrame) - assert all(gate_1.confint().index == groups_1.columns) + assert all(gate_1.confint().index == groups_1.columns.to_list()) np.random.seed(42) groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n)) @@ -209,3 +247,115 @@ def test_dml_irm_cate_gate(): assert isinstance(gate_2, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_2.confint(), pd.DataFrame) assert all(gate_2.confint().index == ["Group_1", "Group_2"]) + + +@pytest.fixture(scope='module', + params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope='module') +def dml_irm_weights_fixture(n_rep, dml_procedure): + n = 10000 + 
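# Sketch of what this fixture verifies below: (i) an ATE fit with unit weights
# reproduces the unweighted ATE, and (ii) an ATE score with weights d / p_hat and
# weights_bar m_hat / p_hat, where p_hat = mean(d), reproduces the ATTE estimate
# up to the small score difference noted at the end of this module.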
# collect data + np.random.seed(42) + obj_dml_data = make_irm_data(n_obs=n, dim_x=2) + kwargs = { + "trimming_threshold": 0.05, + "n_folds": 5, + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "draw_sample_splitting": False + } + + smpls = DoubleMLResampling( + n_folds=5, + n_rep=n_rep, + n_obs=n, + apply_cross_fitting=True, + stratify=obj_dml_data.d).split_samples() + + # First stage estimation + ml_g = LinearRegression() + ml_m = LogisticRegression(penalty='l2', random_state=42) + + # ATE with and without weights + dml_irm_obj_ate_no_weights = dml.DoubleMLIRM( + obj_dml_data, + ml_g=clone(ml_g), + ml_m=clone(ml_m), + score='ATE', + **kwargs) + dml_irm_obj_ate_no_weights.set_sample_splitting(smpls) + np.random.seed(42) + dml_irm_obj_ate_no_weights.fit() + + dml_irm_obj_ate_weights = dml.DoubleMLIRM( + obj_dml_data, + ml_g=clone(ml_g), + ml_m=clone(ml_m), + score='ATE', + weights=np.ones_like(obj_dml_data.y), **kwargs) + dml_irm_obj_ate_weights.set_sample_splitting(smpls) + np.random.seed(42) + dml_irm_obj_ate_weights.fit() + + # ATTE with and without weights + dml_irm_obj_atte_no_weights = dml.DoubleMLIRM( + obj_dml_data, + ml_g=clone(ml_g), + ml_m=clone(ml_m), + score='ATTE', + **kwargs) + dml_irm_obj_atte_no_weights.set_sample_splitting(smpls) + np.random.seed(42) + dml_irm_obj_atte_no_weights.fit() + + m_hat = dml_irm_obj_atte_no_weights.predictions["ml_m"][:, :, 0] + p_hat = obj_dml_data.d.mean() + weights = obj_dml_data.d / p_hat + weights_bar = m_hat / p_hat + weight_dict = {'weights': weights, 'weights_bar': weights_bar} + dml_irm_obj_atte_weights = dml.DoubleMLIRM( + obj_dml_data, + ml_g=clone(ml_g), + ml_m=clone(ml_m), + score='ATE', + weights=weight_dict, **kwargs) + dml_irm_obj_atte_weights.set_sample_splitting(smpls) + np.random.seed(42) + dml_irm_obj_atte_weights.fit() + + res_dict = { + 'coef_ate': dml_irm_obj_ate_no_weights.coef, + 'coef_ate_weights': dml_irm_obj_ate_weights.coef, + 'coef_atte': dml_irm_obj_atte_no_weights.coef, + 'coef_atte_weights': dml_irm_obj_atte_weights.coef, + 'se_ate': dml_irm_obj_ate_no_weights.se, + 'se_ate_weights': dml_irm_obj_ate_weights.se, + 'se_atte': dml_irm_obj_atte_no_weights.se, + 'se_atte_weights': dml_irm_obj_atte_weights.se, + } + return res_dict + + +@pytest.mark.ci +def test_dml_irm_ate_weights(dml_irm_weights_fixture): + assert math.isclose(dml_irm_weights_fixture['coef_ate'], + dml_irm_weights_fixture['coef_ate_weights'], + rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_irm_weights_fixture['se_ate'], + dml_irm_weights_fixture['se_ate_weights'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_irm_atte_weights(dml_irm_weights_fixture): + assert math.isclose(dml_irm_weights_fixture['coef_atte'], + dml_irm_weights_fixture['coef_atte_weights'], + rel_tol=1e-9, abs_tol=1e-4) + # Remark that the scores are slightly different (Y instead of g(1,X) and coefficient of theta) + assert math.isclose(dml_irm_weights_fixture['se_atte'], + dml_irm_weights_fixture['se_atte_weights'], + rel_tol=1e-5, abs_tol=1e-3) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py new file mode 100644 index 00000000..7e89a320 --- /dev/null +++ b/doubleml/tests/test_irm_external_predictions.py @@ -0,0 +1,77 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml import DoubleMLIRM, DoubleMLData +from doubleml.datasets import make_irm_data +from doubleml.utils import DMLDummyRegressor, 
DMLDummyClassifier + + +@pytest.fixture(scope="module", params=["ATE", "ATTE"]) +def irm_score(request): + return request.param + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_ext(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_irm_fixture(irm_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_g_ext): + ext_predictions = {"d": {}} + + x, y, d = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="np.array") + + np.random.seed(3141) + + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + kwargs = {"obj_dml_data": dml_data, "score": irm_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + dml_irm = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + + dml_irm.fit(store_predictions=True) + + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = dml_irm.predictions["ml_m"][:, :, 0] + ml_m = DMLDummyClassifier() + else: + ml_m = LogisticRegression(random_state=42) + + if set_ml_g_ext: + ext_predictions["d"]["ml_g0"] = dml_irm.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_irm.predictions["ml_g1"][:, :, 0] + ml_g = DMLDummyRegressor() + else: + ml_g = LinearRegression() + + dml_irm_ext = DoubleMLIRM(ml_g=ml_g, ml_m=ml_m, **kwargs) + + np.random.seed(3141) + dml_irm_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": dml_irm.coef[0], "coef_ext": dml_irm_ext.coef[0]} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_irm_coef(doubleml_irm_fixture): + assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/tests/test_irm_weighted_scores.py b/doubleml/tests/test_irm_weighted_scores.py new file mode 100644 index 00000000..0994f10f --- /dev/null +++ b/doubleml/tests/test_irm_weighted_scores.py @@ -0,0 +1,132 @@ +import pytest +import numpy as np + +from sklearn.base import clone +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + +import doubleml as dml + +from .._utils import _normalize_ipw + + +def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw): + # fraction of treated for ATTE + p_hat = None + if score == 'ATTE': + p_hat = np.mean(d) + + if normalize_ipw: + m_hat = _normalize_ipw(m_hat, d) + + # compute residuals + u_hat0 = y - g_hat0 + u_hat1 = None + if score == 'ATE': + u_hat1 = y - g_hat1 + + psi_a = np.full_like(y, np.nan) + psi_b = np.full_like(y, np.nan) + if score == 'ATE': + psi_b = g_hat1 - g_hat0 \ + + np.divide(np.multiply(d, u_hat1), m_hat) \ + - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat) + psi_a = np.full_like(m_hat, -1.0) + else: + assert score == 'ATTE' + psi_b = np.divide(np.multiply(d, u_hat0), p_hat) \ + - np.divide(np.multiply(m_hat, np.multiply(1.0-d, u_hat0)), + np.multiply(p_hat, (1.0 - m_hat))) + psi_a = - np.divide(d, p_hat) + + return psi_a, psi_b + + +@pytest.fixture(scope='module', + params=[[LinearRegression(), + LogisticRegression(solver='lbfgs', max_iter=250)], + [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, 
n_estimators=10, random_state=42)]]) +def learner(request): + return request.param + + +@pytest.fixture(scope='module', + params=['ATE', 'ATTE']) +def score(request): + return request.param + + +@pytest.fixture(scope='module', + params=[False, True]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope='module', + params=[0.2, 0.15]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope='module') +def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold): + n_folds = 2 + + # collect data + (x, y, d) = generate_data_irm + obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + + # Set machine learning methods for m & g + ml_g = clone(learner[0]) + ml_m = clone(learner[1]) + + np.random.seed(3141) + dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, + ml_g, ml_m, + n_folds, + score=score, + normalize_ipw=normalize_ipw, + trimming_threshold=trimming_threshold) + dml_irm_obj.fit() + + # old score + psi_a_old, psi_b_old = old_score_elements( + y=y, + d=d, + g_hat0=np.squeeze(dml_irm_obj.predictions['ml_g0']), + g_hat1=np.squeeze(dml_irm_obj.predictions['ml_g1']), + m_hat=np.squeeze(dml_irm_obj.predictions['ml_m']), + score=score, + normalize_ipw=normalize_ipw + ) + + old_coef = -np.mean(psi_b_old) / np.mean(psi_a_old) + + result_dict = { + 'psi_a': np.squeeze(dml_irm_obj.psi_elements['psi_a']), + 'psi_b': np.squeeze(dml_irm_obj.psi_elements['psi_b']), + 'psi_a_old': psi_a_old, + 'psi_b_old': psi_b_old, + 'coef': np.squeeze(dml_irm_obj.coef), + 'old_coef': old_coef, + } + return result_dict + + +@pytest.mark.ci +def test_irm_old_vs_weighted_score_psi_b(old_vs_weighted_score_fixture): + assert np.allclose(old_vs_weighted_score_fixture['psi_b'], + old_vs_weighted_score_fixture['psi_b_old']) + + +@pytest.mark.ci +def test_irm_old_vs_weighted_score_psi_a(old_vs_weighted_score_fixture): + assert np.allclose(old_vs_weighted_score_fixture['psi_a'], + old_vs_weighted_score_fixture['psi_a_old']) + + +@pytest.mark.ci +def test_irm_old_vs_weighted_coef(old_vs_weighted_score_fixture): + assert np.allclose(old_vs_weighted_score_fixture['coef'], + old_vs_weighted_score_fixture['old_coef']) diff --git a/doubleml/tests/test_lpq.py b/doubleml/tests/test_lpq.py index e7550e06..beb8b6a0 100644 --- a/doubleml/tests/test_lpq.py +++ b/doubleml/tests/test_lpq.py @@ -119,9 +119,9 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw=normalize_ipw, kde=kde, n_rep=1, trimming_threshold=trimming_threshold) - res_dict = {'coef': dml_lpq_obj.coef, + res_dict = {'coef': dml_lpq_obj.coef[0], 'coef_manual': res_manual['lpq'], - 'se': dml_lpq_obj.se, + 'se': dml_lpq_obj.se[0], 'se_manual': res_manual['se']} return res_dict diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py new file mode 100644 index 00000000..fbe0e742 --- /dev/null +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -0,0 +1,72 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LogisticRegression +from doubleml import DoubleMLLPQ, DoubleMLData +from doubleml.datasets import make_iivm_data +from doubleml.utils import DMLDummyClassifier +from ._utils import draw_smpls + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) 
+def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): + ext_predictions = {"d": {}} + np.random.seed(3141) + data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, alpha_x=1.0, return_type="DataFrame") + + dml_data = DoubleMLData(data, "y", "d", z_cols="z") + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + + kwargs = { + "obj_dml_data": dml_data, + "score": "LPQ", + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "normalize_ipw": normalize_ipw, + "draw_sample_splitting": False, + } + + ml_g = LogisticRegression() + ml_m = LogisticRegression() + + dml_lpq = DoubleMLLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_lpq.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_lpq.fit(store_predictions=True) + + ext_predictions["d"]["ml_m_z"] = dml_lpq.predictions["ml_m_z"][:, :, 0] + ext_predictions["d"]["ml_m_d_z0"] = dml_lpq.predictions["ml_m_d_z0"][:, :, 0] + ext_predictions["d"]["ml_m_d_z1"] = dml_lpq.predictions["ml_m_d_z1"][:, :, 0] + ext_predictions["d"]["ml_g_du_z0"] = dml_lpq.predictions["ml_g_du_z0"][:, :, 0] + ext_predictions["d"]["ml_g_du_z1"] = dml_lpq.predictions["ml_g_du_z1"][:, :, 0] + + dml_lpq_ext = DoubleMLLPQ(ml_g=DMLDummyClassifier(), ml_m=DMLDummyClassifier(), **kwargs) + dml_lpq_ext.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_lpq_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": dml_lpq.coef[0], "coef_ext": dml_lpq_ext.coef[0]} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_lpq_coef(doubleml_lpq_fixture): + assert math.isclose(doubleml_lpq_fixture["coef_normal"], doubleml_lpq_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/tests/test_nonlinear_score_mixin.py b/doubleml/tests/test_nonlinear_score_mixin.py index 2aca4f7a..ede9a0a1 100644 --- a/doubleml/tests/test_nonlinear_score_mixin.py +++ b/doubleml/tests/test_nonlinear_score_mixin.py @@ -100,7 +100,7 @@ def _check_score(self, score): def _check_data(self, obj_dml_data): pass - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/tests/test_pliv_external_predictions.py b/doubleml/tests/test_pliv_external_predictions.py new file mode 100644 index 00000000..89d63b8f --- /dev/null +++ b/doubleml/tests/test_pliv_external_predictions.py @@ -0,0 +1,100 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression +from doubleml import DoubleMLPLIV, DoubleMLData +from doubleml.datasets import make_pliv_CHS2015 +from doubleml.utils import DMLDummyRegressor + + +@pytest.fixture(scope="module", params=["partialling out", "IV-type"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def dim_z(request): + return request.param + + +@pytest.fixture(scope="module") +def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): + # IV-type score only allows dim_z = 1, so skip testcases with dim_z > 1 for IV-type score + if dim_z > 1 and score == "IV-type": + pytest.skip("IV-type score only allows 
dim_z = 1") + res_dict = None + else: + ext_predictions = {"d": {}} + + data = make_pliv_CHS2015( + n_obs=500, dim_x=20, alpha=0.5, dim_z=dim_z, return_type="DataFrame" + ) + + np.random.seed(3141) + + z_cols = [f"Z{i}" for i in range(1, dim_z + 1)] + dml_data = DoubleMLData(data, "y", "d", z_cols=z_cols) + + kwargs = { + "obj_dml_data": dml_data, + "score": score, + "n_rep": n_rep, + "dml_procedure": dml_procedure, + } + + if score == "IV-type": + kwargs["ml_g"] = LinearRegression() + + dml_pliv = DoubleMLPLIV( + ml_m=LinearRegression(), + ml_l=LinearRegression(), + ml_r=LinearRegression(), + **kwargs, + ) + np.random.seed(3141) + + dml_pliv.fit(store_predictions=True) + + ext_predictions["d"]["ml_l"] = dml_pliv.predictions["ml_l"][:, :, 0] + ext_predictions["d"]["ml_r"] = dml_pliv.predictions["ml_r"][:, :, 0] + + if dim_z == 1: + ext_predictions["d"]["ml_m"] = dml_pliv.predictions["ml_m"][:, :, 0] + if score == "IV-type": + kwargs["ml_g"] = DMLDummyRegressor() + ext_predictions["d"]["ml_g"] = dml_pliv.predictions["ml_g"][:, :, 0] + else: + for instr in range(dim_z): + ml_m_key = "ml_m_" + "Z" + str(instr + 1) + ext_predictions["d"][ml_m_key] = dml_pliv.predictions[ml_m_key][:, :, 0] + + dml_pliv_ext = DoubleMLPLIV( + ml_m=DMLDummyRegressor(), ml_l=DMLDummyRegressor(), ml_r=DMLDummyRegressor(), **kwargs + ) + + np.random.seed(3141) + dml_pliv_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": dml_pliv.coef[0], "coef_ext": dml_pliv_ext.coef[0]} + + return res_dict + + +@pytest.mark.ci +def test_adapted_doubleml_coef(adapted_doubleml_fixture): + assert math.isclose( + adapted_doubleml_fixture["coef_normal"], + adapted_doubleml_fixture["coef_ext"], + rel_tol=1e-9, + abs_tol=1e-4, + ) diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index a27ee4a2..793d5b86 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -1,7 +1,8 @@ -import numpy as np import pytest import math import scipy +import numpy as np +import pandas as pd from sklearn.base import clone @@ -77,10 +78,43 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner), all_smpls, dml_procedure, score) + np.random.seed(3141) + # test with external nuisance predictions + if score == 'partialling out': + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, + n_folds, + score=score, + dml_procedure=dml_procedure) + else: + assert score == 'IV-type' + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, ml_g, + n_folds, + score=score, + dml_procedure=dml_procedure) + + # synchronize the sample splitting + dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + if score == 'partialling out': + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1)}} + else: + assert score == 'IV-type' + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1), + 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, 1)}} + + dml_plr_obj_ext.fit(external_predictions=prediction_dict) + + res_dict = {'coef': dml_plr_obj.coef, 'coef_manual': res_manual['theta'], + 'coef_ext': dml_plr_obj_ext.coef, 'se': dml_plr_obj.se, 'se_manual': res_manual['se'], + 'se_ext': dml_plr_obj_ext.se, 'boot_methods': boot_methods} for bootstrap in boot_methods: @@ -91,10 +125,14 @@ def dml_plr_fixture(generate_data1, learner, score, 
dml_procedure): np.random.seed(3141) dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_plr_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat + res_dict['boot_coef' + bootstrap + '_ext'] = dml_plr_obj_ext.boot_coef + res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_plr_obj_ext.boot_t_stat # sensitivity tests res_dict['sensitivity_elements'] = dml_plr_obj.sensitivity_elements @@ -114,6 +152,9 @@ def test_dml_plr_coef(dml_plr_fixture): assert math.isclose(dml_plr_fixture['coef'], dml_plr_fixture['coef_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_plr_fixture['coef'], + dml_plr_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci @@ -121,6 +162,9 @@ def test_dml_plr_se(dml_plr_fixture): assert math.isclose(dml_plr_fixture['se'], dml_plr_fixture['se_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_plr_fixture['se'], + dml_plr_fixture['se_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci @@ -129,9 +173,15 @@ def test_dml_plr_boot(dml_plr_fixture): assert np.allclose(dml_plr_fixture['boot_coef' + bootstrap], dml_plr_fixture['boot_coef' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_fixture['boot_coef' + bootstrap], + dml_plr_fixture['boot_coef' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], dml_plr_fixture['boot_t_stat' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], + dml_plr_fixture['boot_t_stat' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) @pytest.mark.ci @@ -282,3 +332,46 @@ def test_dml_plr_ols_manual_boot(dml_plr_ols_manual_fixture): assert np.allclose(dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap], dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_dml_plr_cate_gate(score, dml_procedure): + n = 9 + + # collect data + np.random.seed(42) + obj_dml_data = dml.datasets.make_plr_CCDDHNR2018(n_obs=n) + ml_l = LinearRegression() + ml_g = LinearRegression() + ml_m = LinearRegression() + + dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, + ml_g, ml_m, ml_l, + n_folds=2, + score=score, + dml_procedure=dml_procedure) + dml_plr_obj.fit() + random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5))) + cate = dml_plr_obj.cate(random_basis) + assert isinstance(cate, dml.DoubleMLBLP) + assert isinstance(cate.confint(), pd.DataFrame) + + groups_1 = pd.DataFrame( + np.column_stack([obj_dml_data.data['X1'] <= 0, + obj_dml_data.data['X1'] > 0.2]), + columns=['Group 1', 'Group 2']) + msg = ('At least one group effect is estimated with less than 6 observations.') + with pytest.warns(UserWarning, match=msg): + gate_1 = dml_plr_obj.gate(groups_1) + assert isinstance(gate_1, dml.double_ml_blp.DoubleMLBLP) + assert isinstance(gate_1.confint(), pd.DataFrame) + assert all(gate_1.confint().index == groups_1.columns.tolist()) + + np.random.seed(42) + groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n)) + msg = ('At least one group effect is estimated with less than 6 observations.') + with pytest.warns(UserWarning, match=msg): + gate_2 = dml_plr_obj.gate(groups_2) + assert isinstance(gate_2, dml.double_ml_blp.DoubleMLBLP) + assert 
isinstance(gate_2.confint(), pd.DataFrame) + assert all(gate_2.confint().index == ["Group_1", "Group_2"]) diff --git a/doubleml/tests/test_plr_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py new file mode 100644 index 00000000..58c987f5 --- /dev/null +++ b/doubleml/tests/test_plr_external_predictions.py @@ -0,0 +1,96 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression +from doubleml import DoubleMLPLR, DoubleMLData +from doubleml.datasets import make_plr_CCDDHNR2018 +from doubleml.utils import DMLDummyRegressor + + +@pytest.fixture(scope="module", params=["IV-type", "partialling out"]) +def plr_score(request): + return request.param + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_l_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_ext(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_plr_fixture(plr_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_l_ext, set_ml_g_ext): + ext_predictions = {"d": {}} + + x, y, d = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array") + + np.random.seed(3141) + + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + kwargs = {"obj_dml_data": dml_data, "score": plr_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + if plr_score == "IV-type": + kwargs["ml_g"] = LinearRegression() + + dml_plr = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) + np.random.seed(3141) + + dml_plr.fit(store_predictions=True) + + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = dml_plr.predictions["ml_m"][:, :, 0] + ml_m = DMLDummyRegressor() + else: + ml_m = LinearRegression() + + if set_ml_l_ext: + ext_predictions["d"]["ml_l"] = dml_plr.predictions["ml_l"][:, :, 0] + ml_l = DMLDummyRegressor() + else: + ml_l = LinearRegression() + + if plr_score == "IV-type" and set_ml_g_ext: + ext_predictions["d"]["ml_g"] = dml_plr.predictions["ml_g"][:, :, 0] + kwargs["ml_g"] = DMLDummyRegressor() + elif plr_score == "IV-type" and not set_ml_g_ext: + kwargs["ml_g"] = LinearRegression() + else: + pass + + if plr_score == "IV-type" and set_ml_g_ext and not set_ml_l_ext: + ml_l = DMLDummyRegressor() + + # special case if ml_l is not needed + dml_plr_ext = DoubleMLPLR(ml_m=ml_m, ml_l=ml_l, **kwargs) + + np.random.seed(3141) + dml_plr_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": dml_plr.coef[0], "coef_ext": dml_plr_ext.coef[0]} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_plr_coef(doubleml_plr_fixture): + assert math.isclose(doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/tests/test_plr_rep_cross.py b/doubleml/tests/test_plr_rep_cross.py index f2a50e21..9bbc2616 100644 --- a/doubleml/tests/test_plr_rep_cross.py +++ b/doubleml/tests/test_plr_rep_cross.py @@ -75,12 +75,45 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep): res_manual = fit_plr(y, x, d, _clone(learner), _clone(learner), _clone(learner), all_smpls, dml_procedure, score, n_rep) + np.random.seed(3141) + # test with external nuisance 
predictions + if score == 'partialling out': + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, + n_folds, + n_rep, + score=score, + dml_procedure=dml_procedure) + else: + assert score == 'IV-type' + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, ml_g, + n_folds, + n_rep, + score=score, + dml_procedure=dml_procedure) + + # synchronize the sample splitting + dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + if score == 'partialling out': + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep)}} + else: + assert score == 'IV-type' + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep), + 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, n_rep)}} + + dml_plr_obj_ext.fit(external_predictions=prediction_dict) + res_dict = {'coef': dml_plr_obj.coef, 'coef_manual': res_manual['theta'], + 'coef_ext': dml_plr_obj_ext.coef, 'se': dml_plr_obj.se, 'se_manual': res_manual['se'], - 'boot_methods': boot_methods - } + 'se_ext': dml_plr_obj_ext.se, + 'boot_methods': boot_methods} for bootstrap in boot_methods: np.random.seed(3141) diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py new file mode 100644 index 00000000..c814378d --- /dev/null +++ b/doubleml/tests/test_pq_external_predictions.py @@ -0,0 +1,101 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LogisticRegression +from doubleml import DoubleMLPQ, DoubleMLData +from doubleml.datasets import make_irm_data +from doubleml.utils import DMLDummyClassifier +from ._utils import draw_smpls + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_ext(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_ml_g_ext): + ext_predictions = {"d": {}} + np.random.seed(3141) + data = make_irm_data(theta=1, n_obs=500, dim_x=5, return_type="DataFrame") + + dml_data = DoubleMLData(data, "y", "d") + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) + + kwargs = { + "obj_dml_data": dml_data, + "score": "PQ", + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "normalize_ipw": normalize_ipw, + "draw_sample_splitting": False, + } + + ml_m = LogisticRegression(random_state=42) + ml_g = LogisticRegression(random_state=42) + + dml_pq = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_pq.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_pq.fit(store_predictions=True) + + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = dml_pq.predictions["ml_m"][:, :, 0] + ml_m = DMLDummyClassifier() + else: + ml_m = LogisticRegression(random_state=42) + + if set_ml_g_ext: + ext_predictions["d"]["ml_g"] = dml_pq.predictions["ml_g"][:, :, 0] + ml_g = DMLDummyClassifier() + else: + ml_g = LogisticRegression(random_state=42) + + dml_pq_ext = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + 
dml_pq_ext.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_pq_ext.fit(external_predictions=ext_predictions) + + if set_ml_m_ext and not set_ml_g_ext: + # adjust tolerance for the case that ml_m is set to external predictions + # because no preliminary results are available for ml_m, the model uses the (external) final predictions for ml_m + tol_rel = 0.1 + tol_abs = 0.1 + else: + tol_rel = 1e-9 + tol_abs = 1e-4 + + res_dict = {"coef_normal": dml_pq.coef[0], "coef_ext": dml_pq_ext.coef[0], "tol_rel": tol_rel, "tol_abs": tol_abs} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_pq_coef(doubleml_pq_fixture): + assert math.isclose( + doubleml_pq_fixture["coef_normal"], + doubleml_pq_fixture["coef_ext"], + rel_tol=doubleml_pq_fixture["tol_rel"], + abs_tol=doubleml_pq_fixture["tol_abs"], + ) diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py new file mode 100644 index 00000000..3bdd77cc --- /dev/null +++ b/doubleml/utils/__init__.py @@ -0,0 +1,9 @@ +from .dummy_learners import DMLDummyRegressor +from .dummy_learners import DMLDummyClassifier +from .gain_statistics import gain_statistics + +__all__ = [ + "DMLDummyRegressor", + "DMLDummyClassifier", + "gain_statistics" +] diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py new file mode 100644 index 00000000..129fd18d --- /dev/null +++ b/doubleml/utils/dummy_learners.py @@ -0,0 +1,76 @@ +from sklearn.base import BaseEstimator + + +class DMLDummyRegressor(BaseEstimator): + """ + A dummy regressor that raises an AttributeError when attempting to access + its fit, predict, or set_params methods. + + Parameters + ---------- + + """ + + _estimator_type = "regressor" + + def fit(*args): + """ + Raises AttributeError: "Accessed fit method of DMLDummyRegressor!" + """ + + raise AttributeError("Accessed fit method of DMLDummyRegressor!") + + def predict(*args): + """ + Raises AttributeError: "Accessed predict method of DMLDummyRegressor!" + """ + + raise AttributeError("Accessed predict method of DMLDummyRegressor!") + + def set_params(*args): + """ + Raises AttributeError: "Accessed set_params method of DMLDummyRegressor!" + """ + + raise AttributeError("Accessed set_params method of DMLDummyRegressor!") + + +class DMLDummyClassifier(BaseEstimator): + """ + A dummy classifier that raises an AttributeError when attempting to access + its fit, predict, set_params, or predict_proba methods. + + Parameters + ---------- + + """ + + _estimator_type = "classifier" + + def fit(*args): + """ + Raises AttributeError: "Accessed fit method of DMLDummyClassifier!" + """ + + raise AttributeError("Accessed fit method of DMLDummyClassifier!") + + def predict(*args): + """ + Raises AttributeError: "Accessed predict method of DMLDummyClassifier!" + """ + + raise AttributeError("Accessed predict method of DMLDummyClassifier!") + + def set_params(*args): + """ + Raises AttributeError: "Accessed set_params method of DMLDummyClassifier!" + """ + + raise AttributeError("Accessed set_params method of DMLDummyClassifier!") + + def predict_proba(*args, **kwargs): + """ + Raises AttributeError: "Accessed predict_proba method of DMLDummyClassifier!"
+ """ + + raise AttributeError("Accessed predict_proba method of DMLDummyClassifier!") diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py new file mode 100644 index 00000000..3c50d084 --- /dev/null +++ b/doubleml/utils/gain_statistics.py @@ -0,0 +1,107 @@ +import numpy as np + + +def gain_statistics(dml_long, dml_short): + """ + Compute gain statistics as benchmark values for sensitivity parameters ``cf_d`` and ``cf_y``. + + Parameters + ---------- + + dml_long : + :class:`doubleml.DoubleML` model including all observed confounders + + dml_short : + :class:`doubleml.DoubleML` model that excludes one or several benchmark confounders + + Returns + -------- + benchmark_dict : dict + Benchmarking dictionary (dict) with values for ``cf_d``, ``cf_y``, ``rho``, and ``delta_theta``. + """ + if not isinstance(dml_long.sensitivity_elements, dict): + raise TypeError("dml_long does not contain the necessary sensitivity elements. " + "Expected dict for dml_long.sensitivity_elements.") + expected_keys = ['sigma2', 'nu2'] + if not all(key in dml_long.sensitivity_elements.keys() for key in expected_keys): + raise ValueError("dml_long does not contain the necessary sensitivity elements. " + "Required keys are: " + str(expected_keys)) + if not isinstance(dml_short.sensitivity_elements, dict): + raise TypeError("dml_short does not contain the necessary sensitivity elements. " + "Expected dict for dml_short.sensitivity_elements.") + if not all(key in dml_short.sensitivity_elements.keys() for key in expected_keys): + raise ValueError("dml_short does not contain the necessary sensitivity elements. " + "Required keys are: " + str(expected_keys)) + + for key in expected_keys: + if not isinstance(dml_long.sensitivity_elements[key], np.ndarray): + raise TypeError("dml_long does not contain the necessary sensitivity elements. " + f"Expected numpy.ndarray for key {key}.") + if not isinstance(dml_short.sensitivity_elements[key], np.ndarray): + raise TypeError("dml_short does not contain the necessary sensitivity elements. " + f"Expected numpy.ndarray for key {key}.") + if len(dml_long.sensitivity_elements[key].shape) != 3 or dml_long.sensitivity_elements[key].shape[0] != 1: + raise ValueError("dml_long does not contain the necessary sensitivity elements. " + f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}.") + if len(dml_short.sensitivity_elements[key].shape) != 3 or dml_short.sensitivity_elements[key].shape[0] != 1: + raise ValueError("dml_short does not contain the necessary sensitivity elements. " + f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}.") + if not np.array_equal(dml_long.sensitivity_elements[key].shape, dml_short.sensitivity_elements[key].shape): + raise ValueError("dml_long and dml_short do not contain the same shape of sensitivity elements. " + "Shapes of " + key + " are: " + str(dml_long.sensitivity_elements[key].shape) + + " and " + str(dml_short.sensitivity_elements[key].shape)) + + if not isinstance(dml_long.all_coef, np.ndarray): + raise TypeError("dml_long.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.") + if not isinstance(dml_short.all_coef, np.ndarray): + raise TypeError("dml_short.all_coef does not contain the necessary coefficients. 
Expected numpy.ndarray.") + + expected_shape = (dml_long.sensitivity_elements['sigma2'].shape[2], dml_long.sensitivity_elements['sigma2'].shape[1]) + if dml_long.all_coef.shape != expected_shape: + raise ValueError("dml_long.all_coef does not contain the necessary coefficients. Expected shape: " + + str(expected_shape)) + if dml_short.all_coef.shape != expected_shape: + raise ValueError("dml_short.all_coef does not contain the necessary coefficients. Expected shape: " + + str(expected_shape)) + + # save elements for readability + var_y = np.var(dml_long._dml_data.y) + var_y_residuals_long = np.squeeze(dml_long.sensitivity_elements['sigma2'], axis=0) + nu2_long = np.squeeze(dml_long.sensitivity_elements['nu2'], axis=0) + var_y_residuals_short = np.squeeze(dml_short.sensitivity_elements['sigma2'], axis=0) + nu2_short = np.squeeze(dml_short.sensitivity_elements['nu2'], axis=0) + + # compute nonparametric R2 + R2_y_long = 1.0 - np.divide(var_y_residuals_long, var_y) + R2_y_short = 1.0 - np.divide(var_y_residuals_short, var_y) + R2_riesz = np.divide(nu2_short, nu2_long) + + # Gain statistics + all_cf_y_benchmark = np.clip(np.divide((R2_y_long - R2_y_short), (1.0 - R2_y_long)), 0, 1) + all_cf_d_benchmark = np.clip(np.divide((1.0 - R2_riesz), R2_riesz), 0, 1) + cf_y_benchmark = np.median(all_cf_y_benchmark, axis=0) + cf_d_benchmark = np.median(all_cf_d_benchmark, axis=0) + + # change in estimates (slightly different to paper) + all_delta_theta = np.transpose(dml_short.all_coef - dml_long.all_coef) + delta_theta = np.median(all_delta_theta, axis=0) + + # degree of adversity + var_g = var_y_residuals_short - var_y_residuals_long + var_riesz = nu2_long - nu2_short + denom = np.sqrt(np.multiply(var_g, var_riesz), out=np.zeros_like(var_g), where=(var_g > 0) & (var_riesz > 0)) + rho_sign = np.sign(all_delta_theta) + rho_values = np.clip(np.divide(np.absolute(all_delta_theta), + denom, + out=np.ones_like(all_delta_theta), + where=denom != 0), + 0.0, 1.0) + all_rho_benchmark = np.multiply(rho_values, rho_sign) + rho_benchmark = np.median(all_rho_benchmark, axis=0) + benchmark_dict = { + "cf_y": cf_y_benchmark, + "cf_d": cf_d_benchmark, + "rho": rho_benchmark, + "delta_theta": delta_theta, + } + return benchmark_dict diff --git a/doubleml/utils/tests/test_exceptions_gain_statistics.py b/doubleml/utils/tests/test_exceptions_gain_statistics.py new file mode 100644 index 00000000..805a84ed --- /dev/null +++ b/doubleml/utils/tests/test_exceptions_gain_statistics.py @@ -0,0 +1,178 @@ +import pytest +import numpy as np + +from doubleml.utils.gain_statistics import gain_statistics + + +class test_dml_class(): + def __init__(self, sensitivity_elements, all_coef): + self.sensitivity_elements = sensitivity_elements + self.all_coef = all_coef + + +n_obs = 1 +n_rep = 3 +n_coef = 5 + + +@pytest.mark.ci +def test_doubleml_exception_data(): + dml_correct = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + + # incorrect types + dml_incorrect = test_dml_class( + sensitivity_elements=np.random.normal(size=(n_obs, n_rep, n_coef)), + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long does not contain the necessary sensitivity elements\. Expected dict for dml_long\.sensitivity_elements\." 
+ with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short does not contain the necessary sensitivity elements\. " + msg += r"Expected dict for dml_short\.sensitivity_elements\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect keys + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long does not contain the necessary sensitivity elements\. Required keys are: \['sigma2', 'nu2'\]" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short does not contain the necessary sensitivity elements\. Required keys are: \['sigma2', 'nu2'\]" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect type for keys + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': {}, + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key sigma2\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key sigma2\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': {} + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key nu2\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key nu2\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect shape for keys + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs + 1, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = (r"dml_long does not contain the necessary sensitivity elements\. " + r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key sigma2\.") + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = (r"dml_short does not contain the necessary sensitivity elements\. " + r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key sigma2\.") + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs + 1, n_rep, n_coef)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = (r"dml_long does not contain the necessary sensitivity elements\. " + r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key nu2\.") + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = (r"dml_short does not contain the necessary sensitivity elements\. 
" + r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key nu2\.") + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # conflicting shape for keys + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_rep + 1, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. " + msg += r"Shapes of sigma2 are: \(1, 4, 5\) and \(1, 3, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. " + msg += r"Shapes of sigma2 are: \(1, 3, 5\) and \(1, 4, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep + 1, n_coef)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. " + msg += r"Shapes of nu2 are: \(1, 4, 5\) and \(1, 3, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. " + msg += r"Shapes of nu2 are: \(1, 3, 5\) and \(1, 4, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect type for all_coef + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) + }, + all_coef={} + ) + msg = r"dml_long\.all_coef does not contain the necessary coefficients\. Expected numpy\.ndarray\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short\.all_coef does not contain the necessary coefficients\. Expected numpy\.ndarray\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect shape for all_coef + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) + }, + all_coef=np.random.normal(size=(n_coef + 1, n_rep)) + ) + msg = r"dml_long\.all_coef does not contain the necessary coefficients\. Expected shape: \(5, 3\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short\.all_coef does not contain the necessary coefficients\. Expected shape: \(5, 3\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) diff --git a/setup.py b/setup.py index 4e2045de..b5e38240 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='DoubleML', - version='0.7.0', + version='0.7.1', author='Bach, P., Chernozhukov, V., Kurz, M. S., and Spindler, M.', maintainer='Malte S. Kurz', maintainer_email='malte.simon.kurz@uni-hamburg.de',