From 0693411e03dc7fb59e7c6f354e8cc5f1c64fbc90 Mon Sep 17 00:00:00 2001 From: Nils Winter Date: Thu, 13 Jul 2023 15:52:25 +0200 Subject: [PATCH 01/30] Add CPM feature selection as model wrapper --- ...ctome_based_predictive_modeling_example.py | 34 +++++ photonai/base/registry/PhotonCore.json | 4 + .../modelwrapper/cpm_feature_selection.py | 139 ++++++++++++++++++ .../test_cpm_feature_selection.py | 85 +++++++++++ 4 files changed, 262 insertions(+) create mode 100644 examples/advanced/connectome_based_predictive_modeling_example.py create mode 100644 photonai/modelwrapper/cpm_feature_selection.py create mode 100644 test/modelwrapper_tests/test_cpm_feature_selection.py diff --git a/examples/advanced/connectome_based_predictive_modeling_example.py b/examples/advanced/connectome_based_predictive_modeling_example.py new file mode 100644 index 00000000..96d898cb --- /dev/null +++ b/examples/advanced/connectome_based_predictive_modeling_example.py @@ -0,0 +1,34 @@ +""" +Connectome-based predictive modeling + +CPM is a method described in the following Nature Protocols article: https://www.nature.com/articles/nprot.2016.178 +It has been used in a number of publications to predict behavior from connectivity data. +CPM works similar to a feature selection method. First, relevant edges (connectivity values) are identified through +correlation analysis. Every edge is correlated with the predictive target. Only significant edges will be used in the +subsequent steps. Next, the edge values for all significant positive and for all significant negative correlations are +summed to create two new features. Lastly, these two features are used as input to another classifier. + +In this example, no connectivity data is used, but the method will still work. +This example is just supposed to show how to use CPM as feature selection and integration tool in PHOTONAI. 
+""" + +from sklearn.datasets import load_breast_cancer +from sklearn.model_selection import KFold + +from photonai import Hyperpipe, PipelineElement + + +X, y = load_breast_cancer(return_X_y=True) + +pipe = Hyperpipe("cpm_feature_selection_pipe", + outer_cv=KFold(n_splits=5, shuffle=True, random_state=15), + inner_cv=KFold(n_splits=5, shuffle=True, random_state=15), + metrics=["balanced_accuracy"], best_config_metric="balanced_accuracy", + project_folder='./tmp') + +pipe += PipelineElement('CPMFeatureSelection', hyperparameters={'corr_method': ['pearson', 'spearman'], + 'p_threshold': [0.01, 0.05]}) + +pipe += PipelineElement('LogisticRegression') + +pipe.fit(X, y) \ No newline at end of file diff --git a/photonai/base/registry/PhotonCore.json b/photonai/base/registry/PhotonCore.json index ff2da2ef..ac51c65e 100644 --- a/photonai/base/registry/PhotonCore.json +++ b/photonai/base/registry/PhotonCore.json @@ -486,5 +486,9 @@ "LocallyLinearEmbedding":[ "sklearn.manifold.LocallyLinearEmbedding", "Transformer" + ], + "CPMFeatureSelection":[ + "photonai.modelwrapper.cpm_feature_selection.CPMFeatureSelection", + "Estimator" ] } diff --git a/photonai/modelwrapper/cpm_feature_selection.py b/photonai/modelwrapper/cpm_feature_selection.py new file mode 100644 index 00000000..312873a4 --- /dev/null +++ b/photonai/modelwrapper/cpm_feature_selection.py @@ -0,0 +1,139 @@ +import numpy as np +from sklearn.base import BaseEstimator, TransformerMixin +from scipy.stats import beta, spearmanr + +from photonai.photonlogger.logger import logger + + +class CPMFeatureSelection(BaseEstimator, TransformerMixin): + """Feature Selection using Connectome-Based Predictive Modeling. + loosely based on this paper https://www.nature.com/articles/nprot.2016.178#Sec10 + + Correlate all features with target and select significant features only. + Sum significant edges for positive correlations and negative correlations separately. + """ + _estimator_type = "transformer" + + def __init__(self, p_threshold: float = .05, corr_method: str = 'pearson'): + """ + Initialize the object. + + Parameters: + p_threshold: + Upper bound for p_values. + corr_method: + Correlation coefficient method. Can be 'pearson' or 'spearman'. + + """ + self.p_threshold = p_threshold + self.corr_method = corr_method + if corr_method not in ['pearson', 'spearman']: + raise NotImplementedError("corr_method has to be either 'pearson' or 'spearman'.") + + self.significant_edges = None + self.positive_edges = None + self.negative_edges = None + self.n_original_features = None + + def fit(self, X: np.ndarray, y: np.ndarray): + """Calculate correlation coefficients between features of X and y. 
+ + Parameters: + X: + The input samples of shape [n_samples, n_original_features] + + y: + The input targets of shape [n_samples, 1] + + """ + n_samples, self.n_original_features = X.shape + + if self.corr_method == 'pearson': + corr = self._columnwise_pearson + elif self.corr_method == 'spearman': + corr = self._columnwise_spearman + else: + corr = None + + r, p = corr(X, y) + self.significant_edges = p < self.p_threshold + self.positive_edges = r > 0 + self.negative_edges = r < 0 + return self + + @staticmethod + def _columnwise_pearson(X, y): + """ + Compute Pearson's correlation coefficient between y and every column of X efficiently + + :param X: ndarray + :param y: ndarray + :return: r_values: array of correlation coefficients + p_values: array of corresponding p-values + """ + n_samples = X.shape[0] + X = (X - X.mean(axis=0)) / X.std(axis=0) + y = (y - y.mean(axis=0)) / y.std(axis=0) + r_values = np.dot(X.T, y) / n_samples + + # I used the p-value calculation described here + # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html + dist = beta(n_samples / 2 - 1, n_samples / 2 - 1, loc=-1, scale=2) + p_values = 2 * dist.cdf(-np.abs(r_values)) + return r_values, p_values + + @staticmethod + def _columnwise_spearman(X, y): + # ToDo: make more efficient by not relying on for loop + n_features = X.shape[1] + r_values, p_values = np.zeros(n_features), np.zeros(n_features) + for i in range(n_features): + corr = spearmanr(X[:, i], y) + r_values[i], p_values[i] = corr.statistic, corr.pvalue + return r_values, p_values + + def transform(self, X: np.ndarray) -> np.ndarray: + """Sum over significant positive and significant negative edges. + + Parameters: + X + The input samples of shape [n_samples, n_original_features] + + Returns: + array of shape [n_samples, 2]. + + """ + return np.stack([np.sum(X[:, (self.significant_edges == self.positive_edges)], axis=1), + np.sum(X[:, (self.significant_edges == self.negative_edges)], axis=1)], axis=1) + + def inverse_transform(self, X: np.ndarray) -> np.ndarray: + """Reverse to original dimension. + + Parameters: + X: + The input samples of shape [n_samples, 2]. + + Returns: + Array of shape [1, n_original_features] + with columns of zeros inserted where features haven't been included in the sum of positive or + negative edges. First value of input is inserted where a significant positive edge had been identified. + Second value of the input is inserted where a significant negative edge had been identified. + + """ + if len(X.shape) == 1: + X = X.reshape(1, -1) + + if X.shape[1] != 2: + msg = "X needs to have 2 features (which correspond to the sum of positive and negative edges)." + logger.error(msg) + raise ValueError(msg) + + if X.shape[0] > 1: + msg = "X can only contain one array with shape [1, 2]." 
+ logger.error(msg) + raise ValueError(msg) + + Xt = np.zeros((X.shape[0], self.n_original_features)) + Xt[:, (self.significant_edges == self.positive_edges)] = X[:, 0] + Xt[:, (self.significant_edges == self.negative_edges)] = X[:, 1] + return Xt diff --git a/test/modelwrapper_tests/test_cpm_feature_selection.py b/test/modelwrapper_tests/test_cpm_feature_selection.py new file mode 100644 index 00000000..c3d4836d --- /dev/null +++ b/test/modelwrapper_tests/test_cpm_feature_selection.py @@ -0,0 +1,85 @@ +import numpy as np + +from scipy.stats import pearsonr, spearmanr + +from sklearn.model_selection import KFold, ShuffleSplit +from sklearn.datasets import load_breast_cancer, load_diabetes + +from photonai import Hyperpipe, PipelineElement +from photonai.helper.photon_base_test import PhotonBaseTest + +from photonai.modelwrapper.cpm_feature_selection import CPMFeatureSelection + + +class CPMFeatureSelectionTest(PhotonBaseTest): + + @classmethod + def setUpClass(cls) -> None: + cls.file = __file__ + super(CPMFeatureSelectionTest, cls).setUpClass() + + def setUp(self): + super(CPMFeatureSelectionTest, self).setUp() + self.X_classif, self.y_classif = load_breast_cancer(return_X_y=True) + self.X_regr, self.y_regr = load_diabetes(return_X_y=True) + self.pipe_classif = Hyperpipe("cpm_feature_selection_pipe_classif", + outer_cv=ShuffleSplit(test_size=0.2, n_splits=1, random_state=15), + inner_cv= KFold(n_splits=3, shuffle=True, random_state=15), + metrics=["accuracy"], best_config_metric="accuracy", + project_folder=self.tmp_folder_path) + self.pipe_regr = Hyperpipe("cpm_feature_selection_pipe_regr", + outer_cv=ShuffleSplit(test_size=0.2, n_splits=1, random_state=15), + inner_cv= KFold(n_splits=3, shuffle=True, random_state=15), + metrics=["mean_absolute_error"], best_config_metric="mean_absolute_error", + project_folder=self.tmp_folder_path) + + def test_cpm_regression(self): + self.pipe_regr += PipelineElement('CPMFeatureSelection', hyperparameters={}) + self.pipe_regr += PipelineElement('LinearRegression') + self.pipe_regr.fit(self.X_regr, self.y_regr) + + def test_cpm_classification(self): + self.pipe_classif += PipelineElement('CPMFeatureSelection', + hyperparameters={'corr_method': ['pearson', 'spearman']}) + self.pipe_classif += PipelineElement('LogisticRegression') + self.pipe_classif.fit(self.X_classif, self.y_classif) + + def test_columnwise_correlation(self): + for cpm_corr_method, scipy_corr_method in [(CPMFeatureSelection._columnwise_pearson, pearsonr), + (CPMFeatureSelection._columnwise_spearman, spearmanr)]: + r_values, p_values = cpm_corr_method(self.X_classif, self.y_classif) + r_scipy_first = scipy_corr_method(self.X_classif[:, 0], self.y_classif) + r_scipy_last = scipy_corr_method(self.X_classif[:, -1], self.y_classif) + self.assertAlmostEqual(r_values[0], r_scipy_first.statistic) + self.assertAlmostEqual(p_values[0], r_scipy_first.pvalue) + self.assertAlmostEqual(r_values[-1], r_scipy_last.statistic) + self.assertAlmostEqual(p_values[-1], r_scipy_last.pvalue) + + def test_cpm_inverse(self): + cpm = PipelineElement('CPMFeatureSelection', + hyperparameters={'corr_method': ['pearson']}) + + cpm.fit(self.X_classif, self.y_classif) + X_transformed, _, _ = cpm.transform(self.X_classif) + X_back, _, _ = cpm.inverse_transform(np.asarray([3, -3])) + self.assertEqual(X_transformed.shape[1], 2) + self.assertEqual(self.X_classif.shape[1], X_back.shape[1]) + self.assertEqual(np.min(X_back), -3) + self.assertEqual(np.max(X_back), 3) + + with self.assertRaises(ValueError): + 
cpm.inverse_transform(X_transformed) + + with self.assertRaises(ValueError): + cpm.inverse_transform(X_transformed.T) + + def test_wrong_corr_method(self): + with self.assertRaises(NotImplementedError): + PipelineElement('CPMFeatureSelection', corr_method='Pearsons') + + def test_cpm_transform(self): + element = PipelineElement('CPMFeatureSelection', hyperparameters={}) + element.fit(self.X_classif, self.y_classif) + X_transformed, _, _ = element.transform(self.X_classif) + self.assertEqual(X_transformed.shape[0], self.X_classif.shape[0]) + self.assertEqual(X_transformed.shape[1], 2) From e0429082555a2f2715f66e4356e4c2cf21cd692f Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Mon, 21 Aug 2023 13:12:38 +0200 Subject: [PATCH 02/30] updated meta_optimizer example --- examples/optimizer/meta_optimizer.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/optimizer/meta_optimizer.py b/examples/optimizer/meta_optimizer.py index cfc7688b..72e71721 100644 --- a/examples/optimizer/meta_optimizer.py +++ b/examples/optimizer/meta_optimizer.py @@ -7,7 +7,8 @@ inner_cv=KFold(n_splits=5), outer_cv=KFold(n_splits=3), optimizer='switch', - optimizer_params={'name': 'sk_opt', 'n_configurations': 50}, + optimizer_params={'name': 'grid_search'}, + # optimizer_params={'name': 'random_search', 'n_configurations': 50}, metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'], best_config_metric='accuracy', project_folder='./tmp', @@ -16,7 +17,7 @@ my_pipe.add(PipelineElement('StandardScaler')) my_pipe += PipelineElement('PCA', - hyperparameters={'n_components': IntegerRange(10, 30)}, + hyperparameters={'n_components': IntegerRange(10, 30, step=5)}, test_disabled=True) # set up two learning algorithms in an ensemble @@ -25,10 +26,10 @@ estimator_selection += PipelineElement('RandomForestClassifier', criterion='gini', hyperparameters={'min_samples_split': IntegerRange(2, 4), - 'max_features': ['auto', 'sqrt', 'log2'], + 'max_features': ['sqrt', 'log2'], 'bootstrap': [True, False]}) estimator_selection += PipelineElement('SVC', - hyperparameters={'C': FloatRange(0.5, 25), + hyperparameters={'C': FloatRange(0.5, 25, num=10), 'kernel': ['linear', 'rbf']}) my_pipe += estimator_selection @@ -36,4 +37,4 @@ X, y = load_breast_cancer(return_X_y=True) my_pipe.fit(X, y) -my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator() +print(my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator()) From 126b242ef097ccc01d08164127e82a0e37f85ba6 Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Tue, 12 Sep 2023 12:09:02 +0200 Subject: [PATCH 03/30] add median absolute error to metrics and fix missing kwargs in DefaultPipeline --- examples/advanced/gpboost.py | 85 +++++++++++++++++++++++++ examples/basic/classification_custom.py | 2 +- examples/basic/regression.py | 1 + photonai/base/model_zoo.py | 4 +- photonai/processing/metrics.py | 1 + 5 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 examples/advanced/gpboost.py diff --git a/examples/advanced/gpboost.py b/examples/advanced/gpboost.py new file mode 100644 index 00000000..916807d7 --- /dev/null +++ b/examples/advanced/gpboost.py @@ -0,0 +1,85 @@ +# pip install gpboost -U +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.model_selection import GroupKFold, KFold +from photonai.base import Hyperpipe, PipelineElement +import numpy as np +import pandas as pd +import gpboost as gpb +# from gpboost import GPBoostRegressor + + +class 
GPBoostDataWrapper(BaseEstimator, ClassifierMixin): + + def __init__(self): + self.needs_covariates = True + # self.gpmodel = gpb.GPModel(likelihood="gaussian") + self.gpboost = None + + + def fit(self, X, y, **kwargs): + self.gpboost = gpb.GPBoostRegressor() + if "clusters" in kwargs: + clst = pd.Series(kwargs["clusters"]) + gpmodel = gpb.GPModel(likelihood="gaussian", group_data=clst) + self.gpboost.fit(X, y, gp_model=gpmodel) + else: + raise NotImplementedError("GPBoost needs clusters") + return self + + def predict(self, X, **kwargs): + clst = pd.Series(kwargs["clusters"]) + preds = self.gpboost.predict(X, group_data_pred=clst) + preds = preds["response_mean"] + return preds + + def save(self): + return None + + +def get_gpboost_pipe(pipe_name, project_folder, split="group"): + + if split == "group": + outercv = GroupKFold(n_splits=10) + else: + outercv = KFold(n_splits=10) + + my_pipe = Hyperpipe(pipe_name, + optimizer='grid_search', + metrics=['mean_absolute_error', 'mean_squared_error', + 'spearman_correlation', 'pearson_correlation'], + best_config_metric='mean_absolute_error', + outer_cv=outercv, + inner_cv=KFold(n_splits=10), + calculate_metrics_across_folds=True, + use_test_set=True, + verbosity=1, + project_folder=project_folder) + + # Add transformer elements + my_pipe += PipelineElement("StandardScaler", hyperparameters={}, + test_disabled=True, with_mean=True, with_std=True) + + my_pipe += PipelineElement.create("GPBoost", GPBoostDataWrapper(), hyperparameters={}) + + return my_pipe + + +def get_mock_data(): + + X = np.random.randint(10, size=(200, 9)) + y = np.sum(X, axis=1) + clst = np.random.randint(10, size=200) + + return X, y, clst + + +if __name__ == '__main__': + + + X, y, clst = get_mock_data() + + # define project folder + project_folder = "/tmp/gpboost_debug" + + my_pipe = get_gpboost_pipe("Test_gpboost", project_folder, split="random") + my_pipe.fit(X, y, clusters=clst) diff --git a/examples/basic/classification_custom.py b/examples/basic/classification_custom.py index e4526186..2e6888fb 100644 --- a/examples/basic/classification_custom.py +++ b/examples/basic/classification_custom.py @@ -5,7 +5,7 @@ my_pipe = Hyperpipe('basic_svm_pipe', inner_cv=KFold(n_splits=5), outer_cv=KFold(n_splits=3), - optimizer='sk_opt', + optimizer='random_grid_search', optimizer_params={'n_configurations': 15}, metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'], best_config_metric='accuracy', diff --git a/examples/basic/regression.py b/examples/basic/regression.py index 1ff648f0..676dabc1 100644 --- a/examples/basic/regression.py +++ b/examples/basic/regression.py @@ -2,6 +2,7 @@ from photonai import RegressionPipe my_pipe = RegressionPipe('diabetes', + best_config_metric='median_absolute_error', add_default_pipeline_elements=True, scaling=True, imputation=False, diff --git a/photonai/base/model_zoo.py b/photonai/base/model_zoo.py index a714c22f..d1bbc217 100644 --- a/photonai/base/model_zoo.py +++ b/photonai/base/model_zoo.py @@ -217,7 +217,7 @@ def set_default_pipeline(self, scaling, imputation, imputation_nan_value, featur logger.photon_system_log("---") logger.stars() - def fit(self, X=None, y=None): + def fit(self, X=None, y=None, **kwargs): if (X is not None and self.X_csv_path is not None) or (y is not None and self.y_csv_path is not None): raise ValueError("You can either give the fit function data or the pipe definition paths " "to csv files to load data from. 
Not both.") @@ -228,7 +228,7 @@ def fit(self, X=None, y=None): X = X if X is not None else pd.read_csv(self.X_csv_path, delimiter=self.delimiter) y = y if y is not None else pd.read_csv(self.y_csv_path, delimiter=self.delimiter) - super().fit(X, y) + super().fit(X, y, **kwargs) class ClassificationPipe(DefaultPipeline): diff --git a/photonai/processing/metrics.py b/photonai/processing/metrics.py index 5c4d8c3c..f1c56863 100644 --- a/photonai/processing/metrics.py +++ b/photonai/processing/metrics.py @@ -37,6 +37,7 @@ class Scorer: # Regression 'mean_squared_error': ('sklearn.metrics', 'mean_squared_error', 'error'), 'mean_absolute_error': ('sklearn.metrics', 'mean_absolute_error', 'error'), + 'median_absolute_error': ('sklearn.metrics', 'median_absolute_error', 'error'), 'explained_variance': ('sklearn.metrics', 'explained_variance_score', 'score'), 'r2': ('sklearn.metrics', 'r2_score', 'score'), 'pearson_correlation': ('photonai.processing.metrics', 'pearson_correlation', 'score'), From c32ca0ec2f138f4e7c3b028503d246bc676e0b73 Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Wed, 13 Sep 2023 13:12:51 +0200 Subject: [PATCH 04/30] add permutation run nr to permutation test --- examples/optimizer/meta_optimizer.py | 4 ++-- photonai/base/hyperpipe.py | 1 + photonai/processing/permutation_test.py | 1 + photonai/processing/results_structure.py | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/optimizer/meta_optimizer.py b/examples/optimizer/meta_optimizer.py index 72e71721..57e24e9e 100644 --- a/examples/optimizer/meta_optimizer.py +++ b/examples/optimizer/meta_optimizer.py @@ -7,8 +7,8 @@ inner_cv=KFold(n_splits=5), outer_cv=KFold(n_splits=3), optimizer='switch', - optimizer_params={'name': 'grid_search'}, - # optimizer_params={'name': 'random_search', 'n_configurations': 50}, + # optimizer_params={'name': 'grid_search'}, + optimizer_params={'name': 'random_search', 'n_configurations': 10}, metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'], best_config_metric='accuracy', project_folder='./tmp', diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index 70526ca8..5c5493bd 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -1243,6 +1243,7 @@ def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwar # get feature importances logger.photon_system_log("Permutation Importances: Calculating performances for " + fold_str) + perm_imps = permutation_importance(pipeline, test_X, test_y, **kwargs) # store into list diff --git a/photonai/processing/permutation_test.py b/photonai/processing/permutation_test.py index eaf75578..c217f6b4 100644 --- a/photonai/processing/permutation_test.py +++ b/photonai/processing/permutation_test.py @@ -160,6 +160,7 @@ def run_parallelized_permutation(hyperpipe_constructor, X, perm_run, y_perm, per perm_pipe.verbosity = verbosity perm_pipe.name = perm_pipe.name + '_perm_' + str(perm_run) perm_pipe.permutation_id = permutation_id + perm_pipe.permutation_run = perm_run # print(y_perm) po = OutputSettings(mongodb_connect_url=perm_pipe.output_settings.mongodb_connect_url, diff --git a/photonai/processing/results_structure.py b/photonai/processing/results_structure.py index 0b36b7bf..4d685b0b 100644 --- a/photonai/processing/results_structure.py +++ b/photonai/processing/results_structure.py @@ -217,6 +217,7 @@ class Meta: output_folder = fields.CharField(blank=True) permutation_id = fields.CharField(blank=True) + permutation_run = fields.IntegerField(blank=True) 
permutation_failed = fields.CharField(blank=True) permutation_test = fields.EmbeddedDocumentField(MDBPermutationResults, blank=True) From ef5fbb621d3ac9a8f42e457cf50302cf2c48371b Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Thu, 14 Sep 2023 17:42:26 +0200 Subject: [PATCH 05/30] add computation run nr to result object --- photonai/processing/permutation_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/photonai/processing/permutation_test.py b/photonai/processing/permutation_test.py index c217f6b4..ef51e4fd 100644 --- a/photonai/processing/permutation_test.py +++ b/photonai/processing/permutation_test.py @@ -160,7 +160,6 @@ def run_parallelized_permutation(hyperpipe_constructor, X, perm_run, y_perm, per perm_pipe.verbosity = verbosity perm_pipe.name = perm_pipe.name + '_perm_' + str(perm_run) perm_pipe.permutation_id = permutation_id - perm_pipe.permutation_run = perm_run # print(y_perm) po = OutputSettings(mongodb_connect_url=perm_pipe.output_settings.mongodb_connect_url, @@ -173,6 +172,7 @@ def run_parallelized_permutation(hyperpipe_constructor, X, perm_run, y_perm, per print('Fitting permutation ' + str(perm_run) + ' ...') perm_pipe.fit(X, y_perm, **kwargs) perm_pipe.results.computation_completed = True + perm_pipe.results.permutation_run = perm_run PermutationTest.clear_data_and_save(perm_pipe) print('Finished permutation ' + str(perm_run) + ' ...') except Exception as e: From 94d1dd3badf6085ae03ba790e22c9fa25e2007d1 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Mon, 25 Sep 2023 17:25:55 +0200 Subject: [PATCH 06/30] Added score_train parameter --- photonai/base/hyperpipe.py | 10 ++++++++-- photonai/processing/inner_folds.py | 25 ++++++++++++++++++------- photonai/processing/outer_folds.py | 10 +++++++--- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index 5c5493bd..426a0d34 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -297,7 +297,8 @@ def __init__(self, name: Optional[str], cache_folder: str = None, nr_of_processes: int = 1, multi_threading: bool = True, - allow_multidim_targets: bool = False): + allow_multidim_targets: bool = False, + score_train: bool = True): """ Initialize the object. @@ -420,6 +421,9 @@ def __init__(self, name: Optional[str], allow_multidim_targets: Allows multidimensional targets. + score_train: + metrics for the train-set are only calculated if score_train is true. + """ self.name = re.sub(r'\W+', '', name) @@ -514,6 +518,7 @@ def __init__(self, name: Optional[str], self.permutation_id = permutation_id self.allow_multidim_targets = allow_multidim_targets self.is_final_fit = False + self.score_train = score_train # ====================== Random Seed =========================== self.random_state = random_seed @@ -1085,7 +1090,8 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs): cache_folder=self.cache_folder, cache_updater=self.recursive_cache_folder_propagation, dummy_estimator=dummy_estimator, - result_obj=outer_fold) + result_obj=outer_fold, + score_train=self.score_train) # 2. 
monitor outputs self.results.outer_folds.append(outer_fold) diff --git a/photonai/processing/inner_folds.py b/photonai/processing/inner_folds.py index 1c665866..1e8a5ece 100644 --- a/photonai/processing/inner_folds.py +++ b/photonai/processing/inner_folds.py @@ -66,7 +66,8 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos, training: bool = False, cache_folder=None, cache_updater=None, - scorer: Scorer = None): + scorer: Scorer = None, + score_train: bool = True): self.params = specific_config self.pipe = pipe_ctor @@ -81,6 +82,7 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos, self.raise_error = raise_error self.training = training + self.score_train = score_train def fit(self, X, y, **kwargs): """Iterates over cross-validation folds and trains the pipeline, @@ -136,7 +138,8 @@ def fit(self, X, y, **kwargs): kwargs_cv_train), test_data=InnerFoldManager.JobData(test_X, test_y, test, kwargs_cv_test), - scorer=self.scorer) + scorer=self.scorer, + score_train=self.score_train) # only for unparallel processing # inform children in which inner fold we are @@ -224,7 +227,8 @@ def compute_learning_curves(self, new_pipe, train_X, train_y, train, kwargs_cv_t callbacks=self.optimization_constraints, train_data=self.JobData(train_cut_X, train_cut_y, train_cut, train_cut_kwargs), test_data=self.JobData(test_X, test_y, test, kwargs_cv_test), - scorer=self.scorer) + scorer=self.scorer, + score_train=self.score_train) curr_test_cut, curr_train_cut = InnerFoldManager.fit_and_score(job_data) learning_curves.append([self.cross_validation_infos.learning_curves_cut.values[i], curr_test_cut.metrics, curr_train_cut.metrics]) @@ -239,7 +243,7 @@ def __init__(self, X, y, indices, cv_kwargs): class InnerCVJob: - def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer): + def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer, score_train): self.pipe = pipe self.config = config self.metrics = metrics @@ -247,6 +251,7 @@ def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scor self.train_data = train_data self.test_data = test_data self.scorer = scorer + self.score_train = score_train @staticmethod def update_config_item_with_inner_fold(config_item, fold_cnt, curr_train_fold, curr_test_fold, time_monitor, @@ -344,7 +349,7 @@ def fit_and_score(job: InnerCVJob): # start fitting pipe.fit(job.train_data.X, job.train_data.y, **job.train_data.cv_kwargs) - logger.debug('Scoring Training Data') + logger.debug('Scoring Test Data') # score test data curr_test_fold = InnerFoldManager.score(pipe, job.test_data.X, job.test_data.y, job.metrics, @@ -352,9 +357,15 @@ def fit_and_score(job: InnerCVJob): scorer=job.scorer, **job.test_data.cv_kwargs) - logger.debug('Scoring Test Data') + logger.debug('Scoring Training Data') # score train data - curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, + curr_train_fold = MDBScoreInformation(metrics={}, + score_duration=0, + y_pred=np.zeros_like(job.train_data.y), y_true=job.train_data.y, + indices=np.asarray(job.train_data.indices).tolist(), + probabilities=None) + if job.score_train: + curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, indices=job.train_data.indices, training=True, scorer=job.scorer, **job.train_data.cv_kwargs) diff --git a/photonai/processing/outer_folds.py b/photonai/processing/outer_folds.py index 4891b14e..9235fae1 100644 --- 
a/photonai/processing/outer_folds.py +++ b/photonai/processing/outer_folds.py @@ -63,7 +63,8 @@ def __init__(self, pipe, cache_folder=None, cache_updater=None, dummy_estimator=None, - result_obj=None): + result_obj=None, + score_train: bool = True): self.outer_fold_id = outer_fold_id self.cross_validation_info = cross_validation_info self.scorer = Scorer(optimization_info.metrics) @@ -71,6 +72,7 @@ def __init__(self, pipe, self._pipe = pipe self.copy_pipe_fnc = self._pipe.copy_me self.dummy_estimator = dummy_estimator + self.score_train = score_train self.cache_folder = cache_folder self.cache_updater = cache_updater @@ -246,6 +248,7 @@ def fit(self, X, y=None, **kwargs): indices=self.cross_validation_info.outer_folds[self.outer_fold_id].test_indices, metrics=self.optimization_info.metrics, scorer=self.scorer, + score_train=self.score_train, **self._test_kwargs) logger.debug('... scoring training data') @@ -255,6 +258,7 @@ def fit(self, X, y=None, **kwargs): metrics=self.optimization_info.metrics, training=True, scorer=self.scorer, + score_train=self.score_train, **self._validation_kwargs) best_config_performance_mdb.training = train_score_mdb @@ -386,7 +390,7 @@ def _fit_dummy(self): self.dummy_estimator.fit(dummy_y, self._validation_y) train_scores = InnerFoldManager.score(self.dummy_estimator, self._validation_X, self._validation_y, metrics=self.optimization_info.metrics, - scorer=self.scorer) + scorer=self.scorer, score_train=self.score_train) # fill result tree with fold information inner_fold = MDBInnerFold() @@ -396,7 +400,7 @@ def _fit_dummy(self): test_scores = InnerFoldManager.score(self.dummy_estimator, self._test_X, self._test_y, metrics=self.optimization_info.metrics, - scorer=self.scorer) + scorer=self.scorer, score_train=self.score_train) print_metrics("DUMMY", test_scores.metrics) inner_fold.validation = test_scores From d23d3a12f27f48a1d35a37cc87bd6b48b529042e Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Mon, 25 Sep 2023 21:42:27 +0200 Subject: [PATCH 07/30] fix missing metrics error --- photonai/processing/inner_folds.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/photonai/processing/inner_folds.py b/photonai/processing/inner_folds.py index 1e8a5ece..c82c00f5 100644 --- a/photonai/processing/inner_folds.py +++ b/photonai/processing/inner_folds.py @@ -359,7 +359,10 @@ def fit_and_score(job: InnerCVJob): logger.debug('Scoring Training Data') # score train data - curr_train_fold = MDBScoreInformation(metrics={}, + scores = {} + for metric in list(curr_test_fold.metrics.keys()): + scores[metric] = 0 + curr_train_fold = MDBScoreInformation(metrics=scores, score_duration=0, y_pred=np.zeros_like(job.train_data.y), y_true=job.train_data.y, indices=np.asarray(job.train_data.indices).tolist(), From 8773978c6848da98d1abc5796cf4f6b38420a461 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Mon, 25 Sep 2023 23:19:43 +0200 Subject: [PATCH 08/30] Fixed serialization error --- photonai/processing/inner_folds.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/photonai/processing/inner_folds.py b/photonai/processing/inner_folds.py index c82c00f5..6d2f2541 100644 --- a/photonai/processing/inner_folds.py +++ b/photonai/processing/inner_folds.py @@ -364,9 +364,10 @@ def fit_and_score(job: InnerCVJob): scores[metric] = 0 curr_train_fold = MDBScoreInformation(metrics=scores, score_duration=0, - y_pred=np.zeros_like(job.train_data.y), y_true=job.train_data.y, + y_pred=list(np.zeros_like(job.train_data.y)), + y_true=list(job.train_data.y), 
indices=np.asarray(job.train_data.indices).tolist(), - probabilities=None) + probabilities=[]) if job.score_train: curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, indices=job.train_data.indices, From 135a9693817cfc10684971e8fd1b237ff66b61d3 Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Wed, 27 Sep 2023 12:59:42 +0200 Subject: [PATCH 09/30] add LinearDiscriminantAnalysis to registry --- photonai/base/registry/PhotonCore.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/photonai/base/registry/PhotonCore.json b/photonai/base/registry/PhotonCore.json index ff2da2ef..54f00181 100644 --- a/photonai/base/registry/PhotonCore.json +++ b/photonai/base/registry/PhotonCore.json @@ -295,6 +295,10 @@ "sklearn.linear_model.LogisticRegression", "Estimator" ], + "LinearDiscriminantAnalysis": [ + "sklearn.discriminant_analysis.LinearDiscriminantAnalysis", + "Transformer" + ], "PassiveAggressiveClassifier":[ "sklearn.linear_model.PassiveAggressiveClassifier", "Estimator" From f7c6d5a154df254d91786d39b6e4d54ff02497cd Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Thu, 23 Nov 2023 17:37:15 +0100 Subject: [PATCH 10/30] adapt metrics to sklearn --- photonai/processing/metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/photonai/processing/metrics.py b/photonai/processing/metrics.py index f1c56863..1670bf49 100644 --- a/photonai/processing/metrics.py +++ b/photonai/processing/metrics.py @@ -29,9 +29,9 @@ class Scorer: 'precision': ('sklearn.metrics', 'precision_score', 'score'), 'recall': ('sklearn.metrics', 'recall_score', 'score'), 'auc': ('sklearn.metrics', 'roc_auc_score', 'score'), - 'sensitivity': ('photonai.processing.metrics', 'sensitivity', 'score'), + 'sensitivity': ('sklearn.metrics', 'recall_score', 'score'), 'specificity': ('photonai.processing.metrics', 'specificity', 'score'), - 'balanced_accuracy': ('photonai.processing.metrics', 'balanced_accuracy', 'score'), + 'balanced_accuracy': ('sklearn.metrics', 'balanced_accuracy_score', 'score'), 'categorical_accuracy': ('photonai.processing.metrics', 'categorical_accuracy_score', 'score'), # Regression From b910f681fa2e179d353da420fc8deb04491b410d Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Thu, 22 Feb 2024 14:15:53 +0100 Subject: [PATCH 11/30] changed features importances to backmapped versions in result tree --- photonai/base/hyperpipe.py | 2 +- photonai/processing/metrics.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index 426a0d34..a1ba3551 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -944,7 +944,6 @@ def _finalize_optimization(self): if not feature_importances: logger.info("No feature importances available for {}!".format(self.optimum_pipe.elements[-1][0])) else: - self.results.best_config_feature_importances = feature_importances # write backmapping file only if optimum_pipes inverse_transform works completely. # restriction: only a faulty inverse_transform is considered, missing ones are further ignored. 
@@ -958,6 +957,7 @@ def _finalize_optimization(self): # save backmapping self.results_handler.save_backmapping( filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping) + self.results.best_config_feature_importances = list(np.squeeze(backmapping)) else: logger.info('Could not save feature importance: backmapping NOT successful.') diff --git a/photonai/processing/metrics.py b/photonai/processing/metrics.py index f1c56863..1670bf49 100644 --- a/photonai/processing/metrics.py +++ b/photonai/processing/metrics.py @@ -29,9 +29,9 @@ class Scorer: 'precision': ('sklearn.metrics', 'precision_score', 'score'), 'recall': ('sklearn.metrics', 'recall_score', 'score'), 'auc': ('sklearn.metrics', 'roc_auc_score', 'score'), - 'sensitivity': ('photonai.processing.metrics', 'sensitivity', 'score'), + 'sensitivity': ('sklearn.metrics', 'recall_score', 'score'), 'specificity': ('photonai.processing.metrics', 'specificity', 'score'), - 'balanced_accuracy': ('photonai.processing.metrics', 'balanced_accuracy', 'score'), + 'balanced_accuracy': ('sklearn.metrics', 'balanced_accuracy_score', 'score'), 'categorical_accuracy': ('photonai.processing.metrics', 'categorical_accuracy_score', 'score'), # Regression From b960e4004c085e2de557233960e6dff334c4eea9 Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Thu, 22 Feb 2024 14:34:57 +0100 Subject: [PATCH 12/30] store backmapped features instead of original ones in result tree root --- photonai/base/hyperpipe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index 5c5493bd..ef4ca12b 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -939,7 +939,7 @@ def _finalize_optimization(self): if not feature_importances: logger.info("No feature importances available for {}!".format(self.optimum_pipe.elements[-1][0])) else: - self.results.best_config_feature_importances = feature_importances + # write backmapping file only if optimum_pipes inverse_transform works completely. # restriction: only a faulty inverse_transform is considered, missing ones are further ignored. @@ -953,6 +953,7 @@ def _finalize_optimization(self): # save backmapping self.results_handler.save_backmapping( filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping) + self.results.best_config_feature_importances = list(np.squeeze(backmapping)) else: logger.info('Could not save feature importance: backmapping NOT successful.') From 339d7d0b9adbf035455ec4df6d592059ce4e5d69 Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Thu, 22 Feb 2024 15:28:17 +0100 Subject: [PATCH 13/30] reverse backmapped feature importances im tree --- photonai/base/hyperpipe.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index ef4ca12b..0812972a 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -939,23 +939,22 @@ def _finalize_optimization(self): if not feature_importances: logger.info("No feature importances available for {}!".format(self.optimum_pipe.elements[-1][0])) else: - + self.results.best_config_feature_importances = feature_importances # write backmapping file only if optimum_pipes inverse_transform works completely. # restriction: only a faulty inverse_transform is considered, missing ones are further ignored. 
- with warnings.catch_warnings(record=True) as w: - # get backmapping - backmapping, _, _ = self.optimum_pipe.\ - inverse_transform(np.array(feature_importances).reshape(1, -1), None) - - if not any("The inverse transformation is not possible for" in s - for s in [e.message.args[0] for e in w]): - # save backmapping - self.results_handler.save_backmapping( - filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping) - self.results.best_config_feature_importances = list(np.squeeze(backmapping)) - else: - logger.info('Could not save feature importance: backmapping NOT successful.') + # with warnings.catch_warnings(record=True) as w: + # # get backmapping + # backmapping, _, _ = self.optimum_pipe.\ + # inverse_transform(np.array(feature_importances).reshape(1, -1), None) + # + # if not any("The inverse transformation is not possible for" in s + # for s in [e.message.args[0] for e in w]): + # # save backmapping + # self.results_handler.save_backmapping( + # filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping) + # else: + # logger.info('Could not save feature importance: backmapping NOT successful.') # save learning curves if self.cross_validation.learning_curves: From 4ca1459436c7f00fbd7f97bdfa9f83212cf8f9bb Mon Sep 17 00:00:00 2001 From: Ramona Leenings Date: Thu, 22 Feb 2024 16:18:32 +0100 Subject: [PATCH 14/30] removing logging output for backmapping --- photonai/base/hyperpipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index 0812972a..ea913b02 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -933,7 +933,7 @@ def _finalize_optimization(self): logger.error(str(e)) # get feature importances of optimum pipe - logger.info("Mapping back feature importances...") + # logger.info("Mapping back feature importances...") feature_importances = self.optimum_pipe.feature_importances_ if not feature_importances: From cc2fb6398cc36de7fac8c9b06f0766aa52bce2c2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 12:17:47 +0000 Subject: [PATCH 15/30] Bump scikit-learn from 1.1.3 to 1.5.2 Bumps [scikit-learn](https://github.com/scikit-learn/scikit-learn) from 1.1.3 to 1.5.2. - [Release notes](https://github.com/scikit-learn/scikit-learn/releases) - [Commits](https://github.com/scikit-learn/scikit-learn/compare/1.1.3...1.5.2) --- updated-dependencies: - dependency-name: scikit-learn dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- photonai/requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/photonai/requirements.txt b/photonai/requirements.txt index 30c8825d..769bcfc4 100644 --- a/photonai/requirements.txt +++ b/photonai/requirements.txt @@ -1,7 +1,7 @@ ###### Requirements with temporary Version Specifiers ###### numpy matplotlib -scikit-learn==1.1.3 +scikit-learn==1.5.2 pandas plotly imbalanced-learn diff --git a/requirements.txt b/requirements.txt index f6d6db70..31340106 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ numpy matplotlib -scikit-learn==1.3.0 +scikit-learn==1.5.2 pandas plotly imbalanced-learn==0.11.0 From c7d1e3fa71d560d6060df00ca0ac4f2af524f6e9 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Wed, 23 Oct 2024 14:34:25 +0200 Subject: [PATCH 16/30] Update requirements.txt --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 31340106..0b648199 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ matplotlib scikit-learn==1.5.2 pandas plotly -imbalanced-learn==0.11.0 +imbalanced-learn==0.12.4 pymodm scipy statsmodels @@ -14,4 +14,4 @@ dask>=2021.10.0 distributed scikit-optimize xlrd -pbr \ No newline at end of file +pbr From 91f5ad5ecd3d7684810b220f9017ede824ee0e43 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Wed, 23 Oct 2024 15:08:18 +0200 Subject: [PATCH 17/30] Fixed tests --- photonai/modelwrapper/keras_base_estimator.py | 4 ++-- photonai/modelwrapper/keras_base_models.py | 2 +- photonai/processing/metrics.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/photonai/modelwrapper/keras_base_estimator.py b/photonai/modelwrapper/keras_base_estimator.py index 70d6401f..08c37934 100644 --- a/photonai/modelwrapper/keras_base_estimator.py +++ b/photonai/modelwrapper/keras_base_estimator.py @@ -76,7 +76,7 @@ def save(self, filename): with open(filename + ".json", "w") as json_file: json_file.write(model_json) # serialize weights to HDF5 - self.model.save_weights(filename + ".h5") + self.model.save_weights(filename + ".weights.h5") def load(self, filename): # load json and create model @@ -86,6 +86,6 @@ def load(self, filename): loaded_model = keras.models.model_from_json(loaded_model_json) # load weights into new model - loaded_model.load_weights(filename + ".h5") + loaded_model.load_weights(filename + ".weights.h5") self.model = loaded_model self.init_weights = self.model.get_weights() diff --git a/photonai/modelwrapper/keras_base_models.py b/photonai/modelwrapper/keras_base_models.py index 9f7f026b..71d9b43d 100644 --- a/photonai/modelwrapper/keras_base_models.py +++ b/photonai/modelwrapper/keras_base_models.py @@ -245,7 +245,7 @@ def optimizer(self, value): if value.lower() not in __supported_optimizers__.keys(): raise ValueError("Optimizer is not supported by keras. 
Please use one of: "+str(__supported_optimizers__)) else: - self._optimizer = __supported_optimizers__[value.lower()](lr=self.learning_rate) + self._optimizer = __supported_optimizers__[value.lower()](learning_rate=self.learning_rate) @property def target_activation(self): diff --git a/photonai/processing/metrics.py b/photonai/processing/metrics.py index 5c4d8c3c..ed69fc12 100644 --- a/photonai/processing/metrics.py +++ b/photonai/processing/metrics.py @@ -107,9 +107,9 @@ def register_custom_metric(cls, metric: Union[Metric_Type, Tuple[str, Metric_Typ metric_obj = metric def metric_func(y_true, y_pred): - metric_obj.reset_states() + metric_obj.reset_state() metric_obj.update_state(y_true=y_true, y_pred=y_pred) - return float(cls.dynamic_keras_import.backend.eval(metric_obj.result())) + return float(metric_obj.result().numpy()) Scorer.CUSTOM_ELEMENT_DICTIONARY[metric_name] = metric_func elif callable(metric): From 92e933ed9a9fee998f255812ca227f9b822c5b7b Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Wed, 23 Oct 2024 16:01:45 +0200 Subject: [PATCH 18/30] Fixed keras model load and save --- photonai/modelwrapper/keras_base_estimator.py | 27 ++++++++++++------- test/modelwrapper_tests/test_keras_basic.py | 23 +++++++++++----- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/photonai/modelwrapper/keras_base_estimator.py b/photonai/modelwrapper/keras_base_estimator.py index 08c37934..5c211217 100644 --- a/photonai/modelwrapper/keras_base_estimator.py +++ b/photonai/modelwrapper/keras_base_estimator.py @@ -1,4 +1,5 @@ import warnings +import os import tensorflow.keras as keras from sklearn.base import BaseEstimator @@ -72,20 +73,28 @@ def encode_targets(self, y): def save(self, filename): # serialize model to JSON + warnings.warn("Using json export for compatibility, will be deprecated in future.") model_json = self.model.to_json() with open(filename + ".json", "w") as json_file: - json_file.write(model_json) + json_file.write(model_json) # serialize weights to HDF5 self.model.save_weights(filename + ".weights.h5") + self.model.save(filename + ".keras") def load(self, filename): # load json and create model - json_file = open(filename + '.json', 'r') - loaded_model_json = json_file.read() - json_file.close() - loaded_model = keras.models.model_from_json(loaded_model_json) + if not os.path.exists(filename+'.keras'): + warnings.warn("Using json import for compatiblity, will be deprecated in future. 
" + "Please save your model to get a *.keras file") + json_file = open(filename + '.json', 'r') + loaded_model_json = json_file.read() + json_file.close() + loaded_model = keras.models.model_from_json(loaded_model_json) + + loaded_model.load_weights(filename + ".weights.h5") + self.model = loaded_model + self.init_weights = self.model.get_weights() + else: + # load weights into new model + self.model = keras.models.load_model(filename + '.keras') - # load weights into new model - loaded_model.load_weights(filename + ".weights.h5") - self.model = loaded_model - self.init_weights = self.model.get_weights() diff --git a/test/modelwrapper_tests/test_keras_basic.py b/test/modelwrapper_tests/test_keras_basic.py index b6d92f77..b84673e7 100644 --- a/test/modelwrapper_tests/test_keras_basic.py +++ b/test/modelwrapper_tests/test_keras_basic.py @@ -1,7 +1,7 @@ from sklearn.datasets import load_breast_cancer, load_diabetes import tensorflow as tf from tensorflow.keras.models import Sequential -from tensorflow.keras.layers import Dense, Dropout +from tensorflow.keras.layers import Dense, Dropout, Input, Activation import numpy as np import warnings import os @@ -16,7 +16,8 @@ def setUp(self): self.X, self.y = load_breast_cancer(return_X_y=True) self.model = Sequential() - self.model.add(Dense(3, input_dim=self.X.shape[1], activation='relu')) + self.model.add(Input(shape=[self.X.shape[1]])) + self.model.add(Dense(3, activation="relu")) self.model.add(Dropout(0.1)) self.model.add(Dense(2, activation='softmax')) self.model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) @@ -24,8 +25,8 @@ def setUp(self): self.estimator_type = KerasBaseClassifier inputs = tf.keras.Input(shape=(self.X.shape[1],)) - x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs) - outputs = tf.keras.layers.Dense(2, activation=tf.nn.softmax)(x) + x = tf.keras.layers.Dense(4, activation=tf.keras.activations.relu)(inputs) + outputs = tf.keras.layers.Dense(2, activation=tf.keras.activations.softmax)(x) self.tf_model = tf.keras.Model(inputs=inputs, outputs=outputs) self.tf_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) @@ -57,10 +58,18 @@ def test_tf_model(self): estimator.save("keras_example_saved_model") - reload_estinator = self.estimator_type() - reload_estinator.load("keras_example_saved_model") + reload_estimator = self.estimator_type() + reload_estimator.load("keras_example_saved_model") + + np.testing.assert_array_almost_equal(estimator.predict(self.X), reload_estimator.predict(self.X), decimal=3) + + # remove novel keras file and test legacy import + os.remove("keras_example_saved_model.keras") + + reload_estimator_legacy = self.estimator_type() + reload_estimator_legacy.load("keras_example_saved_model") - np.testing.assert_array_almost_equal(estimator.predict(self.X), reload_estinator.predict(self.X), decimal=3) + np.testing.assert_array_almost_equal(estimator.predict(self.X), reload_estimator.predict(self.X), decimal=3) # remove saved keras files for fname in os.listdir("."): From 1fb1a1eb7f5d6e4d65f75c49a483957ec299e553 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Fri, 25 Oct 2024 12:29:21 +0200 Subject: [PATCH 19/30] Removed backmapping tests --- test/base_tests/test_hyperpipe.py | 14 ++++++------ test/processing_tests/test_results_handler.py | 22 ++++++++++--------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/test/base_tests/test_hyperpipe.py b/test/base_tests/test_hyperpipe.py index cdefbf5d..4c989956 100644 --- 
a/test/base_tests/test_hyperpipe.py +++ b/test/base_tests/test_hyperpipe.py @@ -700,14 +700,14 @@ def test_finalize_optimization(self): # save optimum model self.assert_best_model() - # backmapping + # backmapping - removed in 339d7d0 # because the pca is test disabled, we expect the number of features - self.assertEqual(len(self.hyperpipe.results.best_config_feature_importances[0]), self.__X.shape[1]) - backmapped_feature_importances = os.path.join(self.hyperpipe.output_settings.results_folder, - 'optimum_pipe_feature_importances_backmapped.csv') - self.assertTrue(os.path.isfile(backmapped_feature_importances)) - loaded_array = np.loadtxt(open(backmapped_feature_importances, 'rb'), delimiter=",") - self.assertEqual(loaded_array.shape[0], self.__X.shape[1]) + #self.assertEqual(len(self.hyperpipe.results.best_config_feature_importances[0]), self.__X.shape[1]) + #backmapped_feature_importances = os.path.join(self.hyperpipe.output_settings.results_folder, + # 'optimum_pipe_feature_importances_backmapped.csv') + #self.assertTrue(os.path.isfile(backmapped_feature_importances)) + #loaded_array = np.loadtxt(open(backmapped_feature_importances, 'rb'), delimiter=",") + #self.assertEqual(loaded_array.shape[0], self.__X.shape[1]) def assert_best_model(self): self.assertTrue(os.path.isfile(os.path.join(self.hyperpipe.output_settings.results_folder, diff --git a/test/processing_tests/test_results_handler.py b/test/processing_tests/test_results_handler.py index 1d94909e..e7edb483 100644 --- a/test/processing_tests/test_results_handler.py +++ b/test/processing_tests/test_results_handler.py @@ -100,24 +100,26 @@ def test_save_backmapping_weird_format(self): def test_save_backmapping_csv(self): """ Check dimension of feature backmapping equals input dimensions for less than 1000 features. + removed in 339d7d0 """ - backmapping = np.loadtxt(os.path.join(self.hyperpipe.output_settings.results_folder, - 'optimum_pipe_feature_importances_backmapped.csv'), delimiter=',') - self.assertEqual(np.shape(self.__X)[1], backmapping.size) + #backmapping = np.loadtxt(os.path.join(self.hyperpipe.output_settings.results_folder, + # 'optimum_pipe_feature_importances_backmapped.csv'), delimiter=',') + #self.assertEqual(np.shape(self.__X)[1], backmapping.size) def test_save_backmapping_npz(self): """ Check dimension of feature backmapping equals input dimensions for more than 1000 features. 
+ removed in 339d7d0 """ # run another hyperpipe with more than 1000 features # use np.tile to copy features until at least 1000 features are reached - X = np.tile(self.__X, (1, 35)) - self.hyperpipe.fit(X, self.__y) - npzfile = np.load(os.path.join(self.hyperpipe.output_settings.results_folder, - 'optimum_pipe_feature_importances_backmapped.npz')) - self.assertEqual(len(npzfile.files), 1) - backmapping = npzfile[npzfile.files[0]] - self.assertEqual(np.shape(X)[1], backmapping.size) + #X = np.tile(self.__X, (1, 35)) + #self.hyperpipe.fit(X, self.__y) + #npzfile = np.load(os.path.join(self.hyperpipe.output_settings.results_folder, + # 'optimum_pipe_feature_importances_backmapped.npz')) + #self.assertEqual(len(npzfile.files), 1) + #backmapping = npzfile[npzfile.files[0]] + #self.assertEqual(np.shape(X)[1], backmapping.size) def test_save_backmapping_stack(self): # build hyperpipe with stack first From f1e54cbe3afb815b880808538a28c271c72eccaf Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Fri, 25 Oct 2024 12:53:19 +0200 Subject: [PATCH 20/30] removed tmp dir usage --- examples/advanced/gpboost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/advanced/gpboost.py b/examples/advanced/gpboost.py index 916807d7..3137d9e4 100644 --- a/examples/advanced/gpboost.py +++ b/examples/advanced/gpboost.py @@ -79,7 +79,7 @@ def get_mock_data(): X, y, clst = get_mock_data() # define project folder - project_folder = "/tmp/gpboost_debug" + project_folder = "./tmp/gpboost_debug" my_pipe = get_gpboost_pipe("Test_gpboost", project_folder, split="random") my_pipe.fit(X, y, clusters=clst) From 4ebebdb825a7fa4597003fc51ef2aaf189273439 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Fri, 25 Oct 2024 13:11:16 +0200 Subject: [PATCH 21/30] modified dependabot to create PRs for develop --- .github/dependabot.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 1e1cb541..6859dff2 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,9 +7,11 @@ version: 2 updates: - package-ecosystem: "github-actions" # See documentation for possible values directory: "/" # Location of package manifests + target-branch: "develop" schedule: interval: "daily" - package-ecosystem: "pip" directory: "/" + target-branch: "develop" schedule: interval: "daily" From 8d7bcae5c50b0549c9e70c5305e84fd64a8fb35e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:12:37 +0000 Subject: [PATCH 22/30] Bump actions/setup-python from 4 to 5 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/documentation_build_and_update.yml | 2 +- .github/workflows/documentation_deployment.yml | 2 +- .github/workflows/python-deploy_to_pypi.yml | 2 +- .github/workflows/python-test_and_deploy.yml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/documentation_build_and_update.yml b/.github/workflows/documentation_build_and_update.yml index 2ff02ace..f0b7feab 100644 --- a/.github/workflows/documentation_build_and_update.yml +++ b/.github/workflows/documentation_build_and_update.yml @@ -13,7 +13,7 @@ jobs: fetch-depth: 0 - name: Install Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.9' diff --git a/.github/workflows/documentation_deployment.yml b/.github/workflows/documentation_deployment.yml index 0d3a0f60..b26a53a6 100644 --- a/.github/workflows/documentation_deployment.yml +++ b/.github/workflows/documentation_deployment.yml @@ -16,7 +16,7 @@ jobs: fetch-depth: 0 - name: Install Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.9' diff --git a/.github/workflows/python-deploy_to_pypi.yml b/.github/workflows/python-deploy_to_pypi.yml index c7805e95..52516024 100644 --- a/.github/workflows/python-deploy_to_pypi.yml +++ b/.github/workflows/python-deploy_to_pypi.yml @@ -13,7 +13,7 @@ jobs: with: fetch-depth: 0 - name: Set up Python 3.10.8 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.10.8 - name: Install pypa/build diff --git a/.github/workflows/python-test_and_deploy.yml b/.github/workflows/python-test_and_deploy.yml index 01a75d79..b5f9d0c1 100644 --- a/.github/workflows/python-test_and_deploy.yml +++ b/.github/workflows/python-test_and_deploy.yml @@ -25,7 +25,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python 3.9 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 - name: Install dependencies @@ -50,7 +50,7 @@ jobs: with: fetch-depth: 0 - name: Set up Python 3.9 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 - name: Install pypa/build From 825f62732af99f95c3d15e4ab78b9b974e94a9f7 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Fri, 25 Oct 2024 13:33:41 +0200 Subject: [PATCH 23/30] added accidentally removed code --- photonai/base/hyperpipe.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index 724119ca..f8001eca 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -945,6 +945,8 @@ def _finalize_optimization(self): logger.info("No feature importances available for {}!".format(self.optimum_pipe.elements[-1][0])) else: + self.results.best_config_feature_importances = feature_importances + # write backmapping file only if optimum_pipes inverse_transform works completely. # restriction: only a faulty inverse_transform is considered, missing ones are further ignored. 
# with warnings.catch_warnings(record=True) as w: From a334422ec7ecec28b9af92a92293f20409147304 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Fri, 25 Oct 2024 13:51:52 +0200 Subject: [PATCH 24/30] Revert "Feature/score train" --- photonai/base/hyperpipe.py | 11 +++-------- photonai/processing/inner_folds.py | 29 +++++++---------------------- photonai/processing/metrics.py | 4 ++-- photonai/processing/outer_folds.py | 10 +++------- 4 files changed, 15 insertions(+), 39 deletions(-) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index f8001eca..b0b701c1 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -297,8 +297,7 @@ def __init__(self, name: Optional[str], cache_folder: str = None, nr_of_processes: int = 1, multi_threading: bool = True, - allow_multidim_targets: bool = False, - score_train: bool = True): + allow_multidim_targets: bool = False): """ Initialize the object. @@ -421,9 +420,6 @@ def __init__(self, name: Optional[str], allow_multidim_targets: Allows multidimensional targets. - score_train: - metrics for the train-set are only calculated if score_train is true. - """ self.name = re.sub(r'\W+', '', name) @@ -518,7 +514,6 @@ def __init__(self, name: Optional[str], self.permutation_id = permutation_id self.allow_multidim_targets = allow_multidim_targets self.is_final_fit = False - self.score_train = score_train # ====================== Random Seed =========================== self.random_state = random_seed @@ -944,6 +939,7 @@ def _finalize_optimization(self): if not feature_importances: logger.info("No feature importances available for {}!".format(self.optimum_pipe.elements[-1][0])) else: + self.results.best_config_feature_importances = feature_importances self.results.best_config_feature_importances = feature_importances @@ -1091,8 +1087,7 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs): cache_folder=self.cache_folder, cache_updater=self.recursive_cache_folder_propagation, dummy_estimator=dummy_estimator, - result_obj=outer_fold, - score_train=self.score_train) + result_obj=outer_fold) # 2. 
monitor outputs self.results.outer_folds.append(outer_fold) diff --git a/photonai/processing/inner_folds.py b/photonai/processing/inner_folds.py index 6d2f2541..1c665866 100644 --- a/photonai/processing/inner_folds.py +++ b/photonai/processing/inner_folds.py @@ -66,8 +66,7 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos, training: bool = False, cache_folder=None, cache_updater=None, - scorer: Scorer = None, - score_train: bool = True): + scorer: Scorer = None): self.params = specific_config self.pipe = pipe_ctor @@ -82,7 +81,6 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos, self.raise_error = raise_error self.training = training - self.score_train = score_train def fit(self, X, y, **kwargs): """Iterates over cross-validation folds and trains the pipeline, @@ -138,8 +136,7 @@ def fit(self, X, y, **kwargs): kwargs_cv_train), test_data=InnerFoldManager.JobData(test_X, test_y, test, kwargs_cv_test), - scorer=self.scorer, - score_train=self.score_train) + scorer=self.scorer) # only for unparallel processing # inform children in which inner fold we are @@ -227,8 +224,7 @@ def compute_learning_curves(self, new_pipe, train_X, train_y, train, kwargs_cv_t callbacks=self.optimization_constraints, train_data=self.JobData(train_cut_X, train_cut_y, train_cut, train_cut_kwargs), test_data=self.JobData(test_X, test_y, test, kwargs_cv_test), - scorer=self.scorer, - score_train=self.score_train) + scorer=self.scorer) curr_test_cut, curr_train_cut = InnerFoldManager.fit_and_score(job_data) learning_curves.append([self.cross_validation_infos.learning_curves_cut.values[i], curr_test_cut.metrics, curr_train_cut.metrics]) @@ -243,7 +239,7 @@ def __init__(self, X, y, indices, cv_kwargs): class InnerCVJob: - def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer, score_train): + def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer): self.pipe = pipe self.config = config self.metrics = metrics @@ -251,7 +247,6 @@ def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scor self.train_data = train_data self.test_data = test_data self.scorer = scorer - self.score_train = score_train @staticmethod def update_config_item_with_inner_fold(config_item, fold_cnt, curr_train_fold, curr_test_fold, time_monitor, @@ -349,7 +344,7 @@ def fit_and_score(job: InnerCVJob): # start fitting pipe.fit(job.train_data.X, job.train_data.y, **job.train_data.cv_kwargs) - logger.debug('Scoring Test Data') + logger.debug('Scoring Training Data') # score test data curr_test_fold = InnerFoldManager.score(pipe, job.test_data.X, job.test_data.y, job.metrics, @@ -357,19 +352,9 @@ def fit_and_score(job: InnerCVJob): scorer=job.scorer, **job.test_data.cv_kwargs) - logger.debug('Scoring Training Data') + logger.debug('Scoring Test Data') # score train data - scores = {} - for metric in list(curr_test_fold.metrics.keys()): - scores[metric] = 0 - curr_train_fold = MDBScoreInformation(metrics=scores, - score_duration=0, - y_pred=list(np.zeros_like(job.train_data.y)), - y_true=list(job.train_data.y), - indices=np.asarray(job.train_data.indices).tolist(), - probabilities=[]) - if job.score_train: - curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, + curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, indices=job.train_data.indices, training=True, scorer=job.scorer, **job.train_data.cv_kwargs) diff --git a/photonai/processing/metrics.py 
b/photonai/processing/metrics.py index e704f81d..9dfc4c78 100644 --- a/photonai/processing/metrics.py +++ b/photonai/processing/metrics.py @@ -29,9 +29,9 @@ class Scorer: 'precision': ('sklearn.metrics', 'precision_score', 'score'), 'recall': ('sklearn.metrics', 'recall_score', 'score'), 'auc': ('sklearn.metrics', 'roc_auc_score', 'score'), - 'sensitivity': ('sklearn.metrics', 'recall_score', 'score'), + 'sensitivity': ('photonai.processing.metrics', 'sensitivity', 'score'), 'specificity': ('photonai.processing.metrics', 'specificity', 'score'), - 'balanced_accuracy': ('sklearn.metrics', 'balanced_accuracy_score', 'score'), + 'balanced_accuracy': ('photonai.processing.metrics', 'balanced_accuracy', 'score'), 'categorical_accuracy': ('photonai.processing.metrics', 'categorical_accuracy_score', 'score'), # Regression diff --git a/photonai/processing/outer_folds.py b/photonai/processing/outer_folds.py index 9235fae1..4891b14e 100644 --- a/photonai/processing/outer_folds.py +++ b/photonai/processing/outer_folds.py @@ -63,8 +63,7 @@ def __init__(self, pipe, cache_folder=None, cache_updater=None, dummy_estimator=None, - result_obj=None, - score_train: bool = True): + result_obj=None): self.outer_fold_id = outer_fold_id self.cross_validation_info = cross_validation_info self.scorer = Scorer(optimization_info.metrics) @@ -72,7 +71,6 @@ def __init__(self, pipe, self._pipe = pipe self.copy_pipe_fnc = self._pipe.copy_me self.dummy_estimator = dummy_estimator - self.score_train = score_train self.cache_folder = cache_folder self.cache_updater = cache_updater @@ -248,7 +246,6 @@ def fit(self, X, y=None, **kwargs): indices=self.cross_validation_info.outer_folds[self.outer_fold_id].test_indices, metrics=self.optimization_info.metrics, scorer=self.scorer, - score_train=self.score_train, **self._test_kwargs) logger.debug('... 
scoring training data') @@ -258,7 +255,6 @@ def fit(self, X, y=None, **kwargs): metrics=self.optimization_info.metrics, training=True, scorer=self.scorer, - score_train=self.score_train, **self._validation_kwargs) best_config_performance_mdb.training = train_score_mdb @@ -390,7 +386,7 @@ def _fit_dummy(self): self.dummy_estimator.fit(dummy_y, self._validation_y) train_scores = InnerFoldManager.score(self.dummy_estimator, self._validation_X, self._validation_y, metrics=self.optimization_info.metrics, - scorer=self.scorer, score_train=self.score_train) + scorer=self.scorer) # fill result tree with fold information inner_fold = MDBInnerFold() @@ -400,7 +396,7 @@ def _fit_dummy(self): test_scores = InnerFoldManager.score(self.dummy_estimator, self._test_X, self._test_y, metrics=self.optimization_info.metrics, - scorer=self.scorer, score_train=self.score_train) + scorer=self.scorer) print_metrics("DUMMY", test_scores.metrics) inner_fold.validation = test_scores From ee2b1a4cc65745133a2d044af222fac8d08a46d5 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Mon, 4 Nov 2024 10:23:56 +0100 Subject: [PATCH 25/30] Added score_train parameter --- photonai/base/hyperpipe.py | 10 ++++++++-- photonai/processing/inner_folds.py | 29 ++++++++++++++++++++++------- photonai/processing/outer_folds.py | 10 +++++++--- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index b0b701c1..78d96693 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -297,7 +297,8 @@ def __init__(self, name: Optional[str], cache_folder: str = None, nr_of_processes: int = 1, multi_threading: bool = True, - allow_multidim_targets: bool = False): + allow_multidim_targets: bool = False, + score_train: bool = True): """ Initialize the object. @@ -420,6 +421,9 @@ def __init__(self, name: Optional[str], allow_multidim_targets: Allows multidimensional targets. + score_train: + metrics for the train-set are only calculated if score_train is true. + """ self.name = re.sub(r'\W+', '', name) @@ -514,6 +518,7 @@ def __init__(self, name: Optional[str], self.permutation_id = permutation_id self.allow_multidim_targets = allow_multidim_targets self.is_final_fit = False + self.score_train = score_train # ====================== Random Seed =========================== self.random_state = random_seed @@ -1087,7 +1092,8 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs): cache_folder=self.cache_folder, cache_updater=self.recursive_cache_folder_propagation, dummy_estimator=dummy_estimator, - result_obj=outer_fold) + result_obj=outer_fold, + score_train=self.score_train) # 2. 
monitor outputs self.results.outer_folds.append(outer_fold) diff --git a/photonai/processing/inner_folds.py b/photonai/processing/inner_folds.py index 1c665866..6d2f2541 100644 --- a/photonai/processing/inner_folds.py +++ b/photonai/processing/inner_folds.py @@ -66,7 +66,8 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos, training: bool = False, cache_folder=None, cache_updater=None, - scorer: Scorer = None): + scorer: Scorer = None, + score_train: bool = True): self.params = specific_config self.pipe = pipe_ctor @@ -81,6 +82,7 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos, self.raise_error = raise_error self.training = training + self.score_train = score_train def fit(self, X, y, **kwargs): """Iterates over cross-validation folds and trains the pipeline, @@ -136,7 +138,8 @@ def fit(self, X, y, **kwargs): kwargs_cv_train), test_data=InnerFoldManager.JobData(test_X, test_y, test, kwargs_cv_test), - scorer=self.scorer) + scorer=self.scorer, + score_train=self.score_train) # only for unparallel processing # inform children in which inner fold we are @@ -224,7 +227,8 @@ def compute_learning_curves(self, new_pipe, train_X, train_y, train, kwargs_cv_t callbacks=self.optimization_constraints, train_data=self.JobData(train_cut_X, train_cut_y, train_cut, train_cut_kwargs), test_data=self.JobData(test_X, test_y, test, kwargs_cv_test), - scorer=self.scorer) + scorer=self.scorer, + score_train=self.score_train) curr_test_cut, curr_train_cut = InnerFoldManager.fit_and_score(job_data) learning_curves.append([self.cross_validation_infos.learning_curves_cut.values[i], curr_test_cut.metrics, curr_train_cut.metrics]) @@ -239,7 +243,7 @@ def __init__(self, X, y, indices, cv_kwargs): class InnerCVJob: - def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer): + def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer, score_train): self.pipe = pipe self.config = config self.metrics = metrics @@ -247,6 +251,7 @@ def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scor self.train_data = train_data self.test_data = test_data self.scorer = scorer + self.score_train = score_train @staticmethod def update_config_item_with_inner_fold(config_item, fold_cnt, curr_train_fold, curr_test_fold, time_monitor, @@ -344,7 +349,7 @@ def fit_and_score(job: InnerCVJob): # start fitting pipe.fit(job.train_data.X, job.train_data.y, **job.train_data.cv_kwargs) - logger.debug('Scoring Training Data') + logger.debug('Scoring Test Data') # score test data curr_test_fold = InnerFoldManager.score(pipe, job.test_data.X, job.test_data.y, job.metrics, @@ -352,9 +357,19 @@ def fit_and_score(job: InnerCVJob): scorer=job.scorer, **job.test_data.cv_kwargs) - logger.debug('Scoring Test Data') + logger.debug('Scoring Training Data') # score train data - curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, + scores = {} + for metric in list(curr_test_fold.metrics.keys()): + scores[metric] = 0 + curr_train_fold = MDBScoreInformation(metrics=scores, + score_duration=0, + y_pred=list(np.zeros_like(job.train_data.y)), + y_true=list(job.train_data.y), + indices=np.asarray(job.train_data.indices).tolist(), + probabilities=[]) + if job.score_train: + curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, indices=job.train_data.indices, training=True, scorer=job.scorer, **job.train_data.cv_kwargs) diff --git 
a/photonai/processing/outer_folds.py b/photonai/processing/outer_folds.py index 4891b14e..9235fae1 100644 --- a/photonai/processing/outer_folds.py +++ b/photonai/processing/outer_folds.py @@ -63,7 +63,8 @@ def __init__(self, pipe, cache_folder=None, cache_updater=None, dummy_estimator=None, - result_obj=None): + result_obj=None, + score_train: bool = True): self.outer_fold_id = outer_fold_id self.cross_validation_info = cross_validation_info self.scorer = Scorer(optimization_info.metrics) @@ -71,6 +72,7 @@ def __init__(self, pipe, self._pipe = pipe self.copy_pipe_fnc = self._pipe.copy_me self.dummy_estimator = dummy_estimator + self.score_train = score_train self.cache_folder = cache_folder self.cache_updater = cache_updater @@ -246,6 +248,7 @@ def fit(self, X, y=None, **kwargs): indices=self.cross_validation_info.outer_folds[self.outer_fold_id].test_indices, metrics=self.optimization_info.metrics, scorer=self.scorer, + score_train=self.score_train, **self._test_kwargs) logger.debug('... scoring training data') @@ -255,6 +258,7 @@ def fit(self, X, y=None, **kwargs): metrics=self.optimization_info.metrics, training=True, scorer=self.scorer, + score_train=self.score_train, **self._validation_kwargs) best_config_performance_mdb.training = train_score_mdb @@ -386,7 +390,7 @@ def _fit_dummy(self): self.dummy_estimator.fit(dummy_y, self._validation_y) train_scores = InnerFoldManager.score(self.dummy_estimator, self._validation_X, self._validation_y, metrics=self.optimization_info.metrics, - scorer=self.scorer) + scorer=self.scorer, score_train=self.score_train) # fill result tree with fold information inner_fold = MDBInnerFold() @@ -396,7 +400,7 @@ def _fit_dummy(self): test_scores = InnerFoldManager.score(self.dummy_estimator, self._test_X, self._test_y, metrics=self.optimization_info.metrics, - scorer=self.scorer) + scorer=self.scorer, score_train=self.score_train) print_metrics("DUMMY", test_scores.metrics) inner_fold.validation = test_scores From f5a5f8f6a02a26846d5ae8b8371857a9689016a7 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Mon, 4 Nov 2024 10:24:18 +0100 Subject: [PATCH 26/30] Adapted metrics to sklearn --- photonai/processing/metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/photonai/processing/metrics.py b/photonai/processing/metrics.py index 9dfc4c78..e704f81d 100644 --- a/photonai/processing/metrics.py +++ b/photonai/processing/metrics.py @@ -29,9 +29,9 @@ class Scorer: 'precision': ('sklearn.metrics', 'precision_score', 'score'), 'recall': ('sklearn.metrics', 'recall_score', 'score'), 'auc': ('sklearn.metrics', 'roc_auc_score', 'score'), - 'sensitivity': ('photonai.processing.metrics', 'sensitivity', 'score'), + 'sensitivity': ('sklearn.metrics', 'recall_score', 'score'), 'specificity': ('photonai.processing.metrics', 'specificity', 'score'), - 'balanced_accuracy': ('photonai.processing.metrics', 'balanced_accuracy', 'score'), + 'balanced_accuracy': ('sklearn.metrics', 'balanced_accuracy_score', 'score'), 'categorical_accuracy': ('photonai.processing.metrics', 'categorical_accuracy_score', 'score'), # Regression From 9a2c3177eb0d68d6ca8abe6d0f73ba7148ee6104 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Mon, 4 Nov 2024 10:51:23 +0100 Subject: [PATCH 27/30] removed unneeded statement --- photonai/base/hyperpipe.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index 78d96693..0dcf4dbb 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -946,8 +946,6 @@ def 
_finalize_optimization(self): else: self.results.best_config_feature_importances = feature_importances - self.results.best_config_feature_importances = feature_importances - # write backmapping file only if optimum_pipes inverse_transform works completely. # restriction: only a faulty inverse_transform is considered, missing ones are further ignored. # with warnings.catch_warnings(record=True) as w: From 921b7f81f84c26b25db19e8a827a8506340d5c35 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Mon, 4 Nov 2024 11:06:40 +0100 Subject: [PATCH 28/30] fixed dummy error --- photonai/processing/inner_folds.py | 19 ++++++++++--------- photonai/processing/outer_folds.py | 4 ++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/photonai/processing/inner_folds.py b/photonai/processing/inner_folds.py index 6d2f2541..c7e90920 100644 --- a/photonai/processing/inner_folds.py +++ b/photonai/processing/inner_folds.py @@ -359,20 +359,21 @@ def fit_and_score(job: InnerCVJob): logger.debug('Scoring Training Data') # score train data - scores = {} - for metric in list(curr_test_fold.metrics.keys()): - scores[metric] = 0 - curr_train_fold = MDBScoreInformation(metrics=scores, - score_duration=0, - y_pred=list(np.zeros_like(job.train_data.y)), - y_true=list(job.train_data.y), - indices=np.asarray(job.train_data.indices).tolist(), - probabilities=[]) if job.score_train: curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, indices=job.train_data.indices, training=True, scorer=job.scorer, **job.train_data.cv_kwargs) + else: + scores = {} + for metric in list(curr_test_fold.metrics.keys()): + scores[metric] = 0 + curr_train_fold = MDBScoreInformation(metrics=scores, + score_duration=0, + y_pred=list(np.zeros_like(job.train_data.y)), + y_true=list(job.train_data.y), + indices=np.asarray(job.train_data.indices).tolist(), + probabilities=[]) return curr_test_fold, curr_train_fold diff --git a/photonai/processing/outer_folds.py b/photonai/processing/outer_folds.py index 9235fae1..9d453a56 100644 --- a/photonai/processing/outer_folds.py +++ b/photonai/processing/outer_folds.py @@ -390,7 +390,7 @@ def _fit_dummy(self): self.dummy_estimator.fit(dummy_y, self._validation_y) train_scores = InnerFoldManager.score(self.dummy_estimator, self._validation_X, self._validation_y, metrics=self.optimization_info.metrics, - scorer=self.scorer, score_train=self.score_train) + scorer=self.scorer) # fill result tree with fold information inner_fold = MDBInnerFold() @@ -400,7 +400,7 @@ def _fit_dummy(self): test_scores = InnerFoldManager.score(self.dummy_estimator, self._test_X, self._test_y, metrics=self.optimization_info.metrics, - scorer=self.scorer, score_train=self.score_train) + scorer=self.scorer) print_metrics("DUMMY", test_scores.metrics) inner_fold.validation = test_scores From 2683ca94b17fb7a11b8ede2be423806d355d97d6 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Mon, 4 Nov 2024 11:57:04 +0100 Subject: [PATCH 29/30] Fixed score_train error --- photonai/base/hyperpipe.py | 8 ++++- photonai/processing/inner_folds.py | 33 +++++++++++---------- photonai/processing/outer_folds.py | 9 ++++-- test/integration_tests/test_architecture.py | 1 + 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py index 0dcf4dbb..e3854747 100644 --- a/photonai/base/hyperpipe.py +++ b/photonai/base/hyperpipe.py @@ -298,6 +298,7 @@ def __init__(self, name: Optional[str], nr_of_processes: int = 1, multi_threading: bool = True, 
allow_multidim_targets: bool = False, + raise_error: bool = False, score_train: bool = True): """ Initialize the object. @@ -424,6 +425,9 @@ def __init__(self, name: Optional[str], score_train: metrics for the train-set are only calculated if score_train is true. + raise_error: + if true, errors in the inner fold are raised instead of suppressed as warnings. + """ self.name = re.sub(r'\W+', '', name) @@ -519,6 +523,7 @@ def __init__(self, name: Optional[str], self.allow_multidim_targets = allow_multidim_targets self.is_final_fit = False self.score_train = score_train + self.raise_error = raise_error # ====================== Random Seed =========================== self.random_state = random_seed @@ -1091,7 +1096,8 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs): cache_updater=self.recursive_cache_folder_propagation, dummy_estimator=dummy_estimator, result_obj=outer_fold, - score_train=self.score_train) + score_train=self.score_train, + raise_error=self.raise_error) # 2. monitor outputs self.results.outer_folds.append(outer_fold) diff --git a/photonai/processing/inner_folds.py b/photonai/processing/inner_folds.py index c7e90920..fb4b3ff5 100644 --- a/photonai/processing/inner_folds.py +++ b/photonai/processing/inner_folds.py @@ -359,28 +359,18 @@ def fit_and_score(job: InnerCVJob): logger.debug('Scoring Training Data') # score train data - if job.score_train: - curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, - indices=job.train_data.indices, - training=True, - scorer=job.scorer, **job.train_data.cv_kwargs) - else: - scores = {} - for metric in list(curr_test_fold.metrics.keys()): - scores[metric] = 0 - curr_train_fold = MDBScoreInformation(metrics=scores, - score_duration=0, - y_pred=list(np.zeros_like(job.train_data.y)), - y_true=list(job.train_data.y), - indices=np.asarray(job.train_data.indices).tolist(), - probabilities=[]) + curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics, + indices=job.train_data.indices, + training=True, + score_train=job.score_train, + scorer=job.scorer, **job.train_data.cv_kwargs) return curr_test_fold, curr_train_fold @staticmethod def score(estimator, X, y_true, metrics, indices=[], calculate_metrics: bool = True, training: bool = False, - scorer: Scorer = None, **kwargs): + scorer: Scorer = None, score_train=True, **kwargs): """Uses the pipeline to predict the given data, compare it to the truth values and calculate metrics @@ -426,6 +416,17 @@ def score(estimator, X, y_true, metrics, indices=[], output_metrics = {} + if training and not score_train: + scores = {} + for metric in list(metrics.keys()): + scores[metric] = 0 + return MDBScoreInformation(metrics=scores, + score_duration=0, + y_pred=list(np.zeros_like(y_true)), + y_true=list(y_true), + indices=np.asarray(indices).tolist(), + probabilities=[]) + if not training: y_pred = estimator.predict(X, **kwargs) else: diff --git a/photonai/processing/outer_folds.py b/photonai/processing/outer_folds.py index 9d453a56..d054a81e 100644 --- a/photonai/processing/outer_folds.py +++ b/photonai/processing/outer_folds.py @@ -64,6 +64,7 @@ def __init__(self, pipe, cache_updater=None, dummy_estimator=None, result_obj=None, + raise_error=False, score_train: bool = True): self.outer_fold_id = outer_fold_id self.cross_validation_info = cross_validation_info @@ -73,6 +74,7 @@ def __init__(self, pipe, self.copy_pipe_fnc = self._pipe.copy_me self.dummy_estimator = dummy_estimator self.score_train = score_train + 
self.raise_error = raise_error self.cache_folder = cache_folder self.cache_updater = cache_updater @@ -248,7 +250,6 @@ def fit(self, X, y=None, **kwargs): indices=self.cross_validation_info.outer_folds[self.outer_fold_id].test_indices, metrics=self.optimization_info.metrics, scorer=self.scorer, - score_train=self.score_train, **self._test_kwargs) logger.debug('... scoring training data') @@ -312,7 +313,8 @@ def objective_function(self, current_config): self.cross_validation_info, self.outer_fold_id, self.constraint_objects, cache_folder=self.cache_folder, cache_updater=self.cache_updater, - scorer=self.scorer) + scorer=self.scorer, + raise_error=self.raise_error) # Test the configuration cross validated by inner_cv object current_config_mdb = hp.fit(self._validation_X, self._validation_y, **self._validation_kwargs) @@ -389,7 +391,9 @@ def _fit_dummy(self): dummy_y = np.reshape(self._validation_y, (-1, 1)) self.dummy_estimator.fit(dummy_y, self._validation_y) train_scores = InnerFoldManager.score(self.dummy_estimator, self._validation_X, self._validation_y, + training=True, metrics=self.optimization_info.metrics, + score_train=self.score_train, scorer=self.scorer) # fill result tree with fold information @@ -400,6 +404,7 @@ def _fit_dummy(self): test_scores = InnerFoldManager.score(self.dummy_estimator, self._test_X, self._test_y, metrics=self.optimization_info.metrics, + score_train=self.score_train, scorer=self.scorer) print_metrics("DUMMY", test_scores.metrics) inner_fold.validation = test_scores diff --git a/test/integration_tests/test_architecture.py b/test/integration_tests/test_architecture.py index b7b769cb..7f4592be 100644 --- a/test/integration_tests/test_architecture.py +++ b/test/integration_tests/test_architecture.py @@ -67,6 +67,7 @@ def create_hyperpipes(metrics: list = None, inner_cv=KFold(n_splits=3, shuffle=T use_test_set=eval_final_performance, performance_constraints=performance_constraints, cache_folder=cache_folder, + raise_error=True, verbosity=0) return pipe From 90683dad9fa4a7dd2134b1e708eb4948afba7e24 Mon Sep 17 00:00:00 2001 From: Jan Ernsting Date: Mon, 4 Nov 2024 12:11:22 +0100 Subject: [PATCH 30/30] Fixed dummy special case --- photonai/processing/inner_folds.py | 4 ++-- photonai/processing/outer_folds.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/photonai/processing/inner_folds.py b/photonai/processing/inner_folds.py index fb4b3ff5..17ad672f 100644 --- a/photonai/processing/inner_folds.py +++ b/photonai/processing/inner_folds.py @@ -370,7 +370,7 @@ def fit_and_score(job: InnerCVJob): @staticmethod def score(estimator, X, y_true, metrics, indices=[], calculate_metrics: bool = True, training: bool = False, - scorer: Scorer = None, score_train=True, **kwargs): + dummy: bool = False, scorer: Scorer = None, score_train=True, **kwargs): """Uses the pipeline to predict the given data, compare it to the truth values and calculate metrics @@ -427,7 +427,7 @@ def score(estimator, X, y_true, metrics, indices=[], indices=np.asarray(indices).tolist(), probabilities=[]) - if not training: + if not training or (training and dummy): y_pred = estimator.predict(X, **kwargs) else: X, y_true_new, kwargs_new = estimator.transform(X, y_true, **kwargs) diff --git a/photonai/processing/outer_folds.py b/photonai/processing/outer_folds.py index d054a81e..510b7f1e 100644 --- a/photonai/processing/outer_folds.py +++ b/photonai/processing/outer_folds.py @@ -392,6 +392,7 @@ def _fit_dummy(self): self.dummy_estimator.fit(dummy_y, self._validation_y) train_scores = 
InnerFoldManager.score(self.dummy_estimator, self._validation_X, self._validation_y, training=True, + dummy=True, metrics=self.optimization_info.metrics, score_train=self.score_train, scorer=self.scorer)
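
A minimal usage sketch of the options introduced in PATCH 25/30, 26/30 and 29/30. It is not part of the patches themselves: the synthetic dataset and the element names ('StandardScaler', 'LogisticRegression') are illustrative assumptions, while the score_train and raise_error arguments and the metric names follow the hunks above.

from sklearn.datasets import make_classification
from sklearn.model_selection import KFold

from photonai import Hyperpipe, PipelineElement

# synthetic classification data, only for illustration
X, y = make_classification(n_samples=200, n_features=20, random_state=42)

pipe = Hyperpipe("score_train_demo",
                 outer_cv=KFold(n_splits=3, shuffle=True, random_state=42),
                 inner_cv=KFold(n_splits=3, shuffle=True, random_state=42),
                 # 'sensitivity' and 'balanced_accuracy' now resolve to
                 # sklearn.metrics.recall_score / balanced_accuracy_score (PATCH 26/30)
                 metrics=["balanced_accuracy", "sensitivity", "specificity"],
                 best_config_metric="balanced_accuracy",
                 project_folder="./tmp",
                 score_train=False,  # skip scoring of the training folds; train metrics are reported as zeros (PATCH 25/29)
                 raise_error=True)   # re-raise inner-fold errors instead of only logging them (PATCH 29)

pipe += PipelineElement("StandardScaler")
pipe += PipelineElement("LogisticRegression")

pipe.fit(X, y)

Leaving score_train at its default (True) keeps the previous behaviour of scoring both training and test folds; raise_error=True mirrors the setting added to test/integration_tests/test_architecture.py in PATCH 29/30.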