Merge pull request #89 from wwu-mmll/develop

Develop
wwu-mmll · Nov 4, 2024 · ea31ea7 · ea31ea7
2 parents 762d713 + b3e29cd
commit ea31ea7
Show file tree

Hide file tree

Showing 28 changed files with 503 additions and 84 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -7,9 +7,11 @@ version: 2
 updates:
   - package-ecosystem: "github-actions" # See documentation for possible values
     directory: "/" # Location of package manifests
+    target-branch: "develop"
     schedule:
       interval: "daily"
   - package-ecosystem: "pip"
     directory: "/"
+    target-branch: "develop"
     schedule:
       interval: "daily"
diff --git a/.github/workflows/documentation_build_and_update.yml b/.github/workflows/documentation_build_and_update.yml
@@ -13,7 +13,7 @@ jobs:
           fetch-depth: 0
 
       - name: Install Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.9'
 

diff --git a/.github/workflows/documentation_deployment.yml b/.github/workflows/documentation_deployment.yml
@@ -16,7 +16,7 @@ jobs:
           fetch-depth: 0
 
       - name: Install Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.9'
 

diff --git a/.github/workflows/python-deploy_to_pypi.yml b/.github/workflows/python-deploy_to_pypi.yml
@@ -13,7 +13,7 @@ jobs:
       with:
         fetch-depth: 0
     - name: Set up Python 3.10.8
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: 3.10.8
     - name: Install pypa/build

diff --git a/.github/workflows/python-test_and_deploy.yml b/.github/workflows/python-test_and_deploy.yml
@@ -25,7 +25,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python 3.9
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: 3.9
     - name: Install dependencies
@@ -50,7 +50,7 @@ jobs:
       with:
         fetch-depth: 0
     - name: Set up Python 3.9
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: 3.9
     - name: Install pypa/build

diff --git a/examples/advanced/connectome_based_predictive_modeling_example.py b/examples/advanced/connectome_based_predictive_modeling_example.py
@@ -0,0 +1,34 @@
+"""
+Connectome-based predictive modeling
+
+CPM is a method described in the following Nature Protocols article: https://www.nature.com/articles/nprot.2016.178
+It has been used in a number of publications to predict behavior from connectivity data.
+CPM works similarly to a feature selection method. First, relevant edges (connectivity values) are identified through
+correlation analysis. Every edge is correlated with the predictive target. Only significant edges will be used in the
+subsequent steps. Next, the edge values for all significant positive and for all significant negative correlations are
+summed to create two new features. Lastly, these two features are used as input to another classifier.
+
+In this example, no connectivity data is used, but the method will still work.
+This example is just supposed to show how to use CPM as a feature selection and integration tool in PHOTONAI.
+"""
+
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import KFold
+
+from photonai import Hyperpipe, PipelineElement
+
+
+# Example data: the scikit-learn breast cancer dataset as (features, targets).
+X, y = load_breast_cancer(return_X_y=True)
+
+# Outer and inner loop use identically configured, but separate, shuffled 5-fold splitters.
+outer_folds = KFold(n_splits=5, shuffle=True, random_state=15)
+inner_folds = KFold(n_splits=5, shuffle=True, random_state=15)
+
+pipe = Hyperpipe(
+    "cpm_feature_selection_pipe",
+    outer_cv=outer_folds,
+    inner_cv=inner_folds,
+    metrics=["balanced_accuracy"],
+    best_config_metric="balanced_accuracy",
+    project_folder='./tmp',
+)
+
+# Search space of the CPM step: correlation method and significance threshold.
+cpm_search_space = {
+    'corr_method': ['pearson', 'spearman'],
+    'p_threshold': [0.01, 0.05],
+}
+pipe += PipelineElement('CPMFeatureSelection', hyperparameters=cpm_search_space)
+
+# The features produced by the CPM step are fed into a logistic regression.
+pipe += PipelineElement('LogisticRegression')
+
+pipe.fit(X, y)
diff --git a/examples/advanced/gpboost.py b/examples/advanced/gpboost.py
@@ -0,0 +1,85 @@
+# pip install gpboost -U
+import gpboost as gpb
+import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
+from sklearn.model_selection import GroupKFold, KFold
+
+from photonai.base import Hyperpipe, PipelineElement
+# from gpboost import GPBoostRegressor
+
+
+class GPBoostDataWrapper(RegressorMixin, BaseEstimator):
+    """Scikit-learn style wrapper around ``gpb.GPBoostRegressor`` for grouped data.
+
+    The cluster label of every sample is expected as the ``clusters`` keyword
+    argument of ``fit`` and ``predict``. In ``fit`` the labels become the
+    ``group_data`` of a Gaussian-likelihood ``gpb.GPModel`` that is handed to
+    the boosting regressor.
+
+    The wrapped model is a regressor, so the class uses ``RegressorMixin``
+    (listed before ``BaseEstimator``, as scikit-learn recommends for mixins).
+    """
+
+    def __init__(self):
+        # NOTE(review): presumably the flag PHOTONAI checks before forwarding
+        # fit/predict kwargs such as ``clusters`` to this element - confirm.
+        self.needs_covariates = True
+        # The wrapped regressor is created in fit(); None until then.
+        self.gpboost = None
+
+    def fit(self, X, y, **kwargs):
+        """Fit a GPBoost regressor with a grouped random-effects model.
+
+        Parameters:
+            X: Feature matrix passed on to ``GPBoostRegressor.fit``.
+            y: Targets passed on to ``GPBoostRegressor.fit``.
+            **kwargs: Must contain ``clusters``, one group label per sample.
+
+        Returns:
+            self
+
+        Raises:
+            NotImplementedError: If no ``clusters`` keyword argument is given.
+        """
+        # Validate first so that a failed call does not leave an unfitted
+        # regressor behind in self.gpboost.
+        if "clusters" not in kwargs:
+            raise NotImplementedError("GPBoost needs clusters")
+
+        groups = pd.Series(kwargs["clusters"])
+        gp_model = gpb.GPModel(likelihood="gaussian", group_data=groups)
+        self.gpboost = gpb.GPBoostRegressor()
+        self.gpboost.fit(X, y, gp_model=gp_model)
+        return self
+
+    def predict(self, X, **kwargs):
+        """Predict targets for ``X`` using the cluster labels of the new samples.
+
+        Parameters:
+            X: Feature matrix passed on to ``GPBoostRegressor.predict``.
+            **kwargs: Must contain ``clusters``, one group label per sample.
+
+        Returns:
+            The ``"response_mean"`` entry of the GPBoost prediction result.
+
+        Raises:
+            KeyError: If no ``clusters`` keyword argument is given.
+        """
+        groups = pd.Series(kwargs["clusters"])
+        prediction = self.gpboost.predict(X, group_data_pred=groups)
+        return prediction["response_mean"]
+
+    def save(self):
+        """Return None; this wrapper does not persist anything."""
+        return None
+
+
+def get_gpboost_pipe(pipe_name, project_folder, split="group"):
+    """Build a grid-search Hyperpipe of a StandardScaler followed by GPBoost.
+
+    Parameters:
+        pipe_name: Name handed to the Hyperpipe.
+        project_folder: Project folder handed to the Hyperpipe.
+        split: ``"group"`` uses ``GroupKFold`` for the outer loop, any other
+            value uses ``KFold``. Both use 10 splits.
+
+    Returns:
+        The configured Hyperpipe (not yet fitted).
+    """
+    outer_splitter_cls = GroupKFold if split == "group" else KFold
+    outer_splitter = outer_splitter_cls(n_splits=10)
+
+    regression_metrics = ['mean_absolute_error', 'mean_squared_error',
+                          'spearman_correlation', 'pearson_correlation']
+
+    hyperpipe = Hyperpipe(
+        pipe_name,
+        optimizer='grid_search',
+        metrics=regression_metrics,
+        best_config_metric='mean_absolute_error',
+        outer_cv=outer_splitter,
+        inner_cv=KFold(n_splits=10),
+        calculate_metrics_across_folds=True,
+        use_test_set=True,
+        verbosity=1,
+        project_folder=project_folder,
+    )
+
+    # Transformer step: scaling, with test_disabled=True and no tuned hyperparameters.
+    scaler = PipelineElement("StandardScaler", hyperparameters={},
+                             test_disabled=True, with_mean=True, with_std=True)
+    hyperpipe += scaler
+
+    # Final step: the custom GPBoost wrapper, registered under the name "GPBoost".
+    hyperpipe += PipelineElement.create("GPBoost", GPBoostDataWrapper(), hyperparameters={})
+
+    return hyperpipe
+
+
+def get_mock_data():
+    """Create random mock data for the GPBoost example.
+
+    Returns:
+        A tuple ``(X, y, clst)``: ``X`` is a (200, 9) array of random integers
+        in [0, 10), ``y`` is the row-wise sum of ``X`` and ``clst`` holds 200
+        random cluster labels in [0, 10).
+    """
+    # Draw order (features first, then clusters) matters for reproducibility
+    # under a fixed numpy seed.
+    features = np.random.randint(10, size=(200, 9))
+    cluster_ids = np.random.randint(10, size=200)
+    targets = features.sum(axis=1)
+
+    return features, targets, cluster_ids
+
+
+if __name__ == '__main__':
+    # Fit the example pipe on random mock data, using plain KFold
+    # (split="random") for the outer loop.
+    features, targets, cluster_ids = get_mock_data()
+
+    # "./tmp/gpboost_debug" is the project folder of this run.
+    gpboost_pipe = get_gpboost_pipe("Test_gpboost", "./tmp/gpboost_debug", split="random")
+    gpboost_pipe.fit(features, targets, clusters=cluster_ids)
diff --git a/examples/basic/classification_custom.py b/examples/basic/classification_custom.py
@@ -5,7 +5,7 @@
 my_pipe = Hyperpipe('basic_svm_pipe',
                     inner_cv=KFold(n_splits=5),
                     outer_cv=KFold(n_splits=3),
-                    optimizer='sk_opt',
+                    optimizer='random_grid_search',
                     optimizer_params={'n_configurations': 15},
                     metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
                     best_config_metric='accuracy',

diff --git a/examples/basic/regression.py b/examples/basic/regression.py
@@ -2,6 +2,7 @@
 from photonai import RegressionPipe
 
 my_pipe = RegressionPipe('diabetes',
+                         best_config_metric='median_absolute_error',
                          add_default_pipeline_elements=True,
                          scaling=True,
                          imputation=False,

diff --git a/examples/optimizer/meta_optimizer.py b/examples/optimizer/meta_optimizer.py
@@ -7,7 +7,8 @@
                     inner_cv=KFold(n_splits=5),
                     outer_cv=KFold(n_splits=3),
                     optimizer='switch',
-                    optimizer_params={'name': 'sk_opt', 'n_configurations': 50},
+                    # optimizer_params={'name': 'grid_search'},
+                    optimizer_params={'name': 'random_search', 'n_configurations': 10},
                     metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
                     best_config_metric='accuracy',
                     project_folder='./tmp',
@@ -16,7 +17,7 @@
 my_pipe.add(PipelineElement('StandardScaler'))
 
 my_pipe += PipelineElement('PCA',
-                           hyperparameters={'n_components': IntegerRange(10, 30)},
+                           hyperparameters={'n_components': IntegerRange(10, 30, step=5)},
                            test_disabled=True)
 
 # set up two learning algorithms in an ensemble
@@ -25,15 +26,15 @@
 estimator_selection += PipelineElement('RandomForestClassifier',
                                        criterion='gini',
                                        hyperparameters={'min_samples_split': IntegerRange(2, 4),
-                                                        'max_features': ['auto', 'sqrt', 'log2'],
+                                                        'max_features': ['sqrt', 'log2'],
                                                         'bootstrap': [True, False]})
 estimator_selection += PipelineElement('SVC',
-                                       hyperparameters={'C': FloatRange(0.5, 25),
+                                       hyperparameters={'C': FloatRange(0.5, 25, num=10),
                                                         'kernel': ['linear', 'rbf']})
 
 my_pipe += estimator_selection
 
 X, y = load_breast_cancer(return_X_y=True)
 my_pipe.fit(X, y)
 
-my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator()
+print(my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator())
diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py
@@ -297,7 +297,9 @@ def __init__(self, name: Optional[str],
                  cache_folder: str = None,
                  nr_of_processes: int = 1,
                  multi_threading: bool = True,
-                 allow_multidim_targets: bool = False):
+                 allow_multidim_targets: bool = False,
+                 raise_error: bool = False,
+                 score_train: bool = True):
         """
         Initialize the object.
 
@@ -420,6 +422,12 @@ def __init__(self, name: Optional[str],
             allow_multidim_targets:
                 Allows multidimensional targets.
 
+            score_train:
+                Metrics for the training set are only calculated if score_train is True (default).
+
+            raise_error:
+                If True, errors in the inner fold are raised instead of being suppressed as warnings (default: False).
+
         """
 
         self.name = re.sub(r'\W+', '', name)
@@ -514,6 +522,8 @@ def __init__(self, name: Optional[str],
         self.permutation_id = permutation_id
         self.allow_multidim_targets = allow_multidim_targets
         self.is_final_fit = False
+        self.score_train = score_train
+        self.raise_error = raise_error
 
         # ====================== Random Seed ===========================
         self.random_state = random_seed
@@ -933,7 +943,7 @@ def _finalize_optimization(self):
                     logger.error(str(e))
 
                 # get feature importances of optimum pipe
-                logger.info("Mapping back feature importances...")
+                # logger.info("Mapping back feature importances...")
                 feature_importances = self.optimum_pipe.feature_importances_
 
                 if not feature_importances:
@@ -943,18 +953,18 @@ def _finalize_optimization(self):
 
                     # write backmapping file only if optimum_pipes inverse_transform works completely.
                     # restriction: only a faulty inverse_transform is considered, missing ones are further ignored.
-                    with warnings.catch_warnings(record=True) as w:
-                        # get backmapping
-                        backmapping, _, _ = self.optimum_pipe.\
-                            inverse_transform(np.array(feature_importances).reshape(1, -1), None)
-
-                        if not any("The inverse transformation is not possible for" in s
-                                   for s in [e.message.args[0] for e in w]):
-                            # save backmapping
-                            self.results_handler.save_backmapping(
-                                filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping)
-                        else:
-                            logger.info('Could not save feature importance: backmapping NOT successful.')
+                    # with warnings.catch_warnings(record=True) as w:
+                    #     # get backmapping
+                    #     backmapping, _, _ = self.optimum_pipe.\
+                    #         inverse_transform(np.array(feature_importances).reshape(1, -1), None)
+                    #
+                    #     if not any("The inverse transformation is not possible for" in s
+                    #                for s in [e.message.args[0] for e in w]):
+                    #         # save backmapping
+                    #         self.results_handler.save_backmapping(
+                    #             filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping)
+                    #     else:
+                    #         logger.info('Could not save feature importance: backmapping NOT successful.')
 
                 # save learning curves
                 if self.cross_validation.learning_curves:
@@ -1085,7 +1095,9 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
                                                            cache_folder=self.cache_folder,
                                                            cache_updater=self.recursive_cache_folder_propagation,
                                                            dummy_estimator=dummy_estimator,
-                                                           result_obj=outer_fold)
+                                                           result_obj=outer_fold,
+                                                           score_train=self.score_train,
+                                                           raise_error=self.raise_error)
                     # 2. monitor outputs
                     self.results.outer_folds.append(outer_fold)
 
@@ -1243,6 +1255,7 @@ def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwar
 
             # get feature importances
             logger.photon_system_log("Permutation Importances: Calculating performances for " + fold_str)
+
             perm_imps = permutation_importance(pipeline, test_X, test_y, **kwargs)
 
             # store into list

diff --git a/photonai/base/model_zoo.py b/photonai/base/model_zoo.py
@@ -217,7 +217,7 @@ def set_default_pipeline(self, scaling, imputation, imputation_nan_value, featur
                     logger.photon_system_log("---")
         logger.stars()
 
-    def fit(self, X=None, y=None):
+    def fit(self, X=None, y=None, **kwargs):
         if (X is not None and self.X_csv_path is not None) or (y is not None and self.y_csv_path is not None):
             raise ValueError("You can either give the fit function data or the pipe definition paths "
                              "to csv files to load data from. Not both.")
@@ -228,7 +228,7 @@ def fit(self, X=None, y=None):
 
         X = X if X is not None else pd.read_csv(self.X_csv_path, delimiter=self.delimiter)
         y = y if y is not None else pd.read_csv(self.y_csv_path, delimiter=self.delimiter)
-        super().fit(X, y)
+        super().fit(X, y, **kwargs)
 
 
 class ClassificationPipe(DefaultPipeline):

diff --git a/photonai/base/registry/PhotonCore.json b/photonai/base/registry/PhotonCore.json
@@ -295,6 +295,10 @@
     "sklearn.linear_model.LogisticRegression",
     "Estimator"
   ],
+  "LinearDiscriminantAnalysis": [
+    "sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
+    "Transformer"
+  ],
   "PassiveAggressiveClassifier":[
     "sklearn.linear_model.PassiveAggressiveClassifier",
     "Estimator"
@@ -486,5 +490,9 @@
   "LocallyLinearEmbedding":[
     "sklearn.manifold.LocallyLinearEmbedding",
     "Transformer"
+  ],
+  "CPMFeatureSelection":[
+    "photonai.modelwrapper.cpm_feature_selection.CPMFeatureSelection",
+    "Estimator"
   ]
 }