Skip to content

Commit

Permalink
Merge pull request #89 from wwu-mmll/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
NilsWinter authored Nov 4, 2024
2 parents 762d713 + b3e29cd commit ea31ea7
Show file tree
Hide file tree
Showing 28 changed files with 503 additions and 84 deletions.
2 changes: 2 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ version: 2
updates:
- package-ecosystem: "github-actions" # See documentation for possible values
directory: "/" # Location of package manifests
target-branch: "develop"
schedule:
interval: "daily"
- package-ecosystem: "pip"
directory: "/"
target-branch: "develop"
schedule:
interval: "daily"
2 changes: 1 addition & 1 deletion .github/workflows/documentation_build_and_update.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fetch-depth: 0

- name: Install Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.9'

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/documentation_deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
fetch-depth: 0

- name: Install Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.9'

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-deploy_to_pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
with:
fetch-depth: 0
- name: Set up Python 3.10.8
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.10.8
- name: Install pypa/build
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/python-test_and_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.9
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Install dependencies
Expand All @@ -50,7 +50,7 @@ jobs:
with:
fetch-depth: 0
- name: Set up Python 3.9
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Install pypa/build
Expand Down
34 changes: 34 additions & 0 deletions examples/advanced/connectome_based_predictive_modeling_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
Connectome-based predictive modeling
CPM is a method described in the following Nature Protocols article: https://www.nature.com/articles/nprot.2016.178
It has been used in a number of publications to predict behavior from connectivity data.
CPM works similarly to a feature selection method. First, relevant edges (connectivity values) are identified through
correlation analysis. Every edge is correlated with the predictive target. Only significant edges are used in the
subsequent steps. Next, the edge values of all significant positive and of all significant negative correlations are
summed to create two new features. Lastly, these two features are used as input to a subsequent classifier.
In this example, no connectivity data is used, but the method still works.
This example is only meant to show how to use CPM as a feature selection and integration tool in PHOTONAI.
"""

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold

from photonai import Hyperpipe, PipelineElement


# Tabular stand-in data: the breast cancer dataset bundled with scikit-learn.
X, y = load_breast_cancer(return_X_y=True)

# Two identically configured, shuffled 5-fold splitters: one for the outer
# and one for the inner cross-validation loop.
outer_folds = KFold(n_splits=5, shuffle=True, random_state=15)
inner_folds = KFold(n_splits=5, shuffle=True, random_state=15)

pipe = Hyperpipe("cpm_feature_selection_pipe",
                 outer_cv=outer_folds,
                 inner_cv=inner_folds,
                 metrics=["balanced_accuracy"],
                 best_config_metric="balanced_accuracy",
                 project_folder='./tmp')

# Hyperparameter grid for the CPM step: correlation method and the
# significance threshold applied to the edge-wise correlations.
cpm_search_space = {'corr_method': ['pearson', 'spearman'],
                    'p_threshold': [0.01, 0.05]}
pipe += PipelineElement('CPMFeatureSelection', hyperparameters=cpm_search_space)

# Final estimator, added with its default settings.
pipe += PipelineElement('LogisticRegression')

pipe.fit(X, y)
85 changes: 85 additions & 0 deletions examples/advanced/gpboost.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# pip install gpboost -U
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import GroupKFold, KFold
from photonai.base import Hyperpipe, PipelineElement
import numpy as np
import pandas as pd
import gpboost as gpb
# from gpboost import GPBoostRegressor


class GPBoostDataWrapper(BaseEstimator, ClassifierMixin):
    """Wrap ``gpb.GPBoostRegressor`` so it receives grouping data via ``clusters``.

    Both ``fit`` and ``predict`` expect a ``clusters`` keyword argument holding
    one group label per sample. The labels are converted to a ``pandas.Series``
    and handed to GPBoost as grouping data.

    NOTE(review): this class wraps a *regressor* but inherits from
    ``ClassifierMixin``; ``RegressorMixin`` is probably what is intended.
    Confirm before changing, because it alters the inherited ``score``.
    """

    def __init__(self):
        # Flag read from outside this class; presumably it tells PHOTONAI to
        # forward the extra fit/predict kwargs to this element -- TODO confirm.
        self.needs_covariates = True
        # Holds the GPBoostRegressor once ``fit`` has been called.
        self.gpboost = None

    @staticmethod
    def _clusters_from_kwargs(kwargs):
        """Return ``kwargs["clusters"]`` as a ``pandas.Series``.

        Raises
        ------
        NotImplementedError
            If no ``clusters`` entry was passed.
        """
        if "clusters" not in kwargs:
            raise NotImplementedError("GPBoost needs clusters")
        return pd.Series(kwargs["clusters"])

    def fit(self, X, y, **kwargs):
        """Fit a fresh ``GPBoostRegressor`` with a grouped Gaussian ``GPModel``.

        The ``clusters`` kwarg is validated *before* any model is created, so
        a failed call leaves a previously fitted model untouched.

        Raises
        ------
        NotImplementedError
            If ``clusters`` is missing from ``kwargs``.
        """
        clst = self._clusters_from_kwargs(kwargs)
        self.gpboost = gpb.GPBoostRegressor()
        gpmodel = gpb.GPModel(likelihood="gaussian", group_data=clst)
        self.gpboost.fit(X, y, gp_model=gpmodel)
        return self

    def predict(self, X, **kwargs):
        """Return the ``"response_mean"`` entry of GPBoost's prediction for ``X``.

        Raises
        ------
        NotImplementedError
            If ``clusters`` is missing from ``kwargs`` (same error as ``fit``,
            instead of a bare ``KeyError``).
        """
        clst = self._clusters_from_kwargs(kwargs)
        preds = self.gpboost.predict(X, group_data_pred=clst)
        return preds["response_mean"]

    def save(self):
        """Do nothing and return ``None``."""
        return None


def get_gpboost_pipe(pipe_name, project_folder, split="group"):
    """Build the example Hyperpipe: a StandardScaler followed by the GPBoost wrapper.

    Parameters
    ----------
    pipe_name:
        Name passed on to the Hyperpipe.
    project_folder:
        Folder passed on to the Hyperpipe as ``project_folder``.
    split:
        ``"group"`` selects a 10-fold ``GroupKFold`` as outer CV; any other
        value selects a plain 10-fold ``KFold``.

    Returns
    -------
    The configured Hyperpipe (not yet fitted).
    """
    outer_splitter = GroupKFold(n_splits=10) if split == "group" else KFold(n_splits=10)

    regression_metrics = ['mean_absolute_error', 'mean_squared_error',
                          'spearman_correlation', 'pearson_correlation']

    hyperpipe = Hyperpipe(pipe_name,
                          optimizer='grid_search',
                          metrics=regression_metrics,
                          best_config_metric='mean_absolute_error',
                          outer_cv=outer_splitter,
                          inner_cv=KFold(n_splits=10),
                          calculate_metrics_across_folds=True,
                          use_test_set=True,
                          verbosity=1,
                          project_folder=project_folder)

    # Transformer step: scaling, which the optimizer may also switch off
    # (test_disabled=True).
    scaler = PipelineElement("StandardScaler", hyperparameters={},
                             test_disabled=True, with_mean=True, with_std=True)
    hyperpipe += scaler

    # Estimator step: the custom wrapper defined in this file, without a
    # hyperparameter grid.
    hyperpipe += PipelineElement.create("GPBoost", GPBoostDataWrapper(), hyperparameters={})

    return hyperpipe


def get_mock_data():
    """Create random toy data for the GPBoost example.

    Returns
    -------
    tuple
        ``(X, y, clst)`` where ``X`` is a (200, 9) array of random integers
        in [0, 10), ``y`` is the row-wise sum of ``X`` and ``clst`` holds
        200 random integer cluster labels in [0, 10).
    """
    n_samples, n_features = 200, 9

    features = np.random.randint(10, size=(n_samples, n_features))
    targets = features.sum(axis=1)
    cluster_ids = np.random.randint(10, size=n_samples)

    return features, targets, cluster_ids


if __name__ == '__main__':

    # Folder that receives the output of this debug run.
    project_folder = "./tmp/gpboost_debug"

    # Random toy features, targets and cluster labels.
    X, y, clst = get_mock_data()

    # split="random" selects the plain KFold outer split; the cluster labels
    # are still passed on to fit so the GPBoost wrapper receives them.
    my_pipe = get_gpboost_pipe(pipe_name="Test_gpboost",
                               project_folder=project_folder,
                               split="random")
    my_pipe.fit(X, y, clusters=clst)
2 changes: 1 addition & 1 deletion examples/basic/classification_custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
my_pipe = Hyperpipe('basic_svm_pipe',
inner_cv=KFold(n_splits=5),
outer_cv=KFold(n_splits=3),
optimizer='sk_opt',
optimizer='random_grid_search',
optimizer_params={'n_configurations': 15},
metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
best_config_metric='accuracy',
Expand Down
1 change: 1 addition & 0 deletions examples/basic/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from photonai import RegressionPipe

my_pipe = RegressionPipe('diabetes',
best_config_metric='median_absolute_error',
add_default_pipeline_elements=True,
scaling=True,
imputation=False,
Expand Down
11 changes: 6 additions & 5 deletions examples/optimizer/meta_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
inner_cv=KFold(n_splits=5),
outer_cv=KFold(n_splits=3),
optimizer='switch',
optimizer_params={'name': 'sk_opt', 'n_configurations': 50},
# optimizer_params={'name': 'grid_search'},
optimizer_params={'name': 'random_search', 'n_configurations': 10},
metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
best_config_metric='accuracy',
project_folder='./tmp',
Expand All @@ -16,7 +17,7 @@
my_pipe.add(PipelineElement('StandardScaler'))

my_pipe += PipelineElement('PCA',
hyperparameters={'n_components': IntegerRange(10, 30)},
hyperparameters={'n_components': IntegerRange(10, 30, step=5)},
test_disabled=True)

# set up two learning algorithms in an ensemble
Expand All @@ -25,15 +26,15 @@
estimator_selection += PipelineElement('RandomForestClassifier',
criterion='gini',
hyperparameters={'min_samples_split': IntegerRange(2, 4),
'max_features': ['auto', 'sqrt', 'log2'],
'max_features': ['sqrt', 'log2'],
'bootstrap': [True, False]})
estimator_selection += PipelineElement('SVC',
hyperparameters={'C': FloatRange(0.5, 25),
hyperparameters={'C': FloatRange(0.5, 25, num=10),
'kernel': ['linear', 'rbf']})

my_pipe += estimator_selection

X, y = load_breast_cancer(return_X_y=True)
my_pipe.fit(X, y)

my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator()
print(my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator())
43 changes: 28 additions & 15 deletions photonai/base/hyperpipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,9 @@ def __init__(self, name: Optional[str],
cache_folder: str = None,
nr_of_processes: int = 1,
multi_threading: bool = True,
allow_multidim_targets: bool = False):
allow_multidim_targets: bool = False,
raise_error: bool = False,
score_train: bool = True):
"""
Initialize the object.
Expand Down Expand Up @@ -420,6 +422,12 @@ def __init__(self, name: Optional[str],
allow_multidim_targets:
Allows multidimensional targets.
score_train:
metrics for the train-set are only calculated if score_train is true.
raise_error:
if true, errors in the inner fold are raised instead of suppressed as warnings.
"""

self.name = re.sub(r'\W+', '', name)
Expand Down Expand Up @@ -514,6 +522,8 @@ def __init__(self, name: Optional[str],
self.permutation_id = permutation_id
self.allow_multidim_targets = allow_multidim_targets
self.is_final_fit = False
self.score_train = score_train
self.raise_error = raise_error

# ====================== Random Seed ===========================
self.random_state = random_seed
Expand Down Expand Up @@ -933,7 +943,7 @@ def _finalize_optimization(self):
logger.error(str(e))

# get feature importances of optimum pipe
logger.info("Mapping back feature importances...")
# logger.info("Mapping back feature importances...")
feature_importances = self.optimum_pipe.feature_importances_

if not feature_importances:
Expand All @@ -943,18 +953,18 @@ def _finalize_optimization(self):

# write backmapping file only if optimum_pipes inverse_transform works completely.
# restriction: only a faulty inverse_transform is considered, missing ones are further ignored.
with warnings.catch_warnings(record=True) as w:
# get backmapping
backmapping, _, _ = self.optimum_pipe.\
inverse_transform(np.array(feature_importances).reshape(1, -1), None)

if not any("The inverse transformation is not possible for" in s
for s in [e.message.args[0] for e in w]):
# save backmapping
self.results_handler.save_backmapping(
filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping)
else:
logger.info('Could not save feature importance: backmapping NOT successful.')
# with warnings.catch_warnings(record=True) as w:
# # get backmapping
# backmapping, _, _ = self.optimum_pipe.\
# inverse_transform(np.array(feature_importances).reshape(1, -1), None)
#
# if not any("The inverse transformation is not possible for" in s
# for s in [e.message.args[0] for e in w]):
# # save backmapping
# self.results_handler.save_backmapping(
# filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping)
# else:
# logger.info('Could not save feature importance: backmapping NOT successful.')

# save learning curves
if self.cross_validation.learning_curves:
Expand Down Expand Up @@ -1085,7 +1095,9 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
cache_folder=self.cache_folder,
cache_updater=self.recursive_cache_folder_propagation,
dummy_estimator=dummy_estimator,
result_obj=outer_fold)
result_obj=outer_fold,
score_train=self.score_train,
raise_error=self.raise_error)
# 2. monitor outputs
self.results.outer_folds.append(outer_fold)

Expand Down Expand Up @@ -1243,6 +1255,7 @@ def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwar

# get feature importances
logger.photon_system_log("Permutation Importances: Calculating performances for " + fold_str)

perm_imps = permutation_importance(pipeline, test_X, test_y, **kwargs)

# store into list
Expand Down
4 changes: 2 additions & 2 deletions photonai/base/model_zoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def set_default_pipeline(self, scaling, imputation, imputation_nan_value, featur
logger.photon_system_log("---")
logger.stars()

def fit(self, X=None, y=None):
def fit(self, X=None, y=None, **kwargs):
if (X is not None and self.X_csv_path is not None) or (y is not None and self.y_csv_path is not None):
raise ValueError("You can either give the fit function data or the pipe definition paths "
"to csv files to load data from. Not both.")
Expand All @@ -228,7 +228,7 @@ def fit(self, X=None, y=None):

X = X if X is not None else pd.read_csv(self.X_csv_path, delimiter=self.delimiter)
y = y if y is not None else pd.read_csv(self.y_csv_path, delimiter=self.delimiter)
super().fit(X, y)
super().fit(X, y, **kwargs)


class ClassificationPipe(DefaultPipeline):
Expand Down
8 changes: 8 additions & 0 deletions photonai/base/registry/PhotonCore.json
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@
"sklearn.linear_model.LogisticRegression",
"Estimator"
],
"LinearDiscriminantAnalysis": [
"sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
"Transformer"
],
"PassiveAggressiveClassifier":[
"sklearn.linear_model.PassiveAggressiveClassifier",
"Estimator"
Expand Down Expand Up @@ -486,5 +490,9 @@
"LocallyLinearEmbedding":[
"sklearn.manifold.LocallyLinearEmbedding",
"Transformer"
],
"CPMFeatureSelection":[
"photonai.modelwrapper.cpm_feature_selection.CPMFeatureSelection",
"Estimator"
]
}
Loading

0 comments on commit ea31ea7

Please sign in to comment.