Commit a830f01
Merge branch 'develop' into feature/score_train
jernsting authored Oct 25, 2024
2 parents 9d10313 + f1e54cb commit a830f01
Showing 11 changed files with 79 additions and 56 deletions.
2 changes: 1 addition & 1 deletion examples/advanced/gpboost.py
@@ -79,7 +79,7 @@ def get_mock_data():
X, y, clst = get_mock_data()

# define project folder
project_folder = "/tmp/gpboost_debug"
project_folder = "./tmp/gpboost_debug"

my_pipe = get_gpboost_pipe("Test_gpboost", project_folder, split="random")
my_pipe.fit(X, y, clusters=clst)
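
Editor's note: the example now writes to a relative ./tmp directory instead of the system-wide /tmp. A portable alternative (an editorial suggestion, not part of this commit) is to let Python allocate the scratch folder:

import tempfile

# hypothetical variant: a unique scratch directory instead of a fixed path
project_folder = tempfile.mkdtemp(prefix="gpboost_debug_")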
27 changes: 13 additions & 14 deletions photonai/base/hyperpipe.py
@@ -938,7 +938,7 @@ def _finalize_optimization(self):
logger.error(str(e))

# get feature importances of optimum pipe
logger.info("Mapping back feature importances...")
# logger.info("Mapping back feature importances...")
feature_importances = self.optimum_pipe.feature_importances_

if not feature_importances:
@@ -947,19 +947,18 @@ def _finalize_optimization(self):

# write backmapping file only if optimum_pipes inverse_transform works completely.
# restriction: only a faulty inverse_transform is considered, missing ones are further ignored.
- with warnings.catch_warnings(record=True) as w:
-     # get backmapping
-     backmapping, _, _ = self.optimum_pipe.\
-         inverse_transform(np.array(feature_importances).reshape(1, -1), None)
-
-     if not any("The inverse transformation is not possible for" in s
-                for s in [e.message.args[0] for e in w]):
-         # save backmapping
-         self.results_handler.save_backmapping(
-             filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping)
-         self.results.best_config_feature_importances = list(np.squeeze(backmapping))
-     else:
-         logger.info('Could not save feature importance: backmapping NOT successful.')
+ # with warnings.catch_warnings(record=True) as w:
+ #     # get backmapping
+ #     backmapping, _, _ = self.optimum_pipe.\
+ #         inverse_transform(np.array(feature_importances).reshape(1, -1), None)
+ #
+ #     if not any("The inverse transformation is not possible for" in s
+ #                for s in [e.message.args[0] for e in w]):
+ #         # save backmapping
+ #         self.results_handler.save_backmapping(
+ #             filename='optimum_pipe_feature_importances_backmapped', backmapping=backmapping)
+ #     else:
+ #         logger.info('Could not save feature importance: backmapping NOT successful.')

# save learning curves
if self.cross_validation.learning_curves:
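
For context, the now-disabled block detected a faulty inverse_transform by recording warnings. A minimal standalone sketch of that pattern (generic Python, not the PHOTONAI implementation; `transformer` stands for any object whose inverse_transform may emit a warning):

import warnings
import numpy as np

def try_backmapping(transformer, feature_importances):
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")  # record every warning, not just the first
        backmapping = transformer.inverse_transform(
            np.array(feature_importances).reshape(1, -1))
    # a matching warning message signals that backmapping failed
    if any("inverse transformation is not possible" in str(w.message) for w in caught):
        return None
    return backmapping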
4 changes: 4 additions & 0 deletions photonai/base/registry/PhotonCore.json
@@ -295,6 +295,10 @@
"sklearn.linear_model.LogisticRegression",
"Estimator"
],
"LinearDiscriminantAnalysis": [
"sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
"Transformer"
],
"PassiveAggressiveClassifier":[
"sklearn.linear_model.PassiveAggressiveClassifier",
"Estimator"
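
The new registry entry exposes scikit-learn's LDA under the name LinearDiscriminantAnalysis, registered as a Transformer (i.e., used for its dimensionality-reducing transform rather than as a final estimator). A minimal usage sketch, assuming the standard PipelineElement API; the hyperparameter choice is illustrative:

from photonai.base import PipelineElement

# the name resolves via PhotonCore.json to sklearn.discriminant_analysis.LinearDiscriminantAnalysis
lda = PipelineElement('LinearDiscriminantAnalysis', hyperparameters={'n_components': [1]})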
29 changes: 19 additions & 10 deletions photonai/modelwrapper/keras_base_estimator.py
@@ -1,4 +1,5 @@
import warnings
+ import os
import tensorflow.keras as keras
from sklearn.base import BaseEstimator

@@ -72,20 +73,28 @@ def encode_targets(self, y):

def save(self, filename):
# serialize model to JSON
warnings.warn("Using json export for compatibility, will be deprecated in future.")
model_json = self.model.to_json()
with open(filename + ".json", "w") as json_file:
-     json_file.write(model_json)
+     json_file.write(model_json)
# serialize weights to HDF5
- self.model.save_weights(filename + ".h5")
+ self.model.save_weights(filename + ".weights.h5")
+ self.model.save(filename + ".keras")

def load(self, filename):
# load json and create model
- json_file = open(filename + '.json', 'r')
- loaded_model_json = json_file.read()
- json_file.close()
- loaded_model = keras.models.model_from_json(loaded_model_json)
+ if not os.path.exists(filename + '.keras'):
+     warnings.warn("Using json import for compatibility, will be deprecated in future. "
+                   "Please save your model to get a *.keras file")
+     json_file = open(filename + '.json', 'r')
+     loaded_model_json = json_file.read()
+     json_file.close()
+     loaded_model = keras.models.model_from_json(loaded_model_json)
+
+     loaded_model.load_weights(filename + ".weights.h5")
+     self.model = loaded_model
+     self.init_weights = self.model.get_weights()
+ else:
+     # load weights into new model
+     self.model = keras.models.load_model(filename + '.keras')

- # load weights into new model
- loaded_model.load_weights(filename + ".h5")
- self.model = loaded_model
- self.init_weights = self.model.get_weights()
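
Taken together, save() now writes three artifacts (.json, .weights.h5, .keras) and load() prefers the native .keras archive, falling back to the deprecated JSON-plus-weights route. A condensed sketch of the load logic outside the wrapper class, using the same Keras calls as above:

import os
import tensorflow.keras as keras

def load_keras_model(filename):
    if os.path.exists(filename + ".keras"):
        # preferred path: the single-file format written by model.save()
        return keras.models.load_model(filename + ".keras")
    # legacy fallback: architecture from JSON, weights from HDF5
    with open(filename + ".json", "r") as json_file:
        model = keras.models.model_from_json(json_file.read())
    model.load_weights(filename + ".weights.h5")
    return model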
2 changes: 1 addition & 1 deletion photonai/modelwrapper/keras_base_models.py
@@ -245,7 +245,7 @@ def optimizer(self, value):
if value.lower() not in __supported_optimizers__.keys():
raise ValueError("Optimizer is not supported by keras. Please use one of: "+str(__supported_optimizers__))
else:
- self._optimizer = __supported_optimizers__[value.lower()](lr=self.learning_rate)
+ self._optimizer = __supported_optimizers__[value.lower()](learning_rate=self.learning_rate)

@property
def target_activation(self):
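
This tracks the current Keras optimizer API, which dropped the short lr alias in favor of learning_rate. For example:

from tensorflow.keras.optimizers import Adam

optimizer = Adam(learning_rate=1e-3)  # Adam(lr=1e-3) is rejected by current Keras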
4 changes: 2 additions & 2 deletions photonai/processing/metrics.py
@@ -108,9 +108,9 @@ def register_custom_metric(cls, metric: Union[Metric_Type, Tuple[str, Metric_Typ
metric_obj = metric

def metric_func(y_true, y_pred):
- metric_obj.reset_states()
+ metric_obj.reset_state()
metric_obj.update_state(y_true=y_true, y_pred=y_pred)
- return float(cls.dynamic_keras_import.backend.eval(metric_obj.result()))
+ return float(metric_obj.result().numpy())

Scorer.CUSTOM_ELEMENT_DICTIONARY[metric_name] = metric_func
elif callable(metric):
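
Both edits follow the current Keras metric API: reset_states() was renamed to reset_state(), and under eager execution the result tensor converts directly via .numpy() instead of going through keras.backend.eval. A standalone illustration:

import tensorflow as tf

metric = tf.keras.metrics.BinaryAccuracy()
metric.reset_state()                                    # formerly reset_states()
metric.update_state(y_true=[0, 1, 1], y_pred=[0, 1, 0])
print(float(metric.result().numpy()))                   # eager tensor -> Python float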
2 changes: 1 addition & 1 deletion photonai/requirements.txt
@@ -1,7 +1,7 @@
###### Requirements with temporary Version Specifiers ######
numpy
matplotlib
- scikit-learn==1.1.3
+ scikit-learn==1.5.2
pandas
plotly
imbalanced-learn
6 changes: 3 additions & 3 deletions requirements.txt
@@ -1,9 +1,9 @@
numpy
matplotlib
- scikit-learn==1.3.0
+ scikit-learn==1.5.2
pandas
plotly
- imbalanced-learn==0.11.0
+ imbalanced-learn==0.12.4
pymodm
scipy
statsmodels
@@ -14,4 +14,4 @@ dask>=2021.10.0
distributed
scikit-optimize
xlrd
- pbr
+ pbr
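
A quick environment check against the new pins (editorial aside, not part of the commit):

import sklearn
import imblearn

print(sklearn.__version__)   # expected: 1.5.2
print(imblearn.__version__)  # expected: 0.12.4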
14 changes: 7 additions & 7 deletions test/base_tests/test_hyperpipe.py
@@ -700,14 +700,14 @@ def test_finalize_optimization(self):
# save optimum model
self.assert_best_model()

- # backmapping
+ # backmapping - removed in 339d7d0
# because the pca is test disabled, we expect the number of features
- self.assertEqual(len(self.hyperpipe.results.best_config_feature_importances[0]), self.__X.shape[1])
- backmapped_feature_importances = os.path.join(self.hyperpipe.output_settings.results_folder,
-                                               'optimum_pipe_feature_importances_backmapped.csv')
- self.assertTrue(os.path.isfile(backmapped_feature_importances))
- loaded_array = np.loadtxt(open(backmapped_feature_importances, 'rb'), delimiter=",")
- self.assertEqual(loaded_array.shape[0], self.__X.shape[1])
+ #self.assertEqual(len(self.hyperpipe.results.best_config_feature_importances[0]), self.__X.shape[1])
+ #backmapped_feature_importances = os.path.join(self.hyperpipe.output_settings.results_folder,
+ #                                              'optimum_pipe_feature_importances_backmapped.csv')
+ #self.assertTrue(os.path.isfile(backmapped_feature_importances))
+ #loaded_array = np.loadtxt(open(backmapped_feature_importances, 'rb'), delimiter=",")
+ #self.assertEqual(loaded_array.shape[0], self.__X.shape[1])

def assert_best_model(self):
self.assertTrue(os.path.isfile(os.path.join(self.hyperpipe.output_settings.results_folder,
23 changes: 16 additions & 7 deletions test/modelwrapper_tests/test_keras_basic.py
@@ -1,7 +1,7 @@
from sklearn.datasets import load_breast_cancer, load_diabetes
import tensorflow as tf
from tensorflow.keras.models import Sequential
- from tensorflow.keras.layers import Dense, Dropout
+ from tensorflow.keras.layers import Dense, Dropout, Input, Activation
import numpy as np
import warnings
import os
@@ -16,16 +16,17 @@ def setUp(self):
self.X, self.y = load_breast_cancer(return_X_y=True)

self.model = Sequential()
- self.model.add(Dense(3, input_dim=self.X.shape[1], activation='relu'))
+ self.model.add(Input(shape=[self.X.shape[1]]))
+ self.model.add(Dense(3, activation="relu"))
self.model.add(Dropout(0.1))
self.model.add(Dense(2, activation='softmax'))
self.model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

self.estimator_type = KerasBaseClassifier

inputs = tf.keras.Input(shape=(self.X.shape[1],))
- x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
- outputs = tf.keras.layers.Dense(2, activation=tf.nn.softmax)(x)
+ x = tf.keras.layers.Dense(4, activation=tf.keras.activations.relu)(inputs)
+ outputs = tf.keras.layers.Dense(2, activation=tf.keras.activations.softmax)(x)
self.tf_model = tf.keras.Model(inputs=inputs, outputs=outputs)
self.tf_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

@@ -57,10 +58,18 @@ def test_tf_model(self):

estimator.save("keras_example_saved_model")

- reload_estinator = self.estimator_type()
- reload_estinator.load("keras_example_saved_model")
+ reload_estimator = self.estimator_type()
+ reload_estimator.load("keras_example_saved_model")

+ np.testing.assert_array_almost_equal(estimator.predict(self.X), reload_estimator.predict(self.X), decimal=3)

+ # remove novel keras file and test legacy import
+ os.remove("keras_example_saved_model.keras")

+ reload_estimator_legacy = self.estimator_type()
+ reload_estimator_legacy.load("keras_example_saved_model")

- np.testing.assert_array_almost_equal(estimator.predict(self.X), reload_estinator.predict(self.X), decimal=3)
+ np.testing.assert_array_almost_equal(estimator.predict(self.X), reload_estimator.predict(self.X), decimal=3)

+ # remove saved keras files
+ for fname in os.listdir("."):
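
The updated test reflects two Keras idiom changes: the input shape is declared with an explicit Input layer instead of input_dim, and activations are referenced via tf.keras.activations rather than tf.nn. A minimal sketch of the new-style Sequential definition (30 matches the breast-cancer feature count used in the test):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout

model = Sequential([
    Input(shape=(30,)),           # replaces Dense(..., input_dim=30)
    Dense(3, activation="relu"),
    Dropout(0.1),
    Dense(2, activation="softmax"),
])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])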
22 changes: 12 additions & 10 deletions test/processing_tests/test_results_handler.py
@@ -100,24 +100,26 @@ def test_save_backmapping_weird_format(self):
def test_save_backmapping_csv(self):
"""
Check dimension of feature backmapping equals input dimensions for less than 1000 features.
+ removed in 339d7d0
"""
- backmapping = np.loadtxt(os.path.join(self.hyperpipe.output_settings.results_folder,
-                                       'optimum_pipe_feature_importances_backmapped.csv'), delimiter=',')
- self.assertEqual(np.shape(self.__X)[1], backmapping.size)
+ #backmapping = np.loadtxt(os.path.join(self.hyperpipe.output_settings.results_folder,
+ #                                      'optimum_pipe_feature_importances_backmapped.csv'), delimiter=',')
+ #self.assertEqual(np.shape(self.__X)[1], backmapping.size)

def test_save_backmapping_npz(self):
"""
Check dimension of feature backmapping equals input dimensions for more than 1000 features.
+ removed in 339d7d0
"""
# run another hyperpipe with more than 1000 features
# use np.tile to copy features until at least 1000 features are reached
- X = np.tile(self.__X, (1, 35))
- self.hyperpipe.fit(X, self.__y)
- npzfile = np.load(os.path.join(self.hyperpipe.output_settings.results_folder,
-                                'optimum_pipe_feature_importances_backmapped.npz'))
- self.assertEqual(len(npzfile.files), 1)
- backmapping = npzfile[npzfile.files[0]]
- self.assertEqual(np.shape(X)[1], backmapping.size)
+ #X = np.tile(self.__X, (1, 35))
+ #self.hyperpipe.fit(X, self.__y)
+ #npzfile = np.load(os.path.join(self.hyperpipe.output_settings.results_folder,
+ #                               'optimum_pipe_feature_importances_backmapped.npz'))
+ #self.assertEqual(len(npzfile.files), 1)
+ #backmapping = npzfile[npzfile.files[0]]
+ #self.assertEqual(np.shape(X)[1], backmapping.size)

def test_save_backmapping_stack(self):
# build hyperpipe with stack first
