Commit
Added score_train parameter
jernsting committed Sep 25, 2023
1 parent ef5fbb6 commit 94d1dd3
Showing 3 changed files with 33 additions and 12 deletions.
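
For orientation: a minimal usage sketch of the new parameter from the user-facing API. Apart from score_train, every argument and element below (optimizer, metrics, cross-validation settings, project_folder, the PipelineElement choices) is assumed standard PHOTONAI usage and is not part of this commit; setting score_train=False skips metric computation on the training folds.

    # Sketch only: a typical PHOTONAI pipeline with training-set scoring disabled.
    # Everything except score_train is assumed standard PHOTONAI usage, not part of this commit.
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import KFold
    from photonai.base import Hyperpipe, PipelineElement

    X, y = load_breast_cancer(return_X_y=True)

    pipe = Hyperpipe('skip_train_scoring_example',
                     optimizer='grid_search',
                     metrics=['accuracy', 'balanced_accuracy'],
                     best_config_metric='accuracy',
                     outer_cv=KFold(n_splits=3),
                     inner_cv=KFold(n_splits=3),
                     project_folder='./tmp',
                     score_train=False)   # new in this commit: skip scoring the training folds

    pipe += PipelineElement('StandardScaler')
    pipe += PipelineElement('SVC', hyperparameters={'C': [0.1, 1.0]}, kernel='linear')

    pipe.fit(X, y)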
photonai/base/hyperpipe.py (8 additions, 2 deletions)

@@ -297,7 +297,8 @@ def __init__(self, name: Optional[str],
                  cache_folder: str = None,
                  nr_of_processes: int = 1,
                  multi_threading: bool = True,
-                 allow_multidim_targets: bool = False):
+                 allow_multidim_targets: bool = False,
+                 score_train: bool = True):
         """
         Initialize the object.

@@ -420,6 +421,9 @@ def __init__(self, name: Optional[str],
             allow_multidim_targets:
                 Allows multidimensional targets.

+            score_train:
+                metrics for the train-set are only calculated if score_train is true.
+
         """

         self.name = re.sub(r'\W+', '', name)

@@ -514,6 +518,7 @@ def __init__(self, name: Optional[str],
         self.permutation_id = permutation_id
         self.allow_multidim_targets = allow_multidim_targets
         self.is_final_fit = False
+        self.score_train = score_train

         # ====================== Random Seed ===========================
         self.random_state = random_seed

@@ -1085,7 +1090,8 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
                                          cache_folder=self.cache_folder,
                                          cache_updater=self.recursive_cache_folder_propagation,
                                          dummy_estimator=dummy_estimator,
-                                         result_obj=outer_fold)
+                                         result_obj=outer_fold,
+                                         score_train=self.score_train)
             # 2. monitor outputs
             self.results.outer_folds.append(outer_fold)
photonai/processing/inner_folds.py (18 additions, 7 deletions)

@@ -66,7 +66,8 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos,
                  training: bool = False,
                  cache_folder=None,
                  cache_updater=None,
-                 scorer: Scorer = None):
+                 scorer: Scorer = None,
+                 score_train: bool = True):

         self.params = specific_config
         self.pipe = pipe_ctor

@@ -81,6 +82,7 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos,
         self.raise_error = raise_error
         self.training = training
+        self.score_train = score_train

     def fit(self, X, y, **kwargs):
         """Iterates over cross-validation folds and trains the pipeline,

@@ -136,7 +138,8 @@ def fit(self, X, y, **kwargs):
                                                                          kwargs_cv_train),
                                       test_data=InnerFoldManager.JobData(test_X, test_y, test,
                                                                          kwargs_cv_test),
-                                      scorer=self.scorer)
+                                      scorer=self.scorer,
+                                      score_train=self.score_train)

                 # only for unparallel processing
                 # inform children in which inner fold we are

@@ -224,7 +227,8 @@ def compute_learning_curves(self, new_pipe, train_X, train_y, train, kwargs_cv_t
                                callbacks=self.optimization_constraints,
                                train_data=self.JobData(train_cut_X, train_cut_y, train_cut, train_cut_kwargs),
                                test_data=self.JobData(test_X, test_y, test, kwargs_cv_test),
-                               scorer=self.scorer)
+                               scorer=self.scorer,
+                               score_train=self.score_train)
             curr_test_cut, curr_train_cut = InnerFoldManager.fit_and_score(job_data)
             learning_curves.append([self.cross_validation_infos.learning_curves_cut.values[i], curr_test_cut.metrics,
                                     curr_train_cut.metrics])

@@ -239,14 +243,15 @@ def __init__(self, X, y, indices, cv_kwargs):

     class InnerCVJob:

-        def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer):
+        def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer, score_train):
             self.pipe = pipe
             self.config = config
             self.metrics = metrics
             self.callbacks = callbacks
             self.train_data = train_data
             self.test_data = test_data
             self.scorer = scorer
+            self.score_train = score_train

     @staticmethod
     def update_config_item_with_inner_fold(config_item, fold_cnt, curr_train_fold, curr_test_fold, time_monitor,

@@ -344,17 +349,23 @@ def fit_and_score(job: InnerCVJob):
         # start fitting
         pipe.fit(job.train_data.X, job.train_data.y, **job.train_data.cv_kwargs)

-        logger.debug('Scoring Training Data')
+        logger.debug('Scoring Test Data')

         # score test data
         curr_test_fold = InnerFoldManager.score(pipe, job.test_data.X, job.test_data.y, job.metrics,
                                                 indices=job.test_data.indices,
                                                 scorer=job.scorer,
                                                 **job.test_data.cv_kwargs)

-        logger.debug('Scoring Test Data')
+        logger.debug('Scoring Training Data')
         # score train data
-        curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics,
+        curr_train_fold = MDBScoreInformation(metrics={},
+                                              score_duration=0,
+                                              y_pred=np.zeros_like(job.train_data.y), y_true=job.train_data.y,
+                                              indices=np.asarray(job.train_data.indices).tolist(),
+                                              probabilities=None)
+        if job.score_train:
+            curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics,
                                                  indices=job.train_data.indices,
                                                  training=True,
                                                  scorer=job.scorer, **job.train_data.cv_kwargs)
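
The core behavioural change sits in fit_and_score above: when score_train is disabled, the training fold is no longer scored, and a placeholder result with empty metrics and zeroed predictions is stored instead so the result tree keeps its shape. Below is a self-contained sketch of that control flow; FoldScore is a stand-in that mimics the fields of MDBScoreInformation and is not the PHOTONAI class.

    # Illustrative stand-in for the new fit_and_score logic, not PHOTONAI code.
    from dataclasses import dataclass, field
    from typing import Callable, Dict, Optional
    import numpy as np

    @dataclass
    class FoldScore:
        metrics: Dict[str, float] = field(default_factory=dict)
        score_duration: float = 0.0
        y_pred: Optional[np.ndarray] = None
        y_true: Optional[np.ndarray] = None
        indices: Optional[list] = None
        probabilities: Optional[np.ndarray] = None

    def score_training_fold(pipe, X_train, y_train, indices,
                            metrics: Dict[str, Callable], score_train: bool = True) -> FoldScore:
        if not score_train:
            # Skip the (potentially expensive) predictions on the training split and
            # return an empty placeholder so downstream code still finds a result object.
            return FoldScore(metrics={},
                             y_pred=np.zeros_like(y_train),
                             y_true=y_train,
                             indices=np.asarray(indices).tolist())
        y_pred = pipe.predict(X_train)
        return FoldScore(metrics={name: fn(y_train, y_pred) for name, fn in metrics.items()},
                         y_pred=y_pred,
                         y_true=y_train,
                         indices=np.asarray(indices).tolist())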
photonai/processing/outer_folds.py (7 additions, 3 deletions)

@@ -63,14 +63,16 @@ def __init__(self, pipe,
                  cache_folder=None,
                  cache_updater=None,
                  dummy_estimator=None,
-                 result_obj=None):
+                 result_obj=None,
+                 score_train: bool = True):
         self.outer_fold_id = outer_fold_id
         self.cross_validation_info = cross_validation_info
         self.scorer = Scorer(optimization_info.metrics)
         self.optimization_info = optimization_info
         self._pipe = pipe
         self.copy_pipe_fnc = self._pipe.copy_me
         self.dummy_estimator = dummy_estimator
+        self.score_train = score_train

         self.cache_folder = cache_folder
         self.cache_updater = cache_updater

@@ -246,6 +248,7 @@ def fit(self, X, y=None, **kwargs):
                 indices=self.cross_validation_info.outer_folds[self.outer_fold_id].test_indices,
                 metrics=self.optimization_info.metrics,
                 scorer=self.scorer,
+                score_train=self.score_train,
                 **self._test_kwargs)

             logger.debug('... scoring training data')

@@ -255,6 +258,7 @@ def fit(self, X, y=None, **kwargs):
                 metrics=self.optimization_info.metrics,
                 training=True,
                 scorer=self.scorer,
+                score_train=self.score_train,
                 **self._validation_kwargs)

             best_config_performance_mdb.training = train_score_mdb

@@ -386,7 +390,7 @@ def _fit_dummy(self):
             self.dummy_estimator.fit(dummy_y, self._validation_y)
             train_scores = InnerFoldManager.score(self.dummy_estimator, self._validation_X, self._validation_y,
                                                   metrics=self.optimization_info.metrics,
-                                                  scorer=self.scorer)
+                                                  scorer=self.scorer, score_train=self.score_train)

             # fill result tree with fold information
             inner_fold = MDBInnerFold()

@@ -396,7 +400,7 @@ def _fit_dummy(self):
             test_scores = InnerFoldManager.score(self.dummy_estimator,
                                                  self._test_X, self._test_y,
                                                  metrics=self.optimization_info.metrics,
-                                                 scorer=self.scorer)
+                                                 scorer=self.scorer, score_train=self.score_train)
             print_metrics("DUMMY", test_scores.metrics)
             inner_fold.validation = test_scores
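
Taken together, the three files thread the flag from the user-facing object down to the fold workers: Hyperpipe stores score_train and hands it to OuterFoldManager, which passes it on to InnerFoldManager and finally to each InnerCVJob. A compressed sketch of that propagation path, with each real constructor reduced to the one new argument:

    # Simplified stand-ins: only the score_train argument of each layer is shown;
    # the real PHOTONAI constructors take many more parameters.
    class InnerCVJobSketch:
        def __init__(self, score_train: bool):
            self.score_train = score_train          # consumed in fit_and_score

    class InnerFoldManagerSketch:
        def __init__(self, score_train: bool = True):
            self.score_train = score_train
        def make_job(self) -> InnerCVJobSketch:
            return InnerCVJobSketch(score_train=self.score_train)

    class OuterFoldManagerSketch:
        def __init__(self, score_train: bool = True):
            self.score_train = score_train
        def make_inner(self) -> InnerFoldManagerSketch:
            return InnerFoldManagerSketch(score_train=self.score_train)

    class HyperpipeSketch:
        def __init__(self, score_train: bool = True):
            self.score_train = score_train
        def fit(self) -> OuterFoldManagerSketch:
            return OuterFoldManagerSketch(score_train=self.score_train)

    assert HyperpipeSketch(score_train=False).fit().make_inner().make_job().score_train is False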
