Skip to content

Commit

Permalink
Merge pull request #269 from ZJUEarthData/dev/YmY
Browse files Browse the repository at this point in the history
feat: add automatic parameter tuning for the SGD regression algorithm.
  • Loading branch information
SanyHe authored Oct 25, 2023
2 parents 22b5b93 + 337ef7a commit 81921a6
Showing 1 changed file with 128 additions and 0 deletions.
128 changes: 128 additions & 0 deletions geochemistrypi/data_mining/model/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -3525,6 +3525,54 @@ def __init__(
)

self.naming = SGDRegression.name
self.customized = True
self.customized_name = "SGD Regression"

@property
def settings(self) -> Dict:
    """Return the FLAML AutoML configuration used to tune SGD Regression.

    The configuration restricts the search to the single customized learner
    registered under ``self.customized_name`` and scores candidates with R2.

    Returns:
        A dict with the keys ``time_budget``, ``metric``, ``estimator_list``
        and ``task``.
    """
    learners = [self.customized_name]  # list of ML learners
    # Optional FLAML settings that are intentionally left disabled:
    #   "log_file_name": f'{self.naming} - automl.log'  -> flaml log file
    #   "log_training_metric": True                     -> whether to log training metric
    return {
        "time_budget": 10,  # total running time in seconds
        "metric": "r2",
        "estimator_list": learners,
        "task": "regression",  # task type
    }

@property
def customization(self) -> object:
    """Return the customized SGD Regression learner class for the FLAML framework.

    The returned class subclasses FLAML's ``SKLearnEstimator``, uses
    scikit-learn's ``SGDRegressor`` as its estimator for regression tasks and
    declares the hyperparameter search space FLAML explores.

    Returns:
        The ``MySGDRegression`` class itself (not an instance).
    """
    # These dependencies are imported locally, so they are only needed when
    # this property is accessed.
    from flaml import tune
    from flaml.data import REGRESSION
    from flaml.model import SKLearnEstimator
    from sklearn.linear_model import SGDRegressor

    class MySGDRegression(SKLearnEstimator):
        """FLAML learner backed by ``SGDRegressor``."""

        def __init__(self, task="regression", n_jobs=None, **config):
            # ``n_jobs`` is accepted but deliberately not forwarded with the
            # rest of ``config``.
            # NOTE(review): presumably because SGDRegressor has no ``n_jobs``
            # parameter - confirm against the scikit-learn version in use.
            super().__init__(task, **config)
            if task in REGRESSION:
                self.estimator_class = SGDRegressor

        @classmethod
        def search_space(cls, data_size, task):
            """Return the search space: one ``domain`` and ``init_value`` per hyperparameter.

            ``data_size`` and ``task`` are part of the expected signature but
            are not used to shape the space.
            """
            space = {
                "loss": {
                    "domain": tune.choice(["squared_error", "huber", "epsilon_insensitive", "squared_epsilon_insensitive"]),
                    "init_value": "squared_error",
                },
                "penalty": {"domain": tune.choice(["l2", "l1", "elasticnet", None]), "init_value": "l2"},
                "alpha": {"domain": tune.loguniform(lower=0.0001, upper=1), "init_value": 0.0001},
                "fit_intercept": {"domain": tune.choice([True, False]), "init_value": True},
                # tune.randint excludes its upper bound, so 1001 is required
                # for the init_value of 1000 to lie inside the domain.
                "max_iter": {"domain": tune.randint(lower=50, upper=1001), "init_value": 1000},
                "tol": {"domain": tune.loguniform(lower=0.000001, upper=0.001), "init_value": 0.001},
                "shuffle": {"domain": tune.choice([True, False]), "init_value": True},
                "learning_rate": {
                    "domain": tune.choice(["constant", "optimal", "invscaling", "adaptive"]),
                    "init_value": "invscaling",
                },
                "eta0": {"domain": tune.loguniform(lower=0.0001, upper=0.1), "init_value": 0.01},
                "power_t": {"domain": tune.uniform(lower=0.1, upper=0.9), "init_value": 0.25},
                "l1_ratio": {"domain": tune.uniform(lower=0, upper=1), "init_value": 0.15},
            }
            return space

    return MySGDRegression

@classmethod
def manual_hyper_parameters(cls) -> Dict:
Expand All @@ -3534,6 +3582,7 @@ def manual_hyper_parameters(cls) -> Dict:
clear_output()
return hyper_parameters

@dispatch()
def special_components(self, **kwargs) -> None:
"""Invoke all special application functions for this algorithms by Scikit-learn framework."""
GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
Expand Down Expand Up @@ -3611,3 +3660,82 @@ def special_components(self, **kwargs) -> None:
)
else:
pass

@dispatch(bool)
def special_components(self, is_automl: bool, **kwargs) -> None:
    """Invoke all special application functions of this algorithm for the AutoML-tuned model.

    This overload is selected when a bool is passed; ``is_automl`` is only
    used for dispatch and is not read in the body. The formula is built from
    ``self.auto_model`` and the diagrams are drawn from the class-level test
    data, with the set of diagrams depending on the number of feature columns:

    * more than 2 columns: 2D scatter on one chosen dimension, 3D scatter on
      two chosen dimensions;
    * exactly 2 columns: 2D scatter on one chosen dimension, 3D scatter and
      3D surface on the full test data;
    * exactly 1 column: 2D scatter and 2D line on the full test data;
    * otherwise: only the formula is shown.
    """
    image_dir = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
    artifacts_dir = os.getenv("GEOPI_OUTPUT_ARTIFACTS_PATH")

    fitted = self.auto_model
    self._show_formula(
        coef=[fitted.coef_],
        intercept=fitted.intercept_,
        features_name=SGDRegression.X_train.columns,
        algorithm_name=self.naming,
        local_path=artifacts_dir,
        mlflow_path="root",
    )

    n_features = SGDRegression.X.shape[1]
    if n_features < 1:
        # nothing to plot without at least one feature column
        return

    # keyword arguments shared by every diagram call below
    plot_kwargs = {
        "algorithm_name": self.naming,
        "local_path": image_dir,
        "mlflow_path": MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
    }
    x_test = SGDRegression.X_test
    y_test = SGDRegression.y_test

    if n_features == 1:
        # single feature: no need to choose a dimension
        self._plot_2d_scatter_diagram(feature_data=x_test, target_data=y_test, **plot_kwargs)
        self._plot_2d_line_diagram(
            feature_data=x_test,
            target_data=y_test,
            y_test_predict=SGDRegression.y_test_predict,
            **plot_kwargs,
        )
        return

    # two or more features: choose one of the dimensions for the 2D scatter
    _, one_dim_data = self.choose_dimension_data(x_test, 1)
    self._plot_2d_scatter_diagram(feature_data=one_dim_data, target_data=y_test, **plot_kwargs)

    if n_features == 2:
        # exactly two features: the 3D diagrams use the full test data
        self._plot_3d_scatter_diagram(feature_data=x_test, target_data=y_test, **plot_kwargs)
        self._plot_3d_surface_diagram(
            feature_data=x_test,
            target_data=y_test,
            y_test_predict=SGDRegression.y_test_predict,
            **plot_kwargs,
        )
    else:
        # more than two features: choose two of the dimensions to draw
        _, two_dim_data = self.choose_dimension_data(x_test, 2)
        self._plot_3d_scatter_diagram(feature_data=two_dim_data, target_data=y_test, **plot_kwargs)

0 comments on commit 81921a6

Please sign in to comment.