Skip to content

Commit

Permalink
feat:Added automatic parameter tuning function of the ElasticNetRegre…
Browse files Browse the repository at this point in the history
…ssion algorithm
  • Loading branch information
Kill-virus committed Oct 23, 2023
1 parent cccb68e commit 7d3ab2a
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 1 deletion.
2 changes: 1 addition & 1 deletion geochemistrypi/data_mining/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
# Display names of the decomposition algorithms (meaning inferred from the constant name — confirm).
DECOMPOSITION_MODELS = ["PCA", "T-SNE", "MDS"]

# Special AutoML models
# NOTE(review): NON_AUTOML_MODELS is assigned twice in a row below. Executed as Python,
# only the second assignment (the one without "Elastic Net") takes effect. The pair looks
# like the before/after lines of a diff hunk — confirm which single line belongs in the file.
NON_AUTOML_MODELS = ["Linear Regression", "Polynomial Regression", "Lasso Regression", "Elastic Net"]
NON_AUTOML_MODELS = ["Linear Regression", "Polynomial Regression", "Lasso Regression"]
# presumably the models whose AutoML search is run through FLAML on Ray — TODO confirm against usage
RAY_FLAML = ["Multi-layer Perceptron"]

# Labels of the selectable imputing strategies; the last one carries a user-specified constant.
IMPUTING_STRATEGY = ["Mean Value", "Median Value", "Most Frequent Value", "Constant(Specified Value)"]
Expand Down
123 changes: 123 additions & 0 deletions geochemistrypi/data_mining/model/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2981,6 +2981,49 @@ def __init__(
)

self.naming = ElasticNetRegression.name
self.customized = True
self.customized_name = "Elastic Net"

@property
def settings(self) -> Dict:
    """Return the FLAML AutoML configuration used to tune Elastic Net.

    The only instance state read is ``self.customized_name``, which becomes the
    single entry of ``estimator_list``.
    """
    # FLAML's optional logging keys ("log_file_name", "log_training_metric") are not set here.
    return {
        # Total running time of the search, in seconds.
        "time_budget": 10,
        "metric": "r2",
        # List of ML learners FLAML is allowed to try.
        "estimator_list": [self.customized_name],
        # Task type.
        "task": "regression",
    }

@property
def customization(self) -> object:
    """Build the customized Elastic Net learner class for the FLAML framework.

    Returns the ``MyElasticNetRegression`` class itself (not an instance); it
    subclasses FLAML's ``SKLearnEstimator`` and uses scikit-learn's ``ElasticNet``.
    """
    # Function-scope imports: FLAML and scikit-learn are only loaded when this is evaluated.
    from flaml import tune
    from flaml.data import REGRESSION
    from flaml.model import SKLearnEstimator
    from sklearn.linear_model import ElasticNet

    class MyElasticNetRegression(SKLearnEstimator):
        """FLAML learner whose underlying estimator is ``ElasticNet`` for regression tasks."""

        def __init__(self, task="regression", n_jobs=None, **config):
            # ``n_jobs`` is accepted in the signature but not forwarded to the base class.
            super().__init__(task, **config)
            if task in REGRESSION:
                self.estimator_class = ElasticNet

        @classmethod
        def search_space(cls, data_size, task):
            """Return the hyper-parameter search space; ``data_size`` and ``task`` are not used."""
            return {
                "alpha": {
                    "domain": tune.uniform(lower=0.001, upper=10),
                    "init_value": 1,
                },
                "l1_ratio": {
                    "domain": tune.uniform(lower=0.001, upper=1),
                    "init_value": 0.5,
                },
                "fit_intercept": {
                    "domain": tune.choice([True, False]),
                },
                "max_iter": {
                    "domain": tune.randint(lower=500, upper=2000),
                    "init_value": 1000,
                },
                "tol": {
                    "domain": tune.uniform(lower=1e-5, upper=1e-3),
                    "init_value": 1e-4,
                },
                "selection": {
                    "domain": tune.choice(["cyclic", "random"]),
                },
            }

    return MyElasticNetRegression

@classmethod
def manual_hyper_parameters(cls) -> Dict:
Expand All @@ -2990,6 +3033,7 @@ def manual_hyper_parameters(cls) -> Dict:
clear_output()
return hyper_parameters

@dispatch()
def special_components(self, **kwargs) -> None:
"""Invoke all special application functions for this algorithms by Scikit-learn framework."""
GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
Expand Down Expand Up @@ -3068,6 +3112,85 @@ def special_components(self, **kwargs) -> None:
else:
pass

@dispatch(bool)
def special_components(self, is_automl: bool, **kwargs) -> None:
    """Invoke all special application functions for this algorithm by FLAML framework.

    Writes the fitted formula of ``self.auto_model`` and then draws the diagrams
    that match the number of columns of ``ElasticNetRegression.X``:

    * 1 column   -> 2D scatter + 2D line
    * 2 columns  -> 2D scatter (one chosen column) + 3D scatter + 3D surface
    * >2 columns -> 2D scatter (one chosen column) + 3D scatter (two chosen columns)

    ``is_automl`` and ``kwargs`` are not read in the body; presumably ``is_automl``
    only serves to select this overload through ``@dispatch(bool)`` — confirm.
    """
    image_dir = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
    artifacts_dir = os.getenv("GEOPI_OUTPUT_ARTIFACTS_PATH")

    self._show_formula(
        coef=[self.auto_model.coef_],
        intercept=self.auto_model.intercept_,
        features_name=ElasticNetRegression.X_train.columns,
        algorithm_name=self.naming,
        local_path=artifacts_dir,
        mlflow_path="root",
    )

    feature_count = ElasticNetRegression.X.shape[1]
    if feature_count < 1:
        # Nothing to draw without at least one feature column.
        return

    # Keyword arguments common to every plotting call below.
    shared = {
        "target_data": ElasticNetRegression.y_test,
        "algorithm_name": self.naming,
        "local_path": image_dir,
        "mlflow_path": MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
    }

    if feature_count == 1:
        # Single feature: no need to choose, plot it directly.
        self._plot_2d_scatter_diagram(feature_data=ElasticNetRegression.X_test, **shared)
        self._plot_2d_line_diagram(
            feature_data=ElasticNetRegression.X_test,
            y_test_predict=ElasticNetRegression.y_test_predict,
            **shared,
        )
        return

    # Two or more features: choose one of the dimensions for the 2D scatter diagram.
    _, one_dim_data = self.choose_dimension_data(ElasticNetRegression.X_test, 1)
    self._plot_2d_scatter_diagram(feature_data=one_dim_data, **shared)

    if feature_count > 2:
        # Choose two of the dimensions for the 3D scatter diagram.
        _, two_dim_data = self.choose_dimension_data(ElasticNetRegression.X_test, 2)
        self._plot_3d_scatter_diagram(feature_data=two_dim_data, **shared)
    else:
        # Exactly two features: no need to choose, use the whole test set.
        self._plot_3d_scatter_diagram(feature_data=ElasticNetRegression.X_test, **shared)
        self._plot_3d_surface_diagram(
            feature_data=ElasticNetRegression.X_test,
            y_test_predict=ElasticNetRegression.y_test_predict,
            **shared,
        )


class SGDRegression(LinearWorkflowMixin, RegressionWorkflowBase):
"""The automation workflow of using Stochastic Gradient Descent - SGD algorithm to make insightful products."""
Expand Down

0 comments on commit 7d3ab2a

Please sign in to comment.