Skip to content

Commit

Permalink
refactor: replace code with enum class in regression
Browse files Browse the repository at this point in the history
  • Loading branch information
clara-sq committed Oct 21, 2024
1 parent b07c6e9 commit 78a8b98
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 19 deletions.
12 changes: 12 additions & 0 deletions geochemistrypi/data_mining/model/func/algo_regression/_enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from enum import Enum


class RegressionCommonFunction(Enum):
    """Names of the functionalities shared by the regression workflows.

    Each member's value is the human-readable label of one functionality.
    Callers pass ``<member>.value`` wherever the label string is needed, so
    the literal text is defined in exactly one place.
    """

    PREDICTED_VS_ACTUAL_DIAGRAM = "Predicted vs. Actual Diagram"
    RESIDUALS_DIAGRAM = "Residuals Diagram"
    MODEL_SCORE = "Model Score"
    CROSS_VALIDATION = "Cross Validation"


class MLPSpecialFunction(Enum):
    """Names of the functionalities specific to the MLP regression workflow.

    Each member's value is the human-readable label of one functionality;
    iterating the enum yields every special functionality in definition order.
    """

    LOSS_CURVE_DIAGRAM = "Loss Curve Diagram"
49 changes: 30 additions & 19 deletions geochemistrypi/data_mining/model/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from .func.algo_regression._common import cross_validation, plot_predicted_vs_actual, plot_residuals, score
from .func.algo_regression._decision_tree import decision_tree_manual_hyper_parameters
from .func.algo_regression._elastic_net import elastic_net_manual_hyper_parameters
from .func.algo_regression._enum import MLPSpecialFunction, RegressionCommonFunction
from .func.algo_regression._extra_tree import extra_trees_manual_hyper_parameters
from .func.algo_regression._gradient_boosting import gradient_boosting_manual_hyper_parameters
from .func.algo_regression._knn import knn_manual_hyper_parameters
Expand Down Expand Up @@ -121,40 +122,40 @@ def manual_hyper_parameters(cls) -> Dict:
return dict()

@staticmethod
def _plot_predicted_vs_actual(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_predicted_vs_actual(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
"""Plot the predicted vs. actual diagram."""
print("-----* Predicted vs. Actual Diagram *-----")
print(f"-----* {grah_name} *-----")
plot_predicted_vs_actual(y_test_predict, y_test, algorithm_name)
save_fig(f"Predicted vs. Actual Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
data = pd.concat([y_test, y_test_predict], axis=1)
save_data(data, name_column, f"Predicted vs. Actual Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_residuals(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_residuals(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
"""Plot the residuals diagram."""
print("-----* Residuals Diagram *-----")
print(f"-----* {grah_name} *-----")
residuals = plot_residuals(y_test_predict, y_test, algorithm_name)
save_fig(f"Residuals Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
data = pd.concat([y_test, residuals], axis=1)
save_data(data, name_column, f"Residuals Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _score(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str) -> None:
def _score(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str, grah_name: str) -> None:
"""Calculate the score of the model."""
print("-----* Model Score *-----")
print(f"-----* {grah_name} *-----")
scores = score(y_true, y_predict)
scores_str = json.dumps(scores, indent=4)
save_text(scores_str, f"Model Score - {algorithm_name}", store_path)
save_text(scores_str, f"{grah_name} - {algorithm_name}", store_path)
mlflow.log_metrics(scores)

@staticmethod
def _cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.DataFrame, cv_num: int, algorithm_name: str, store_path: str) -> None:
def _cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.DataFrame, cv_num: int, algorithm_name: str, store_path: str, grah_name: str) -> None:
"""Cross validation."""
print("-----* Cross Validation *-----")
print(f"-----* {grah_name} *-----")
print(f"K-Folds: {cv_num}")
scores = cross_validation(trained_model, X_train, y_train, cv_num=cv_num)
scores_str = json.dumps(scores, indent=4)
save_text(scores_str, f"Cross Validation - {algorithm_name}", store_path)
save_text(scores_str, f"{grah_name} - {algorithm_name}", store_path)

@dispatch()
def common_components(self) -> None:
Expand All @@ -166,6 +167,7 @@ def common_components(self) -> None:
y_predict=RegressionWorkflowBase.y_test_predict,
algorithm_name=self.naming,
store_path=GEOPI_OUTPUT_METRICS_PATH,
grah_name=RegressionCommonFunction.MODEL_SCORE.value,
)
self._cross_validation(
trained_model=self.model,
Expand All @@ -174,6 +176,7 @@ def common_components(self) -> None:
cv_num=10,
algorithm_name=self.naming,
store_path=GEOPI_OUTPUT_METRICS_PATH,
grah_name=RegressionCommonFunction.CROSS_VALIDATION.value,
)
self._plot_predicted_vs_actual(
y_test_predict=RegressionWorkflowBase.y_test_predict,
Expand All @@ -182,6 +185,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=RegressionCommonFunction.PREDICTED_VS_ACTUAL_DIAGRAM.value,
)
self._plot_residuals(
y_test_predict=RegressionWorkflowBase.y_test_predict,
Expand All @@ -190,6 +194,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=RegressionCommonFunction.RESIDUALS_DIAGRAM.value,
)
self._plot_permutation_importance(
X_test=RegressionWorkflowBase.X_test,
Expand All @@ -212,6 +217,7 @@ def common_components(self, is_automl: bool = False) -> None:
y_predict=RegressionWorkflowBase.y_test_predict,
algorithm_name=self.naming,
store_path=GEOPI_OUTPUT_METRICS_PATH,
grah_name=RegressionCommonFunction.MODEL_SCORE.value,
)
self._cross_validation(
trained_model=self.auto_model,
Expand All @@ -220,6 +226,7 @@ def common_components(self, is_automl: bool = False) -> None:
cv_num=10,
algorithm_name=self.naming,
store_path=GEOPI_OUTPUT_METRICS_PATH,
grah_name=RegressionCommonFunction.CROSS_VALIDATION.value,
)
self._plot_predicted_vs_actual(
y_test_predict=RegressionWorkflowBase.y_test_predict,
Expand All @@ -228,6 +235,7 @@ def common_components(self, is_automl: bool = False) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=RegressionCommonFunction.PREDICTED_VS_ACTUAL_DIAGRAM.value,
)
self._plot_residuals(
y_test_predict=RegressionWorkflowBase.y_test_predict,
Expand All @@ -236,6 +244,7 @@ def common_components(self, is_automl: bool = False) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=RegressionCommonFunction.RESIDUALS_DIAGRAM.value,
)
self._plot_permutation_importance(
X_test=RegressionWorkflowBase.X_test,
Expand Down Expand Up @@ -1726,7 +1735,7 @@ class MLPRegression(RegressionWorkflowBase):
"""The automation workflow of using Multi-layer Perceptron algorithm to make insightful products."""

name = "Multi-layer Perceptron"
special_function = ["Loss Curve Diagram"]
special_function = [func.value for func in MLPSpecialFunction] # Loss Curve Diagram

def __init__(
self,
Expand Down Expand Up @@ -2018,13 +2027,13 @@ def manual_hyper_parameters(cls) -> Dict:
return hyper_parameters

@staticmethod
def _plot_loss_curve(trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_loss_curve(trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None:
"""Plot the learning curve of the trained model."""
print("-----* Loss Curve Diagram *-----")
print(f"-----* {func_name} *-----")
data = pd.DataFrame(trained_model.loss_curve_, columns=["Loss"])
data.plot(title="Loss")
save_fig(f"Loss Curve Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, f"Loss Curve Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, f"{func_name} - {algorithm_name}", local_path, mlflow_path)

@dispatch()
def special_components(self, **kwargs) -> None:
Expand All @@ -2036,6 +2045,7 @@ def special_components(self, **kwargs) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
func_name=MLPSpecialFunction.LOSS_CURVE_DIAGRAM.value,
)

@dispatch(bool)
Expand All @@ -2048,6 +2058,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
func_name=MLPSpecialFunction.LOSS_CURVE_DIAGRAM.value,
)


Expand Down

0 comments on commit 78a8b98

Please sign in to comment.