refactor: replace hard-coded function names with enum classes in regression

ZJUEarthData · Oct 21, 2024 · 78a8b98 · 78a8b98
1 parent b07c6e9
commit 78a8b98
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 19 deletions.
diff --git a/geochemistrypi/data_mining/model/func/algo_regression/_enum.py b/geochemistrypi/data_mining/model/func/algo_regression/_enum.py
@@ -0,0 +1,12 @@
+from enum import Enum
+
+
+class RegressionCommonFunction(Enum):
+    PREDICTED_VS_ACTUAL_DIAGRAM = "Predicted vs. Actual Diagram"
+    RESIDUALS_DIAGRAM = "Residuals Diagram"
+    MODEL_SCORE = "Model Score"
+    CROSS_VALIDATION = "Cross Validation"
+
+
+class MLPSpecialFunction(Enum):
+    LOSS_CURVE_DIAGRAM = "Loss Curve Diagram"
diff --git a/geochemistrypi/data_mining/model/regression.py b/geochemistrypi/data_mining/model/regression.py
@@ -25,6 +25,7 @@
 from .func.algo_regression._common import cross_validation, plot_predicted_vs_actual, plot_residuals, score
 from .func.algo_regression._decision_tree import decision_tree_manual_hyper_parameters
 from .func.algo_regression._elastic_net import elastic_net_manual_hyper_parameters
+from .func.algo_regression._enum import MLPSpecialFunction, RegressionCommonFunction
 from .func.algo_regression._extra_tree import extra_trees_manual_hyper_parameters
 from .func.algo_regression._gradient_boosting import gradient_boosting_manual_hyper_parameters
 from .func.algo_regression._knn import knn_manual_hyper_parameters
@@ -121,40 +122,40 @@ def manual_hyper_parameters(cls) -> Dict:
         return dict()
 
     @staticmethod
-    def _plot_predicted_vs_actual(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
+    def _plot_predicted_vs_actual(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
         """Plot the predicted vs. actual diagram."""
-        print("-----* Predicted vs. Actual Diagram *-----")
+        print(f"-----* {grah_name} *-----")
         plot_predicted_vs_actual(y_test_predict, y_test, algorithm_name)
-        save_fig(f"Predicted vs. Actual Diagram - {algorithm_name}", local_path, mlflow_path)
+        save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
         data = pd.concat([y_test, y_test_predict], axis=1)
-        save_data(data, name_column, f"Predicted vs. Actual Diagram - {algorithm_name}", local_path, mlflow_path)
+        save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
 
     @staticmethod
-    def _plot_residuals(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
+    def _plot_residuals(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
         """Plot the residuals diagram."""
-        print("-----* Residuals Diagram *-----")
+        print(f"-----* {grah_name} *-----")
         residuals = plot_residuals(y_test_predict, y_test, algorithm_name)
-        save_fig(f"Residuals Diagram - {algorithm_name}", local_path, mlflow_path)
+        save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
         data = pd.concat([y_test, residuals], axis=1)
-        save_data(data, name_column, f"Residuals Diagram - {algorithm_name}", local_path, mlflow_path)
+        save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
 
     @staticmethod
-    def _score(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str) -> None:
+    def _score(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str, grah_name: str) -> None:
         """Calculate the score of the model."""
-        print("-----* Model Score *-----")
+        print(f"-----* {grah_name} *-----")
         scores = score(y_true, y_predict)
         scores_str = json.dumps(scores, indent=4)
-        save_text(scores_str, f"Model Score - {algorithm_name}", store_path)
+        save_text(scores_str, f"{grah_name} - {algorithm_name}", store_path)
         mlflow.log_metrics(scores)
 
     @staticmethod
-    def _cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.DataFrame, cv_num: int, algorithm_name: str, store_path: str) -> None:
+    def _cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.DataFrame, cv_num: int, algorithm_name: str, store_path: str, grah_name: str) -> None:
         """Cross validation."""
-        print("-----* Cross Validation *-----")
+        print(f"-----* {grah_name} *-----")
         print(f"K-Folds: {cv_num}")
         scores = cross_validation(trained_model, X_train, y_train, cv_num=cv_num)
         scores_str = json.dumps(scores, indent=4)
-        save_text(scores_str, f"Cross Validation - {algorithm_name}", store_path)
+        save_text(scores_str, f"{grah_name} - {algorithm_name}", store_path)
 
     @dispatch()
     def common_components(self) -> None:
@@ -166,6 +167,7 @@ def common_components(self) -> None:
             y_predict=RegressionWorkflowBase.y_test_predict,
             algorithm_name=self.naming,
             store_path=GEOPI_OUTPUT_METRICS_PATH,
+            grah_name=RegressionCommonFunction.MODEL_SCORE.value,
         )
         self._cross_validation(
             trained_model=self.model,
@@ -174,6 +176,7 @@ def common_components(self) -> None:
             cv_num=10,
             algorithm_name=self.naming,
             store_path=GEOPI_OUTPUT_METRICS_PATH,
+            grah_name=RegressionCommonFunction.CROSS_VALIDATION.value,
         )
         self._plot_predicted_vs_actual(
             y_test_predict=RegressionWorkflowBase.y_test_predict,
@@ -182,6 +185,7 @@ def common_components(self) -> None:
             algorithm_name=self.naming,
             local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
             mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+            grah_name=RegressionCommonFunction.PREDICTED_VS_ACTUAL_DIAGRAM.value,
         )
         self._plot_residuals(
             y_test_predict=RegressionWorkflowBase.y_test_predict,
@@ -190,6 +194,7 @@ def common_components(self) -> None:
             algorithm_name=self.naming,
             local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
             mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+            grah_name=RegressionCommonFunction.RESIDUALS_DIAGRAM.value,
         )
         self._plot_permutation_importance(
             X_test=RegressionWorkflowBase.X_test,
@@ -212,6 +217,7 @@ def common_components(self, is_automl: bool = False) -> None:
             y_predict=RegressionWorkflowBase.y_test_predict,
             algorithm_name=self.naming,
             store_path=GEOPI_OUTPUT_METRICS_PATH,
+            grah_name=RegressionCommonFunction.MODEL_SCORE.value,
         )
         self._cross_validation(
             trained_model=self.auto_model,
@@ -220,6 +226,7 @@ def common_components(self, is_automl: bool = False) -> None:
             cv_num=10,
             algorithm_name=self.naming,
             store_path=GEOPI_OUTPUT_METRICS_PATH,
+            grah_name=RegressionCommonFunction.CROSS_VALIDATION.value,
         )
         self._plot_predicted_vs_actual(
             y_test_predict=RegressionWorkflowBase.y_test_predict,
@@ -228,6 +235,7 @@ def common_components(self, is_automl: bool = False) -> None:
             algorithm_name=self.naming,
             local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
             mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+            grah_name=RegressionCommonFunction.PREDICTED_VS_ACTUAL_DIAGRAM.value,
         )
         self._plot_residuals(
             y_test_predict=RegressionWorkflowBase.y_test_predict,
@@ -236,6 +244,7 @@ def common_components(self, is_automl: bool = False) -> None:
             algorithm_name=self.naming,
             local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
             mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+            grah_name=RegressionCommonFunction.RESIDUALS_DIAGRAM.value,
         )
         self._plot_permutation_importance(
             X_test=RegressionWorkflowBase.X_test,
@@ -1726,7 +1735,7 @@ class MLPRegression(RegressionWorkflowBase):
     """The automation workflow of using Multi-layer Perceptron algorithm to make insightful products."""
 
     name = "Multi-layer Perceptron"
-    special_function = ["Loss Curve Diagram"]
+    special_function = [func.value for func in MLPSpecialFunction]  # Loss Curve Diagram
 
     def __init__(
         self,
@@ -2018,13 +2027,13 @@ def manual_hyper_parameters(cls) -> Dict:
         return hyper_parameters
 
     @staticmethod
-    def _plot_loss_curve(trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
+    def _plot_loss_curve(trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None:
         """Plot the learning curve of the trained model."""
-        print("-----* Loss Curve Diagram *-----")
+        print(f"-----* {func_name} *-----")
         data = pd.DataFrame(trained_model.loss_curve_, columns=["Loss"])
         data.plot(title="Loss")
-        save_fig(f"Loss Curve Diagram - {algorithm_name}", local_path, mlflow_path)
-        save_data(data, f"Loss Curve Diagram - {algorithm_name}", local_path, mlflow_path)
+        save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
+        save_data(data, f"{func_name} - {algorithm_name}", local_path, mlflow_path)
 
     @dispatch()
     def special_components(self, **kwargs) -> None:
@@ -2036,6 +2045,7 @@ def special_components(self, **kwargs) -> None:
                 algorithm_name=self.naming,
                 local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
                 mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+                func_name=MLPSpecialFunction.LOSS_CURVE_DIAGRAM.value,
             )
 
     @dispatch(bool)
@@ -2048,6 +2058,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
                 algorithm_name=self.naming,
                 local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
                 mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
+                func_name=MLPSpecialFunction.LOSS_CURVE_DIAGRAM.value,
             )