From 652a2770b2b67c8a5d6980f22e59a326153d18db Mon Sep 17 00:00:00 2001 From: Haibin Date: Thu, 28 Nov 2024 01:42:55 +1100 Subject: [PATCH 1/3] refactor: replace classification special & common function output with enum --- geochemistrypi/data_mining/model/_base.py | 14 ++--- .../data_mining/model/classification.py | 62 +++++++++++++------ .../model/func/algo_classification/_enum.py | 10 +++ 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/geochemistrypi/data_mining/model/_base.py b/geochemistrypi/data_mining/model/_base.py index b4c2791..6c00374 100644 --- a/geochemistrypi/data_mining/model/_base.py +++ b/geochemistrypi/data_mining/model/_base.py @@ -319,21 +319,21 @@ class TreeWorkflowMixin: """Mixin class for tree models.""" @staticmethod - def _plot_feature_importance(X_train: pd.DataFrame, name_column: str, trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_feature_importance(X_train: pd.DataFrame, name_column: str, trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None: """Draw the feature importance bar diagram.""" - print("-----* Feature Importance Diagram *-----") + print(f"-----* {func_name} *-----") columns_name = X_train.columns feature_importances = trained_model.feature_importances_ data = plot_feature_importance(columns_name, feature_importances, image_config) - save_fig(f"Feature Importance - {algorithm_name}", local_path, mlflow_path) - save_data(data, name_column, f"Feature Importance - {algorithm_name}", local_path, mlflow_path, True) + save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path) + save_data(data, name_column, f"{func_name} - {algorithm_name}", local_path, mlflow_path, True) @staticmethod - def _plot_tree(trained_model: object, image_config: dict, 
algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None: """Drawing decision tree diagrams.""" - print("-----* Single Tree Diagram *-----") + print(f"-----* {func_name} *-----") plot_decision_tree(trained_model, image_config) - save_fig(f"Tree Diagram - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path) class LinearWorkflowMixin: diff --git a/geochemistrypi/data_mining/model/classification.py b/geochemistrypi/data_mining/model/classification.py index bbb7e33..28cf90d 100644 --- a/geochemistrypi/data_mining/model/classification.py +++ b/geochemistrypi/data_mining/model/classification.py @@ -21,7 +21,7 @@ from ..constants import CUSTOMIZE_LABEL_STRATEGY, MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, OPTION, RAY_FLAML, SAMPLE_BALANCE_STRATEGY, SECTION from ..data.data_readiness import limit_num_input, num2option, num_input from ..plot.statistic_plot import basic_statistic -from ..utils.base import clear_output, save_data, save_data_without_data_identifier, save_fig, save_text +from ..utils.base import clear_output, save_data, save_fig, save_text from ._base import LinearWorkflowMixin, TreeWorkflowMixin, WorkflowBase from .func.algo_classification._common import ( cross_validation, @@ -35,7 +35,7 @@ score, ) from .func.algo_classification._decision_tree import decision_tree_manual_hyper_parameters -from .func.algo_classification._enum import ClassificationCommonFunction +from .func.algo_classification._enum import ClassificationCommonFunction, DecisionTreeSpecialFunction, MLPSpecialFunction from .func.algo_classification._extra_trees import extra_trees_manual_hyper_parameters from .func.algo_classification._gradient_boosting import gradient_boosting_manual_hyper_parameters from .func.algo_classification._knn import knn_manual_hyper_parameters @@ -124,24 +124,24 @@ def manual_hyper_parameters(cls) -> Dict: return dict() @staticmethod - def _score(y_true: pd.DataFrame, y_predict: 
pd.DataFrame, algorithm_name: str, store_path: str) -> str: + def _score(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str, func_name: str) -> str: """Print the classification score report of the model.""" - print("-----* Model Score *-----") + print(f"-----* {func_name} *-----") average, scores = score(y_true, y_predict) scores_str = json.dumps(scores, indent=4) - save_text(scores_str, f"Model Score - {algorithm_name}", store_path) + save_text(scores_str, f"{func_name} - {algorithm_name}", store_path) mlflow.log_metrics(scores) return average @staticmethod - def _classification_report(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str) -> None: + def _classification_report(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str, func_name: str) -> None: """Print the classification report of the model.""" - print("-----* Classification Report *-----") + print(f"-----* {func_name} *-----") print(classification_report(y_true, y_predict)) scores = classification_report(y_true, y_predict, output_dict=True) scores_str = json.dumps(scores, indent=4) - save_text(scores_str, f"Classification Report - {algorithm_name}", store_path) - mlflow.log_artifact(os.path.join(store_path, f"Classification Report - {algorithm_name}.txt")) + save_text(scores_str, f"{func_name} - {algorithm_name}", store_path) + mlflow.log_artifact(os.path.join(store_path, f"{func_name} - {algorithm_name}.txt")) @staticmethod def _cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.DataFrame, graph_name: str, average: str, cv_num: int, algorithm_name: str, store_path: str) -> None: @@ -157,7 +157,7 @@ def _plot_confusion_matrix( y_test: pd.DataFrame, y_test_predict: pd.DataFrame, name_column: str, graph_name: str, trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str ) -> None: """Plot the confusion matrix of the model.""" - print("-----* {graph_name} *-----") + 
print(f"-----* {graph_name} *-----") data = plot_confusion_matrix(y_test, y_test_predict, trained_model) save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path) index = [f"true_{i}" for i in range(int(y_test.nunique().values))] @@ -275,12 +275,14 @@ def common_components(self) -> None: average = self._score( y_true=ClassificationWorkflowBase.y_test, y_predict=ClassificationWorkflowBase.y_test_predict, + func_name=ClassificationCommonFunction.MODEL_SCORE.value, algorithm_name=self.naming, store_path=GEOPI_OUTPUT_METRICS_PATH, ) self._classification_report( y_true=ClassificationWorkflowBase.y_test, y_predict=ClassificationWorkflowBase.y_test_predict, + func_name=ClassificationCommonFunction.CLASSIFICATION_REPORT.value, algorithm_name=self.naming, store_path=GEOPI_OUTPUT_METRICS_PATH, ) @@ -368,12 +370,14 @@ def common_components(self, is_automl: bool) -> None: y_true=ClassificationWorkflowBase.y_test, y_predict=ClassificationWorkflowBase.y_test_predict, algorithm_name=self.naming, + func_name=ClassificationCommonFunction.MODEL_SCORE.value, store_path=GEOPI_OUTPUT_METRICS_PATH, ) self._classification_report( y_true=ClassificationWorkflowBase.y_test, y_predict=ClassificationWorkflowBase.y_test_predict, algorithm_name=self.naming, + func_name=ClassificationCommonFunction.CLASSIFICATION_REPORT.value, store_path=GEOPI_OUTPUT_METRICS_PATH, ) self._cross_validation( @@ -936,6 +940,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -943,6 +948,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, 
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -957,6 +963,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -964,6 +971,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1255,6 +1263,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1262,6 +1271,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model.estimators_[0], image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1276,6 +1286,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1283,6 +1294,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None: 
trained_model=self.auto_model.estimators_[0], image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1634,6 +1646,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1648,6 +1661,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1889,12 +1903,12 @@ def manual_hyper_parameters(cls) -> Dict: return hyper_parameters @staticmethod - def _plot_feature_importance(columns_name: np.ndarray, name_column: str, trained_model: any, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_feature_importance(columns_name: np.ndarray, name_column: str, trained_model: any, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None: """Print the feature coefficient value orderly.""" - print("-----* Feature Importance *-----") + print(f"-----* {func_name} *-----") data = plot_logistic_importance(columns_name, trained_model) - save_fig(f"Feature Importance - {algorithm_name}", local_path, mlflow_path) - save_data(data, name_column, f"Feature Importance - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path) + save_data(data, name_column, f"{func_name} - {algorithm_name}", local_path, mlflow_path) @dispatch() def special_components(self, 
**kwargs) -> None: @@ -1916,6 +1930,7 @@ def special_components(self, **kwargs) -> None: name_column=LogisticRegressionClassification.name_all, trained_model=self.model, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1940,6 +1955,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None: name_column=LogisticRegressionClassification.name_all, trained_model=self.auto_model, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2258,13 +2274,13 @@ def manual_hyper_parameters(cls) -> Dict: return hyper_parameters @staticmethod - def _plot_loss_curve(trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_loss_curve(trained_model: object, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None: """Plot the learning curve of the trained model.""" - print("-----* Loss Curve Diagram *-----") + print(f"-----* {func_name} *-----") data = pd.DataFrame(trained_model.loss_curve_, columns=["Loss"]) data.plot(title="Loss") - save_fig(f"Loss Curve Diagram - {algorithm_name}", local_path, mlflow_path) - save_data_without_data_identifier(data, f"Loss Curve Diagram - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path) + save_data(data, f"{func_name} - {algorithm_name}", local_path, mlflow_path) @dispatch() def special_components(self, **kwargs) -> None: @@ -2274,6 +2290,7 @@ def special_components(self, **kwargs) -> None: self._plot_loss_curve( trained_model=self.model, algorithm_name=self.naming, + func_name=MLPSpecialFunction.LOSS_CURVE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, 
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2286,6 +2303,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: self._plot_loss_curve( trained_model=self.auto_model, algorithm_name=self.naming, + func_name=MLPSpecialFunction.LOSS_CURVE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2555,6 +2573,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2562,6 +2581,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model.estimators_[0], image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2576,6 +2596,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2583,6 +2604,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: trained_model=self.auto_model.estimators_[0], image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2920,6 +2942,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, + 
func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2927,6 +2950,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model.estimators_[0][0], image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2941,6 +2965,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2948,6 +2973,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: trained_model=self.auto_model.estimators_[0][0], image_config=self.image_config, algorithm_name=self.naming, + func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) diff --git a/geochemistrypi/data_mining/model/func/algo_classification/_enum.py b/geochemistrypi/data_mining/model/func/algo_classification/_enum.py index 2552ea0..027a918 100644 --- a/geochemistrypi/data_mining/model/func/algo_classification/_enum.py +++ b/geochemistrypi/data_mining/model/func/algo_classification/_enum.py @@ -3,6 +3,7 @@ class ClassificationCommonFunction(Enum): MODEL_SCORE = "Model Score" + CLASSIFICATION_REPORT = "Classification Report" CONFUSION_MATRIX = "Confusion Matrix" CROSS_VALIDATION = "Cross Validation" MODEL_PREDICTION = "Model Prediction" @@ -12,3 +13,12 @@ class ClassificationCommonFunction(Enum): ROC_CURVE = "ROC Curve" TWO_DIMENSIONAL_DECISION_BOUNDARY_DIAGRAM = "Two-dimensional Decision Boundary 
Diagram" PERMUTATION_IMPORTANCE_DIAGRAM = "Permutation Importance Diagram" + + +class DecisionTreeSpecialFunction(Enum): + FEATURE_IMPORTANCE = "Feature Importance" + TREE_DIAGRAM = "Tree Diagram" + + +class MLPSpecialFunction(Enum): + LOSS_CURVE_DIAGRAM = "Loss Curve Diagram" From 1cc5075a58fe9c05f94a4a0d448e57870f537c8c Mon Sep 17 00:00:00 2001 From: Haibin Date: Thu, 28 Nov 2024 01:42:55 +1100 Subject: [PATCH 2/3] refactor: separate classification func name in enum --- .../data_mining/model/classification.py | 43 +++++++++++-------- .../model/func/algo_classification/_enum.py | 23 ++++++++++ 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/geochemistrypi/data_mining/model/classification.py b/geochemistrypi/data_mining/model/classification.py index 28cf90d..cb82c3b 100644 --- a/geochemistrypi/data_mining/model/classification.py +++ b/geochemistrypi/data_mining/model/classification.py @@ -35,7 +35,16 @@ score, ) from .func.algo_classification._decision_tree import decision_tree_manual_hyper_parameters -from .func.algo_classification._enum import ClassificationCommonFunction, DecisionTreeSpecialFunction, MLPSpecialFunction +from .func.algo_classification._enum import ( + ClassificationCommonFunction, + DecisionTreeSpecialFunction, + ExtraTreesSpecialFunction, + GradientBoostingSpecialFunction, + LogisticRegressionSpecialFunction, + MLPSpecialFunction, + RandomForestSpecialFunction, + XGBoostSpecialFunction, +) from .func.algo_classification._extra_trees import extra_trees_manual_hyper_parameters from .func.algo_classification._gradient_boosting import gradient_boosting_manual_hyper_parameters from .func.algo_classification._knn import knn_manual_hyper_parameters @@ -1263,7 +1272,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=RandomForestSpecialFunction.FEATURE_IMPORTANCE.value, 
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1271,7 +1280,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model.estimators_[0], image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, + func_name=RandomForestSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1286,7 +1295,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=RandomForestSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1294,7 +1303,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None: trained_model=self.auto_model.estimators_[0], image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, + func_name=RandomForestSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1646,7 +1655,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=XGBoostSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1661,7 +1670,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, - 
func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=XGBoostSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1930,7 +1939,7 @@ def special_components(self, **kwargs) -> None: name_column=LogisticRegressionClassification.name_all, trained_model=self.model, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=LogisticRegressionSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -1955,7 +1964,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None: name_column=LogisticRegressionClassification.name_all, trained_model=self.auto_model, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=LogisticRegressionSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2573,7 +2582,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=ExtraTreesSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2581,7 +2590,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model.estimators_[0], image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, + func_name=ExtraTreesSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2596,7 +2605,7 @@ def special_components(self, is_automl: 
bool, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=ExtraTreesSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2604,7 +2613,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: trained_model=self.auto_model.estimators_[0], image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, + func_name=ExtraTreesSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2942,7 +2951,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model, image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=GradientBoostingSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2950,7 +2959,7 @@ def special_components(self, **kwargs) -> None: trained_model=self.model.estimators_[0][0], image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, + func_name=GradientBoostingSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2965,7 +2974,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: trained_model=self.auto_model, image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.FEATURE_IMPORTANCE.value, + func_name=GradientBoostingSpecialFunction.FEATURE_IMPORTANCE.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, 
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) @@ -2973,7 +2982,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: trained_model=self.auto_model.estimators_[0][0], image_config=self.image_config, algorithm_name=self.naming, - func_name=DecisionTreeSpecialFunction.TREE_DIAGRAM.value, + func_name=GradientBoostingSpecialFunction.TREE_DIAGRAM.value, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, ) diff --git a/geochemistrypi/data_mining/model/func/algo_classification/_enum.py b/geochemistrypi/data_mining/model/func/algo_classification/_enum.py index 027a918..0d3d5e3 100644 --- a/geochemistrypi/data_mining/model/func/algo_classification/_enum.py +++ b/geochemistrypi/data_mining/model/func/algo_classification/_enum.py @@ -20,5 +20,28 @@ class DecisionTreeSpecialFunction(Enum): TREE_DIAGRAM = "Tree Diagram" +class RandomForestSpecialFunction(Enum): + FEATURE_IMPORTANCE = "Feature Importance" + TREE_DIAGRAM = "Tree Diagram" + + +class XGBoostSpecialFunction(Enum): + FEATURE_IMPORTANCE = "Feature Importance" + + +class LogisticRegressionSpecialFunction(Enum): + FEATURE_IMPORTANCE = "Feature Importance" + + class MLPSpecialFunction(Enum): LOSS_CURVE_DIAGRAM = "Loss Curve Diagram" + + +class ExtraTreesSpecialFunction(Enum): + FEATURE_IMPORTANCE = "Feature Importance" + TREE_DIAGRAM = "Tree Diagram" + + +class GradientBoostingSpecialFunction(Enum): + FEATURE_IMPORTANCE = "Feature Importance" + TREE_DIAGRAM = "Tree Diagram" From fd21c017bf7be2b4c5bb37725a6dd534fa0b52ce Mon Sep 17 00:00:00 2001 From: Haibin Date: Fri, 17 Jan 2025 21:03:03 +0800 Subject: [PATCH 3/3] refactor: use save_without_id in MLP loss_curve func --- geochemistrypi/data_mining/model/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geochemistrypi/data_mining/model/classification.py b/geochemistrypi/data_mining/model/classification.py index cb82c3b..3a97f27 100644 --- 
a/geochemistrypi/data_mining/model/classification.py +++ b/geochemistrypi/data_mining/model/classification.py @@ -21,7 +21,7 @@ from ..constants import CUSTOMIZE_LABEL_STRATEGY, MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, OPTION, RAY_FLAML, SAMPLE_BALANCE_STRATEGY, SECTION from ..data.data_readiness import limit_num_input, num2option, num_input from ..plot.statistic_plot import basic_statistic -from ..utils.base import clear_output, save_data, save_fig, save_text +from ..utils.base import clear_output, save_data, save_data_without_data_identifier, save_fig, save_text from ._base import LinearWorkflowMixin, TreeWorkflowMixin, WorkflowBase from .func.algo_classification._common import ( cross_validation, @@ -2289,7 +2289,7 @@ def _plot_loss_curve(trained_model: object, algorithm_name: str, func_name: str, data = pd.DataFrame(trained_model.loss_curve_, columns=["Loss"]) data.plot(title="Loss") save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path) - save_data(data, f"{func_name} - {algorithm_name}", local_path, mlflow_path) + save_data_without_data_identifier(data, f"{func_name} - {algorithm_name}", local_path, mlflow_path) @dispatch() def special_components(self, **kwargs) -> None: