Skip to content

Commit

Permalink
refactor: replace classification special & common function output with enum
Browse files Browse the repository at this point in the history
  • Loading branch information
Haibin committed Nov 27, 2024
1 parent 0164d5d commit d88de1d
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 25 deletions.
14 changes: 7 additions & 7 deletions geochemistrypi/data_mining/model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,21 +318,21 @@ class TreeWorkflowMixin:
"""Mixin class for tree models."""

@staticmethod
def _plot_feature_importance(
    X_train: pd.DataFrame,
    name_column: str,
    trained_model: object,
    image_config: dict,
    algorithm_name: str,
    local_path: str,
    mlflow_path: str,
    func_name: str,
) -> None:
    """Draw the feature importance bar diagram, then save the figure and its data.

    Parameters
    ----------
    X_train : pd.DataFrame
        Training set; only its column labels are read here.
    name_column : str
        Forwarded unchanged as the second argument of ``save_data``.
    trained_model : object
        Fitted estimator; must expose ``feature_importances_``.
    image_config : dict
        Forwarded unchanged to ``plot_feature_importance``.
    algorithm_name : str
        Suffix of the title under which the figure and data are saved.
    local_path : str
        Forwarded to ``save_fig`` and ``save_data`` (presumably the local output directory).
    mlflow_path : str
        Forwarded to ``save_fig`` and ``save_data`` (presumably the MLflow artifact path).
    func_name : str
        Display name of this output; printed in the banner and used as the title prefix.
    """
    print(f"-----* {func_name} *-----")
    # One title is shared by the figure and the data file so the two artifacts stay paired.
    title = f"{func_name} - {algorithm_name}"
    columns_name = X_train.columns
    feature_importances = trained_model.feature_importances_
    data = plot_feature_importance(columns_name, feature_importances, image_config)
    save_fig(title, local_path, mlflow_path)
    # NOTE(review): the trailing True is presumably save_data's index flag -- confirm against utils.base.
    save_data(data, name_column, title, local_path, mlflow_path, True)

@staticmethod
def _plot_tree(trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None:
    """Draw the decision tree diagram of a trained model and save the figure.

    Parameters
    ----------
    trained_model : object
        Model forwarded unchanged to ``plot_decision_tree``.
    image_config : dict
        Forwarded unchanged to ``plot_decision_tree``.
    algorithm_name : str
        Suffix of the title under which the figure is saved.
    local_path : str
        Forwarded to ``save_fig`` (presumably the local output directory).
    mlflow_path : str
        Forwarded to ``save_fig`` (presumably the MLflow artifact path).
    func_name : str
        Display name of this output; printed in the banner and used as the title prefix.
    """
    print(f"-----* {func_name} *-----")
    plot_decision_tree(trained_model, image_config)
    save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)


class LinearWorkflowMixin:
Expand Down
62 changes: 44 additions & 18 deletions geochemistrypi/data_mining/model/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ..constants import CUSTOMIZE_LABEL_STRATEGY, MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, OPTION, RAY_FLAML, SAMPLE_BALANCE_STRATEGY, SECTION
from ..data.data_readiness import limit_num_input, num2option, num_input
from ..plot.statistic_plot import basic_statistic
from ..utils.base import clear_output, save_data, save_data_without_data_identifier, save_fig, save_text
from ..utils.base import clear_output, save_data, save_fig, save_text
from ._base import LinearWorkflowMixin, TreeWorkflowMixin, WorkflowBase
from .func.algo_classification._common import (
cross_validation,
Expand All @@ -35,7 +35,7 @@
score,
)
from .func.algo_classification._decision_tree import decision_tree_manual_hyper_parameters
from .func.algo_classification._enum import ClassificationCommonFunction
from .func.algo_classification._enum import ClassificationCommonFunction, ClassificationSpecialFunction
from .func.algo_classification._extra_trees import extra_trees_manual_hyper_parameters
from .func.algo_classification._gradient_boosting import gradient_boosting_manual_hyper_parameters
from .func.algo_classification._knn import knn_manual_hyper_parameters
Expand Down Expand Up @@ -124,24 +124,24 @@ def manual_hyper_parameters(cls) -> Dict:
return dict()

@staticmethod
def _score(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str, func_name: str) -> str:
    """Compute the classification scores, persist them as text and log them to MLflow.

    Parameters
    ----------
    y_true : pd.DataFrame
        Ground-truth labels, forwarded to ``score``.
    y_predict : pd.DataFrame
        Predicted labels, forwarded to ``score``.
    algorithm_name : str
        Suffix of the name under which the score text is saved.
    store_path : str
        Forwarded to ``save_text`` as the storage location.
    func_name : str
        Display name of this output; printed in the banner and used as the name prefix.

    Returns
    -------
    str
        The first element returned by ``score`` (presumably the averaging
        strategy that callers hand on to cross validation -- confirm in ``_common.score``).
    """
    print(f"-----* {func_name} *-----")
    average, scores = score(y_true, y_predict)
    # The same mapping is written to disk as indented JSON and logged as MLflow metrics.
    scores_str = json.dumps(scores, indent=4)
    save_text(scores_str, f"{func_name} - {algorithm_name}", store_path)
    mlflow.log_metrics(scores)
    return average

@staticmethod
def _classification_report(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str, func_name: str) -> None:
    """Print the classification report, save it as JSON text and log it as an MLflow artifact.

    Parameters
    ----------
    y_true : pd.DataFrame
        Ground-truth labels.
    y_predict : pd.DataFrame
        Predicted labels.
    algorithm_name : str
        Suffix of the name under which the report is saved.
    store_path : str
        Directory handed to ``save_text``; also the directory the artifact is read from.
    func_name : str
        Display name of this output; printed in the banner and used as the name prefix.
    """
    print(f"-----* {func_name} *-----")
    # Human-readable table for the console, dictionary form for persistence.
    print(classification_report(y_true, y_predict))
    scores = classification_report(y_true, y_predict, output_dict=True)
    scores_str = json.dumps(scores, indent=4)
    report_name = f"{func_name} - {algorithm_name}"
    save_text(scores_str, report_name, store_path)
    # NOTE(review): assumes save_text writes "<report_name>.txt" under store_path -- confirm against utils.base.
    mlflow.log_artifact(os.path.join(store_path, f"{report_name}.txt"))

@staticmethod
def _cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.DataFrame, graph_name: str, average: str, cv_num: int, algorithm_name: str, store_path: str) -> None:
Expand All @@ -157,7 +157,7 @@ def _plot_confusion_matrix(
y_test: pd.DataFrame, y_test_predict: pd.DataFrame, name_column: str, graph_name: str, trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str
) -> None:
"""Plot the confusion matrix of the model."""
print("-----* {graph_name} *-----")
print(f"-----* {graph_name} *-----")
data = plot_confusion_matrix(y_test, y_test_predict, trained_model)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
index = [f"true_{i}" for i in range(int(y_test.nunique().values))]
Expand Down Expand Up @@ -275,12 +275,14 @@ def common_components(self) -> None:
average = self._score(
y_true=ClassificationWorkflowBase.y_test,
y_predict=ClassificationWorkflowBase.y_test_predict,
func_name=ClassificationCommonFunction.MODEL_SCORE.value,
algorithm_name=self.naming,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)
self._classification_report(
y_true=ClassificationWorkflowBase.y_test,
y_predict=ClassificationWorkflowBase.y_test_predict,
func_name=ClassificationCommonFunction.CLASSIFICATION_REPORT.value,
algorithm_name=self.naming,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)
Expand Down Expand Up @@ -368,12 +370,14 @@ def common_components(self, is_automl: bool) -> None:
y_true=ClassificationWorkflowBase.y_test,
y_predict=ClassificationWorkflowBase.y_test_predict,
algorithm_name=self.naming,
func_name=ClassificationCommonFunction.MODEL_SCORE.value,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)
self._classification_report(
y_true=ClassificationWorkflowBase.y_test,
y_predict=ClassificationWorkflowBase.y_test_predict,
algorithm_name=self.naming,
func_name=ClassificationCommonFunction.CLASSIFICATION_REPORT.value,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)
self._cross_validation(
Expand Down Expand Up @@ -936,13 +940,15 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -957,13 +963,15 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -1255,13 +1263,15 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.model.estimators_[0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -1276,13 +1286,15 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.auto_model.estimators_[0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -1634,6 +1646,7 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -1648,6 +1661,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -1889,12 +1903,12 @@ def manual_hyper_parameters(cls) -> Dict:
return hyper_parameters

@staticmethod
def _plot_feature_importance(columns_name: np.ndarray, name_column: str, trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None:
    """Plot the feature importance of the trained model, then save the figure and its data.

    Parameters
    ----------
    columns_name : np.ndarray
        Feature names, forwarded to ``plot_logistic_importance``.
    name_column : str
        Forwarded unchanged as the second argument of ``save_data``.
    trained_model : object
        Fitted model, forwarded to ``plot_logistic_importance``.
    algorithm_name : str
        Suffix of the title under which the figure and data are saved.
    local_path : str
        Forwarded to ``save_fig`` and ``save_data`` (presumably the local output directory).
    mlflow_path : str
        Forwarded to ``save_fig`` and ``save_data`` (presumably the MLflow artifact path).
    func_name : str
        Display name of this output; printed in the banner and used as the title prefix.
    """
    print(f"-----* {func_name} *-----")
    # One title is shared by the figure and the data file so the two artifacts stay paired.
    title = f"{func_name} - {algorithm_name}"
    data = plot_logistic_importance(columns_name, trained_model)
    save_fig(title, local_path, mlflow_path)
    save_data(data, name_column, title, local_path, mlflow_path)

@dispatch()
def special_components(self, **kwargs) -> None:
Expand All @@ -1916,6 +1930,7 @@ def special_components(self, **kwargs) -> None:
name_column=LogisticRegressionClassification.name_all,
trained_model=self.model,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -1940,6 +1955,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None:
name_column=LogisticRegressionClassification.name_all,
trained_model=self.auto_model,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -2258,13 +2274,13 @@ def manual_hyper_parameters(cls) -> Dict:
return hyper_parameters

@staticmethod
def _plot_loss_curve(trained_model: object, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
    """Plot the loss curve of the trained model, then save the figure and the loss values.

    Parameters
    ----------
    trained_model : object
        Fitted model; must expose ``loss_curve_``.
    algorithm_name : str
        Suffix of the title under which the figure and data are saved.
    func_name : str
        Display name of this output; printed in the banner and used as the title prefix.
    local_path : str
        Forwarded to the save helpers (presumably the local output directory).
    mlflow_path : str
        Forwarded to the save helpers (presumably the MLflow artifact path).
    """
    # Function-scope import: the module-level import list no longer includes this helper.
    from ..utils.base import save_data_without_data_identifier

    print(f"-----* {func_name} *-----")
    title = f"{func_name} - {algorithm_name}"
    data = pd.DataFrame(trained_model.loss_curve_, columns=["Loss"])
    data.plot(title="Loss")
    save_fig(title, local_path, mlflow_path)
    # The loss table has no name column, so save_data (whose second argument is
    # name_column at every other call site) would receive shifted arguments.
    save_data_without_data_identifier(data, title, local_path, mlflow_path)

@dispatch()
def special_components(self, **kwargs) -> None:
Expand All @@ -2274,6 +2290,7 @@ def special_components(self, **kwargs) -> None:
self._plot_loss_curve(
trained_model=self.model,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.LOSS_CURVE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -2286,6 +2303,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
self._plot_loss_curve(
trained_model=self.auto_model,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.LOSS_CURVE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -2555,13 +2573,15 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.model.estimators_[0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -2576,13 +2596,15 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.auto_model.estimators_[0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -2920,13 +2942,15 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.model.estimators_[0][0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -2941,13 +2965,15 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.auto_model.estimators_[0][0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

class ClassificationCommonFunction(Enum):
MODEL_SCORE = "Model Score"
CLASSIFICATION_REPORT = "Classification Report"
CONFUSION_MATRIX = "Confusion Matrix"
CROSS_VALIDATION = "Cross Validation"
MODEL_PREDICTION = "Model Prediction"
Expand All @@ -12,3 +13,9 @@ class ClassificationCommonFunction(Enum):
ROC_CURVE = "ROC Curve"
TWO_DIMENSIONAL_DECISION_BOUNDARY_DIAGRAM = "Two-dimensional Decision Boundary Diagram"
PERMUTATION_IMPORTANCE_DIAGRAM = "Permutation Importance Diagram"


class ClassificationSpecialFunction(Enum):
    """Display names of the model-specific ("special") classification outputs.

    Each member's ``value`` is passed as ``func_name`` to the matching plotting
    helper, which prints it in the console banner and uses it as the prefix of
    the saved artifact name (``"<value> - <algorithm name>"``).
    """

    # Passed to the _plot_feature_importance helpers.
    FEATURE_IMPORTANCE = "Feature Importance"
    # Passed to _plot_loss_curve.
    LOSS_CURVE_DIAGRAM = "Loss Curve Diagram"
    # Passed to _plot_tree.
    TREE_DIAGRAM = "Tree Diagram"

0 comments on commit d88de1d

Please sign in to comment.