Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: replace classification special & common function output with enum #405

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions geochemistrypi/data_mining/model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,21 +318,21 @@ class TreeWorkflowMixin:
"""Mixin class for tree models."""

@staticmethod
def _plot_feature_importance(X_train: pd.DataFrame, name_column: str, trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None:
    """Draw the feature importance bar diagram and store the figure and its data.

    Parameters
    ----------
    X_train : pd.DataFrame
        Training set; only its ``columns`` attribute is read here.
    name_column : str
        Forwarded unchanged to ``save_data``.
    trained_model : object
        Fitted model; must expose ``feature_importances_``.
    image_config : dict
        Forwarded unchanged to ``plot_feature_importance``.
    algorithm_name : str
        Suffix of the artifact name.
    local_path : str
        Forwarded to ``save_fig`` and ``save_data``.
    mlflow_path : str
        Forwarded to ``save_fig`` and ``save_data``.
    func_name : str
        Display name of this output; printed in the banner and used as the
        prefix of the artifact name.
    """
    print(f"-----* {func_name} *-----")
    columns_name = X_train.columns
    feature_importances = trained_model.feature_importances_
    data = plot_feature_importance(columns_name, feature_importances, image_config)
    # The figure and its underlying data are stored under the same name.
    artifact_name = f"{func_name} - {algorithm_name}"
    save_fig(artifact_name, local_path, mlflow_path)
    # NOTE(review): the trailing positional True is kept as in the original call;
    # presumably an "index" flag of save_data - confirm against its signature.
    save_data(data, name_column, artifact_name, local_path, mlflow_path, True)

@staticmethod
def _plot_tree(trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None:
    """Draw the decision tree diagram of a trained model and store the figure.

    Parameters
    ----------
    trained_model : object
        Forwarded unchanged to ``plot_decision_tree``.
    image_config : dict
        Forwarded unchanged to ``plot_decision_tree``.
    algorithm_name : str
        Suffix of the figure name.
    local_path : str
        Forwarded to ``save_fig``.
    mlflow_path : str
        Forwarded to ``save_fig``.
    func_name : str
        Display name of this output; printed in the banner and used as the
        prefix of the figure name.
    """
    print(f"-----* {func_name} *-----")
    plot_decision_tree(trained_model, image_config)
    save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)


class LinearWorkflowMixin:
Expand Down
62 changes: 44 additions & 18 deletions geochemistrypi/data_mining/model/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ..constants import CUSTOMIZE_LABEL_STRATEGY, MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, OPTION, RAY_FLAML, SAMPLE_BALANCE_STRATEGY, SECTION
from ..data.data_readiness import limit_num_input, num2option, num_input
from ..plot.statistic_plot import basic_statistic
from ..utils.base import clear_output, save_data, save_data_without_data_identifier, save_fig, save_text
from ..utils.base import clear_output, save_data, save_fig, save_text
from ._base import LinearWorkflowMixin, TreeWorkflowMixin, WorkflowBase
from .func.algo_classification._common import (
cross_validation,
Expand All @@ -35,7 +35,7 @@
score,
)
from .func.algo_classification._decision_tree import decision_tree_manual_hyper_parameters
from .func.algo_classification._enum import ClassificationCommonFunction
from .func.algo_classification._enum import ClassificationCommonFunction, ClassificationSpecialFunction
from .func.algo_classification._extra_trees import extra_trees_manual_hyper_parameters
from .func.algo_classification._gradient_boosting import gradient_boosting_manual_hyper_parameters
from .func.algo_classification._knn import knn_manual_hyper_parameters
Expand Down Expand Up @@ -124,24 +124,24 @@ def manual_hyper_parameters(cls) -> Dict:
return dict()

@staticmethod
def _score(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str, func_name: str) -> str:
    """Compute the classification scores, store them as text and log them to MLflow.

    Parameters
    ----------
    y_true : pd.DataFrame
        Ground-truth labels, forwarded to ``score``.
    y_predict : pd.DataFrame
        Predicted labels, forwarded to ``score``.
    algorithm_name : str
        Suffix of the stored text name.
    store_path : str
        Forwarded to ``save_text``.
    func_name : str
        Display name of this output; printed in the banner and used as the
        prefix of the stored text name.

    Returns
    -------
    str
        The first value returned by ``score`` (``average``), handed back to the caller.
    """
    print(f"-----* {func_name} *-----")
    average, scores = score(y_true, y_predict)
    scores_str = json.dumps(scores, indent=4)
    save_text(scores_str, f"{func_name} - {algorithm_name}", store_path)
    # The same score mapping that was written to disk is logged as MLflow metrics.
    mlflow.log_metrics(scores)
    return average

@staticmethod
def _classification_report(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str, func_name: str) -> None:
    """Print the classification report, store it as JSON text and log it to MLflow.

    Parameters
    ----------
    y_true : pd.DataFrame
        Ground-truth labels.
    y_predict : pd.DataFrame
        Predicted labels.
    algorithm_name : str
        Suffix of the stored report name.
    store_path : str
        Forwarded to ``save_text``; also the directory from which the report
        file is logged to MLflow.
    func_name : str
        Display name of this output; printed in the banner and used as the
        prefix of the stored report name.
    """
    print(f"-----* {func_name} *-----")
    # Text form for the console, dict form for serialisation.
    print(classification_report(y_true, y_predict))
    scores = classification_report(y_true, y_predict, output_dict=True)
    scores_str = json.dumps(scores, indent=4)
    report_name = f"{func_name} - {algorithm_name}"
    save_text(scores_str, report_name, store_path)
    # NOTE(review): relies on save_text writing "<name>.txt" inside store_path - confirm.
    mlflow.log_artifact(os.path.join(store_path, f"{report_name}.txt"))

@staticmethod
def _cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.DataFrame, graph_name: str, average: str, cv_num: int, algorithm_name: str, store_path: str) -> None:
Expand All @@ -157,7 +157,7 @@ def _plot_confusion_matrix(
y_test: pd.DataFrame, y_test_predict: pd.DataFrame, name_column: str, graph_name: str, trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str
) -> None:
"""Plot the confusion matrix of the model."""
print("-----* {graph_name} *-----")
print(f"-----* {graph_name} *-----")
data = plot_confusion_matrix(y_test, y_test_predict, trained_model)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
index = [f"true_{i}" for i in range(int(y_test.nunique().values))]
Expand Down Expand Up @@ -275,12 +275,14 @@ def common_components(self) -> None:
average = self._score(
y_true=ClassificationWorkflowBase.y_test,
y_predict=ClassificationWorkflowBase.y_test_predict,
func_name=ClassificationCommonFunction.MODEL_SCORE.value,
algorithm_name=self.naming,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)
self._classification_report(
y_true=ClassificationWorkflowBase.y_test,
y_predict=ClassificationWorkflowBase.y_test_predict,
func_name=ClassificationCommonFunction.CLASSIFICATION_REPORT.value,
algorithm_name=self.naming,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)
Expand Down Expand Up @@ -368,12 +370,14 @@ def common_components(self, is_automl: bool) -> None:
y_true=ClassificationWorkflowBase.y_test,
y_predict=ClassificationWorkflowBase.y_test_predict,
algorithm_name=self.naming,
func_name=ClassificationCommonFunction.MODEL_SCORE.value,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)
self._classification_report(
y_true=ClassificationWorkflowBase.y_test,
y_predict=ClassificationWorkflowBase.y_test_predict,
algorithm_name=self.naming,
func_name=ClassificationCommonFunction.CLASSIFICATION_REPORT.value,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)
self._cross_validation(
Expand Down Expand Up @@ -936,13 +940,15 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -957,13 +963,15 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -1255,13 +1263,15 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.model.estimators_[0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -1276,13 +1286,15 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.auto_model.estimators_[0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -1634,6 +1646,7 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -1648,6 +1661,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -1889,12 +1903,12 @@ def manual_hyper_parameters(cls) -> Dict:
return hyper_parameters

@staticmethod
def _plot_feature_importance(columns_name: np.ndarray, name_column: str, trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None:
    """Plot the feature importance of the trained model and store the figure and its data.

    Parameters
    ----------
    columns_name : np.ndarray
        Feature names, forwarded to ``plot_logistic_importance``.
    name_column : str
        Forwarded unchanged to ``save_data``.
    trained_model : object
        Fitted model, forwarded to ``plot_logistic_importance``.
    algorithm_name : str
        Suffix of the artifact name.
    local_path : str
        Forwarded to ``save_fig`` and ``save_data``.
    mlflow_path : str
        Forwarded to ``save_fig`` and ``save_data``.
    func_name : str
        Display name of this output; printed in the banner and used as the
        prefix of the artifact name.
    """
    print(f"-----* {func_name} *-----")
    data = plot_logistic_importance(columns_name, trained_model)
    # The figure and its underlying data are stored under the same name.
    artifact_name = f"{func_name} - {algorithm_name}"
    save_fig(artifact_name, local_path, mlflow_path)
    save_data(data, name_column, artifact_name, local_path, mlflow_path)

@dispatch()
def special_components(self, **kwargs) -> None:
Expand All @@ -1916,6 +1930,7 @@ def special_components(self, **kwargs) -> None:
name_column=LogisticRegressionClassification.name_all,
trained_model=self.model,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -1940,6 +1955,7 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None:
name_column=LogisticRegressionClassification.name_all,
trained_model=self.auto_model,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -2258,13 +2274,13 @@ def manual_hyper_parameters(cls) -> Dict:
return hyper_parameters

@staticmethod
def _plot_loss_curve(trained_model: object, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
    """Plot the loss curve of the trained model and store the figure and its data.

    Parameters
    ----------
    trained_model : object
        Fitted model; must expose ``loss_curve_``.
    algorithm_name : str
        Suffix of the artifact name.
    func_name : str
        Display name of this output; printed in the banner and used as the
        prefix of the artifact name.
    local_path : str
        Forwarded to ``save_fig`` and the data writer.
    mlflow_path : str
        Forwarded to ``save_fig`` and the data writer.
    """
    # Imported locally so this method does not depend on the module-level import
    # list still carrying this helper.
    from ..utils.base import save_data_without_data_identifier

    print(f"-----* {func_name} *-----")
    data = pd.DataFrame(trained_model.loss_curve_, columns=["Loss"])
    data.plot(title="Loss")
    artifact_name = f"{func_name} - {algorithm_name}"
    save_fig(artifact_name, local_path, mlflow_path)
    # This method has no name_column, so save_data (called elsewhere in this file as
    # save_data(data, name_column, name, local_path, mlflow_path)) would receive
    # shifted arguments; the identifier-free writer takes exactly these four.
    save_data_without_data_identifier(data, artifact_name, local_path, mlflow_path)

@dispatch()
def special_components(self, **kwargs) -> None:
Expand All @@ -2274,6 +2290,7 @@ def special_components(self, **kwargs) -> None:
self._plot_loss_curve(
trained_model=self.model,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.LOSS_CURVE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -2286,6 +2303,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
self._plot_loss_curve(
trained_model=self.auto_model,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.LOSS_CURVE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -2555,13 +2573,15 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.model.estimators_[0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -2576,13 +2596,15 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.auto_model.estimators_[0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down Expand Up @@ -2920,13 +2942,15 @@ def special_components(self, **kwargs) -> None:
trained_model=self.model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.model.estimators_[0][0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand All @@ -2941,13 +2965,15 @@ def special_components(self, is_automl: bool, **kwargs) -> None:
trained_model=self.auto_model,
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.FEATURE_IMPORTANCE.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
self._plot_tree(
trained_model=self.auto_model.estimators_[0][0],
image_config=self.image_config,
algorithm_name=self.naming,
func_name=ClassificationSpecialFunction.TREE_DIAGRAM.value,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

class ClassificationCommonFunction(Enum):
MODEL_SCORE = "Model Score"
CLASSIFICATION_REPORT = "Classification Report"
CONFUSION_MATRIX = "Confusion Matrix"
CROSS_VALIDATION = "Cross Validation"
MODEL_PREDICTION = "Model Prediction"
Expand All @@ -12,3 +13,9 @@ class ClassificationCommonFunction(Enum):
ROC_CURVE = "ROC Curve"
TWO_DIMENSIONAL_DECISION_BOUNDARY_DIAGRAM = "Two-dimensional Decision Boundary Diagram"
PERMUTATION_IMPORTANCE_DIAGRAM = "Permutation Importance Diagram"


class ClassificationSpecialFunction(Enum):
    """Display names of the model-specific ("special") classification outputs.

    In classification.py each member's ``.value`` is passed as ``func_name`` to the
    plotting helpers, which print it in the console banner and use it as the
    prefix of the stored artifact name.
    """

    # Passed to the _plot_feature_importance helpers.
    FEATURE_IMPORTANCE = "Feature Importance"
    # Passed to _plot_loss_curve.
    LOSS_CURVE_DIAGRAM = "Loss Curve Diagram"
    # Passed to _plot_tree.
    TREE_DIAGRAM = "Tree Diagram"