Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: replace code with enum class in regression #396

Merged
merged 5 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 33 additions & 28 deletions geochemistrypi/data_mining/model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,39 +301,40 @@ def _plot_permutation_importance(
trained_model: object,
image_config: dict,
algorithm_name: str,
graph_name: str,
local_path: str,
mlflow_path: str,
) -> None:
"""Permutation importance plot."""
print("-----* Permutation Importance Diagram *-----")
print(f"-----* {graph_name} *-----") # Permutation Importance
importances_mean, importances_std, importances = plot_permutation_importance(X_test, y_test, trained_model, image_config)
save_fig(f"Permutation Importance - {algorithm_name}", local_path, mlflow_path)
save_data(X_test, name_column, "Permutation Importance - X Test", local_path, mlflow_path)
save_data(y_test, name_column, "Permutation Importance - Y Test", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
save_data(X_test, name_column, f"{graph_name} - X Test", local_path, mlflow_path)
save_data(y_test, name_column, f"{graph_name} - Y Test", local_path, mlflow_path)
data_dict = {"importances_mean": importances_mean.tolist(), "importances_std": importances_std.tolist(), "importances": importances.tolist()}
data_str = json.dumps(data_dict, indent=4)
save_text(data_str, f"Permutation Importance - {algorithm_name}", local_path, mlflow_path)
save_text(data_str, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)


class TreeWorkflowMixin:
"""Mixin class for tree models."""
# NOTE(review): this span is rendered diff text, not runnable Python. Indentation is stripped and
# every changed line appears twice -- apparently the pre-change line followed by its replacement.
# NOTE(review): the label parameter is called `func_name` here, while `_plot_permutation_importance`
# earlier in this file calls its equivalent parameter `graph_name`; consider one name for both.

@staticmethod
# The second signature adds `func_name` between `algorithm_name` and `local_path`.
def _plot_feature_importance(X_train: pd.DataFrame, name_column: str, trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_feature_importance(X_train: pd.DataFrame, name_column: str, trained_model: object, image_config: dict, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
"""Draw the feature importance bar diagram."""
# The hard-coded banner is replaced by one built from `func_name`.
print("-----* Feature Importance Diagram *-----")
print(f"-----* {func_name} *-----") # Feature Importance Diagram
# Feature names come from the training frame's columns; importances from the fitted model attribute.
columns_name = X_train.columns
feature_importances = trained_model.feature_importances_
data = plot_feature_importance(columns_name, feature_importances, image_config)
# NOTE(review): the hard-coded artifact name was "Feature Importance - ...", but the trailing comment
# above suggests `func_name` is "Feature Importance Diagram". If so, the names passed to
# save_fig/save_data change with this edit -- confirm that is intended.
save_fig(f"Feature Importance - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"Feature Importance - {algorithm_name}", local_path, mlflow_path, True)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{func_name} - {algorithm_name}", local_path, mlflow_path, True)

@staticmethod
# The second signature adds `func_name` between `algorithm_name` and `local_path`.
def _plot_tree(trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_tree(trained_model: object, image_config: dict, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
"""Drawing decision tree diagrams."""
print("-----* Single Tree Diagram *-----")
print(f"-----* {func_name} *-----") # Single Tree Diagram
plot_decision_tree(trained_model, image_config)
# NOTE(review): the hard-coded banner ("Single Tree Diagram") and figure name ("Tree Diagram - ...")
# differed; both now derive from `func_name`, so the figure name presumably becomes
# "Single Tree Diagram - ..." -- confirm nothing depends on the old name.
save_fig(f"Tree Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)


class LinearWorkflowMixin:
Expand All @@ -350,40 +351,44 @@ def _show_formula(
save_text(formula_str, f"{algorithm_name} Formula", local_path, mlflow_path)

@staticmethod
# NOTE(review): rendered diff text -- each changed line appears twice, apparently old then new.
# The second signature adds `func_name` between `algorithm_name` and `local_path`.
def _plot_2d_scatter_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, data_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_2d_scatter_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, data_name: str, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
"""Plot the 2D graph of the linear regression model."""
# The hard-coded "2D Scatter Diagram" label is replaced by `func_name` in the banner and in the
# names passed to save_fig/save_data.
print("-----* 2D Scatter Diagram *-----")
print(f"-----* {func_name} *-----") # 2D Scatter Diagram
plot_2d_scatter_diagram(feature_data, target_data)
save_fig(f"2D Scatter Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
# Features and targets are joined column-wise so the saved table holds what was plotted.
data = pd.concat([feature_data, target_data], axis=1)
save_data(data, data_name, f"2D Scatter Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, data_name, f"{func_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
# NOTE(review): rendered diff text -- the one-line signature is apparently the old form and the
# three-line signature its replacement, which adds `func_name` after `algorithm_name`.
def _plot_2d_line_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, y_test_predict: pd.DataFrame, data_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_2d_line_diagram(
feature_data: pd.DataFrame, target_data: pd.DataFrame, y_test_predict: pd.DataFrame, data_name: str, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str
) -> None:
"""Plot the 2D graph of the linear regression model."""
# The hard-coded "2D Line Diagram" label is replaced by `func_name` in the banner and in the
# names passed to save_fig/save_data.
print("-----* 2D Line Diagram *-----")
print(f"-----* {func_name} *-----") # 2D Line Diagram
plot_2d_line_diagram(feature_data, target_data, y_test_predict)
save_fig(f"2D Line Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
# Features, targets and predictions are joined column-wise before being saved.
data = pd.concat([feature_data, target_data, y_test_predict], axis=1)
save_data(data, data_name, f"2D Line Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, data_name, f"{func_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
# NOTE(review): rendered diff text -- each changed line appears twice, apparently old then new.
# The second signature adds `func_name` between `algorithm_name` and `local_path`.
def _plot_3d_scatter_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, data_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_3d_scatter_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, data_name: str, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
"""Plot the 3D graph of the linear regression model."""
# The hard-coded "3D Scatter Diagram" label is replaced by `func_name` in the banner and in the
# names passed to save_fig/save_data.
print("-----* 3D Scatter Diagram *-----")
print(f"-----* {func_name} *-----") # 3D Scatter Diagram
plot_3d_scatter_diagram(feature_data, target_data)
save_fig(f"3D Scatter Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
# Features and targets are joined column-wise so the saved table holds what was plotted.
data = pd.concat([feature_data, target_data], axis=1)
save_data(data, data_name, f"3D Scatter Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, data_name, f"{func_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
# NOTE(review): rendered diff text -- the one-line signature is apparently the old form and the
# three-line signature its replacement, which adds `func_name` after `algorithm_name`.
def _plot_3d_surface_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, y_test_predict: pd.DataFrame, data_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_3d_surface_diagram(
feature_data: pd.DataFrame, target_data: pd.DataFrame, y_test_predict: pd.DataFrame, data_name: str, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str
) -> None:
"""Plot the 3D graph of the linear regression model."""
# The hard-coded "3D Surface Diagram" label is replaced by `func_name` in the banner and in the
# names passed to save_fig/save_data.
print("-----* 3D Surface Diagram *-----")
print(f"-----* {func_name} *-----") # 3D Surface Diagram
plot_3d_surface_diagram(feature_data, target_data, y_test_predict)
save_fig(f"3D Surface Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
# Features, targets and predictions are joined column-wise before being saved.
data = pd.concat([feature_data, target_data, y_test_predict], axis=1)
save_data(data, data_name, f"3D Surface Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, data_name, f"{func_name} - {algorithm_name}", local_path, mlflow_path)


class ClusteringMetricsMixin:
Expand Down
48 changes: 24 additions & 24 deletions geochemistrypi/data_mining/model/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,45 +80,45 @@ def _score(data: pd.DataFrame, labels: pd.Series, func_name: str, algorithm_name
mlflow.log_metrics(scores)

@staticmethod
# NOTE(review): rendered diff text -- each changed line appears twice, apparently old then new.
# The only change is the parameter rename `grah_name` -> `graph_name` (typo fix). The call sites
# visible later in this file pass it by keyword, so any other keyword caller must be updated too.
# NOTE(review): `graph_name` is the last parameter here, whereas `_plot_permutation_importance`
# in _base.py places it before `local_path`; consider aligning the order.
def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the two-dimensional diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
scatter2d(data, labels, cluster_centers_, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
# Labels are appended as an extra column so the saved table pairs each row with its cluster.
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
# NOTE(review): rendered diff text -- each changed line appears twice, apparently old then new.
# The only change is the parameter rename `grah_name` -> `graph_name` (typo fix).
def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the three-dimensional diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
scatter3d(data, labels, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
# Labels are appended as an extra column so the saved table pairs each row with its cluster.
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
# NOTE(review): rendered diff text -- each changed line appears twice, apparently old then new.
# The only change is the parameter rename `grah_name` -> `graph_name` (typo fix).
def _plot_silhouette_diagram(
data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str
data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str
) -> None:
"""Plot the silhouette diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
plot_silhouette_diagram(data, labels, cluster_centers_, model, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - Data With Labels", local_path, mlflow_path)
# NOTE(review): `cluster_centers_` is annotated np.ndarray, yet this guard implies callers may pass
# a str (presumably a placeholder for models without centers -- confirm) and the annotation
# should then say so. `_scatter2d` annotates the same argument as pd.DataFrame.
if not isinstance(cluster_centers_, str):
# Centers are wrapped in a frame that reuses the data's column names before being saved.
cluster_center_data = pd.DataFrame(cluster_centers_, columns=data.columns)
save_data(cluster_center_data, name_column, f"{grah_name} - Cluster Centers", local_path, mlflow_path)
save_data(cluster_center_data, name_column, f"{graph_name} - Cluster Centers", local_path, mlflow_path)

@staticmethod
# NOTE(review): rendered diff text -- each changed line appears twice, apparently old then new.
# The only change is the parameter rename `grah_name` -> `graph_name` (typo fix).
def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the silhouette value diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
plot_silhouette_value_diagram(data, labels, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
# The saved table is named "<graph_name> - Data With Labels", not after the algorithm.
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - Data With Labels", local_path, mlflow_path)

def common_components(self) -> None:
"""Invoke all common application functions for clustering algorithms."""
Expand Down Expand Up @@ -159,7 +159,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)

# choose three of dimensions to draw
Expand All @@ -171,7 +171,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
)
elif self.X.shape[1] == 3:
# choose two of dimensions to draw
Expand All @@ -184,7 +184,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)

# no need to choose
Expand All @@ -195,7 +195,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
)
elif self.X.shape[1] == 2:
self._scatter2d(
Expand All @@ -206,7 +206,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)
else:
pass
Expand All @@ -220,7 +220,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.SILHOUETTE_DIAGRAM.value,
graph_name=ClusteringCommonFunction.SILHOUETTE_DIAGRAM.value,
)
self._plot_silhouette_value_diagram(
data=self.X,
Expand All @@ -229,7 +229,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.SILHOUETTE_VALUE_DIAGRAM.value,
graph_name=ClusteringCommonFunction.SILHOUETTE_VALUE_DIAGRAM.value,
)


Expand Down
Loading
Loading