Skip to content

Commit

Permalink
Merge pull request #396 from ZJUEarthData/dev/siqi
Browse files Browse the repository at this point in the history
refactor: replace code with enum class in regression
  • Loading branch information
SanyHe authored Jan 20, 2025
2 parents b0aed08 + 9de77e5 commit 4751992
Show file tree
Hide file tree
Showing 5 changed files with 293 additions and 98 deletions.
61 changes: 33 additions & 28 deletions geochemistrypi/data_mining/model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,39 +301,40 @@ def _plot_permutation_importance(
trained_model: object,
image_config: dict,
algorithm_name: str,
graph_name: str,
local_path: str,
mlflow_path: str,
) -> None:
"""Permutation importance plot."""
print("-----* Permutation Importance Diagram *-----")
print(f"-----* {graph_name} *-----") # Permutation Importance
importances_mean, importances_std, importances = plot_permutation_importance(X_test, y_test, trained_model, image_config)
save_fig(f"Permutation Importance - {algorithm_name}", local_path, mlflow_path)
save_data(X_test, name_column, "Permutation Importance - X Test", local_path, mlflow_path)
save_data(y_test, name_column, "Permutation Importance - Y Test", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
save_data(X_test, name_column, f"{graph_name} - X Test", local_path, mlflow_path)
save_data(y_test, name_column, f"{graph_name} - Y Test", local_path, mlflow_path)
data_dict = {"importances_mean": importances_mean.tolist(), "importances_std": importances_std.tolist(), "importances": importances.tolist()}
data_str = json.dumps(data_dict, indent=4)
save_text(data_str, f"Permutation Importance - {algorithm_name}", local_path, mlflow_path)
save_text(data_str, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)


class TreeWorkflowMixin:
"""Mixin class for tree models."""

@staticmethod
def _plot_feature_importance(X_train: pd.DataFrame, name_column: str, trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_feature_importance(X_train: pd.DataFrame, name_column: str, trained_model: object, image_config: dict, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
"""Draw the feature importance bar diagram."""
print("-----* Feature Importance Diagram *-----")
print(f"-----* {func_name} *-----") # Feature Importance Diagram
columns_name = X_train.columns
feature_importances = trained_model.feature_importances_
data = plot_feature_importance(columns_name, feature_importances, image_config)
save_fig(f"Feature Importance - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"Feature Importance - {algorithm_name}", local_path, mlflow_path, True)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{func_name} - {algorithm_name}", local_path, mlflow_path, True)

@staticmethod
def _plot_tree(trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_tree(trained_model: object, image_config: dict, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
"""Drawing decision tree diagrams."""
print("-----* Single Tree Diagram *-----")
print(f"-----* {func_name} *-----") # Single Tree Diagram
plot_decision_tree(trained_model, image_config)
save_fig(f"Tree Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)


class LinearWorkflowMixin:
Expand All @@ -350,40 +351,44 @@ def _show_formula(
save_text(formula_str, f"{algorithm_name} Formula", local_path, mlflow_path)

@staticmethod
def _plot_2d_scatter_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, data_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_2d_scatter_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, data_name: str, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
"""Plot the 2D graph of the linear regression model."""
print("-----* 2D Scatter Diagram *-----")
print(f"-----* {func_name} *-----") # 2D Scatter Diagram
plot_2d_scatter_diagram(feature_data, target_data)
save_fig(f"2D Scatter Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
data = pd.concat([feature_data, target_data], axis=1)
save_data(data, data_name, f"2D Scatter Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, data_name, f"{func_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_2d_line_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, y_test_predict: pd.DataFrame, data_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_2d_line_diagram(
feature_data: pd.DataFrame, target_data: pd.DataFrame, y_test_predict: pd.DataFrame, data_name: str, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str
) -> None:
"""Plot the 2D graph of the linear regression model."""
print("-----* 2D Line Diagram *-----")
print(f"-----* {func_name} *-----") # 2D Line Diagram
plot_2d_line_diagram(feature_data, target_data, y_test_predict)
save_fig(f"2D Line Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
data = pd.concat([feature_data, target_data, y_test_predict], axis=1)
save_data(data, data_name, f"2D Line Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, data_name, f"{func_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_3d_scatter_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, data_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_3d_scatter_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, data_name: str, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str) -> None:
"""Plot the 3D graph of the linear regression model."""
print("-----* 3D Scatter Diagram *-----")
print(f"-----* {func_name} *-----") # 3D Scatter Diagram
plot_3d_scatter_diagram(feature_data, target_data)
save_fig(f"3D Scatter Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
data = pd.concat([feature_data, target_data], axis=1)
save_data(data, data_name, f"3D Scatter Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, data_name, f"{func_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_3d_surface_diagram(feature_data: pd.DataFrame, target_data: pd.DataFrame, y_test_predict: pd.DataFrame, data_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_3d_surface_diagram(
feature_data: pd.DataFrame, target_data: pd.DataFrame, y_test_predict: pd.DataFrame, data_name: str, algorithm_name: str, func_name: str, local_path: str, mlflow_path: str
) -> None:
"""Plot the 3D graph of the linear regression model."""
print("-----* 3D Surface Diagram *-----")
print(f"-----* {func_name} *-----") # 3D Surface Diagram
plot_3d_surface_diagram(feature_data, target_data, y_test_predict)
save_fig(f"3D Surface Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path)
data = pd.concat([feature_data, target_data, y_test_predict], axis=1)
save_data(data, data_name, f"3D Surface Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, data_name, f"{func_name} - {algorithm_name}", local_path, mlflow_path)


class ClusteringMetricsMixin:
Expand Down
48 changes: 24 additions & 24 deletions geochemistrypi/data_mining/model/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,45 +80,45 @@ def _score(data: pd.DataFrame, labels: pd.Series, func_name: str, algorithm_name
mlflow.log_metrics(scores)

@staticmethod
def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the two-dimensional diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
scatter2d(data, labels, cluster_centers_, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the three-dimensional diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
scatter3d(data, labels, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_silhouette_diagram(
data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str
data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str
) -> None:
"""Plot the silhouette diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
plot_silhouette_diagram(data, labels, cluster_centers_, model, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - Data With Labels", local_path, mlflow_path)
if not isinstance(cluster_centers_, str):
cluster_center_data = pd.DataFrame(cluster_centers_, columns=data.columns)
save_data(cluster_center_data, name_column, f"{grah_name} - Cluster Centers", local_path, mlflow_path)
save_data(cluster_center_data, name_column, f"{graph_name} - Cluster Centers", local_path, mlflow_path)

@staticmethod
def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the silhouette value diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
plot_silhouette_value_diagram(data, labels, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - Data With Labels", local_path, mlflow_path)

def common_components(self) -> None:
"""Invoke all common application functions for clustering algorithms."""
Expand Down Expand Up @@ -159,7 +159,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)

# choose three of dimensions to draw
Expand All @@ -171,7 +171,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
)
elif self.X.shape[1] == 3:
# choose two of dimensions to draw
Expand All @@ -184,7 +184,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)

# no need to choose
Expand All @@ -195,7 +195,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
)
elif self.X.shape[1] == 2:
self._scatter2d(
Expand All @@ -206,7 +206,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)
else:
pass
Expand All @@ -220,7 +220,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.SILHOUETTE_DIAGRAM.value,
graph_name=ClusteringCommonFunction.SILHOUETTE_DIAGRAM.value,
)
self._plot_silhouette_value_diagram(
data=self.X,
Expand All @@ -229,7 +229,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.SILHOUETTE_VALUE_DIAGRAM.value,
graph_name=ClusteringCommonFunction.SILHOUETTE_VALUE_DIAGRAM.value,
)


Expand Down
Loading

0 comments on commit 4751992

Please sign in to comment.