Skip to content

Commit

Permalink
refactor: uniform code style in regression.py
Browse files Browse the repository at this point in the history
  • Loading branch information
clara-sq committed Jan 16, 2025
1 parent b46d557 commit 9de77e5
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 219 deletions.
12 changes: 6 additions & 6 deletions geochemistrypi/data_mining/model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,19 +301,19 @@ def _plot_permutation_importance(
trained_model: object,
image_config: dict,
algorithm_name: str,
grah_name: str,
graph_name: str,
local_path: str,
mlflow_path: str,
) -> None:
"""Permutation importance plot."""
print(f"-----* {grah_name} *-----") # Permutation Importance
print(f"-----* {graph_name} *-----") # Permutation Importance
importances_mean, importances_std, importances = plot_permutation_importance(X_test, y_test, trained_model, image_config)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(X_test, name_column, f"{grah_name} - X Test", local_path, mlflow_path)
save_data(y_test, name_column, f"{grah_name} - Y Test", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
save_data(X_test, name_column, f"{graph_name} - X Test", local_path, mlflow_path)
save_data(y_test, name_column, f"{graph_name} - Y Test", local_path, mlflow_path)
data_dict = {"importances_mean": importances_mean.tolist(), "importances_std": importances_std.tolist(), "importances": importances.tolist()}
data_str = json.dumps(data_dict, indent=4)
save_text(data_str, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_text(data_str, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)


class TreeWorkflowMixin:
Expand Down
48 changes: 24 additions & 24 deletions geochemistrypi/data_mining/model/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,45 +80,45 @@ def _score(data: pd.DataFrame, labels: pd.Series, func_name: str, algorithm_name
mlflow.log_metrics(scores)

@staticmethod
def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the two-dimensional diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
scatter2d(data, labels, cluster_centers_, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the three-dimensional diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
scatter3d(data, labels, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_silhouette_diagram(
data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str
data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str
) -> None:
"""Plot the silhouette diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
plot_silhouette_diagram(data, labels, cluster_centers_, model, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - Data With Labels", local_path, mlflow_path)
if not isinstance(cluster_centers_, str):
cluster_center_data = pd.DataFrame(cluster_centers_, columns=data.columns)
save_data(cluster_center_data, name_column, f"{grah_name} - Cluster Centers", local_path, mlflow_path)
save_data(cluster_center_data, name_column, f"{graph_name} - Cluster Centers", local_path, mlflow_path)

@staticmethod
def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the silhouette value diagram of the clustering result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
plot_silhouette_value_diagram(data, labels, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{graph_name} - Data With Labels", local_path, mlflow_path)

def common_components(self) -> None:
"""Invoke all common application functions for clustering algorithms."""
Expand Down Expand Up @@ -159,7 +159,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)

# choose three of dimensions to draw
Expand All @@ -171,7 +171,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
)
elif self.X.shape[1] == 3:
# choose two of dimensions to draw
Expand All @@ -184,7 +184,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)

# no need to choose
Expand All @@ -195,7 +195,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
)
elif self.X.shape[1] == 2:
self._scatter2d(
Expand All @@ -206,7 +206,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)
else:
pass
Expand All @@ -220,7 +220,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.SILHOUETTE_DIAGRAM.value,
graph_name=ClusteringCommonFunction.SILHOUETTE_DIAGRAM.value,
)
self._plot_silhouette_value_diagram(
data=self.X,
Expand All @@ -229,7 +229,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.SILHOUETTE_VALUE_DIAGRAM.value,
graph_name=ClusteringCommonFunction.SILHOUETTE_VALUE_DIAGRAM.value,
)


Expand Down
30 changes: 15 additions & 15 deletions geochemistrypi/data_mining/model/decomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,28 +67,28 @@ def _reduced_data2pd(self, reduced_data: np.ndarray, components_num: int) -> Non
self.X_reduced.columns = pa_name

@staticmethod
def _plot_2d_scatter_diagram(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _plot_2d_scatter_diagram(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot the two-dimensional diagram of the decomposition result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
plot_2d_scatter_diagram(data, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_heatmap(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _plot_heatmap(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot a heatmap for the decomposition result."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
plot_heatmap(data, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_contour(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
def _plot_contour(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, graph_name: str) -> None:
"""Plot a contour plot for dimensionality reduction results."""
print(f"-----* {grah_name} *-----")
print(f"-----* {graph_name} *-----")
plot_contour(data, algorithm_name)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{graph_name} - {algorithm_name}", local_path, mlflow_path)

def common_components(self) -> None:
"""Invoke all common application functions for decomposition algorithms by Scikit-learn framework."""
Expand All @@ -100,23 +100,23 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=DecompositionCommonFunction.DECOMPOSITION_TWO_DIMENSIONAL_DIAGRAM.value,
graph_name=DecompositionCommonFunction.DECOMPOSITION_TWO_DIMENSIONAL_DIAGRAM.value,
)
self._plot_heatmap(
data=self.X,
name_column=self.name_all,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=DecompositionCommonFunction.DECOMPOSITION_HEATMAP.value,
graph_name=DecompositionCommonFunction.DECOMPOSITION_HEATMAP.value,
)
self._plot_contour(
data=self.X,
name_column=self.name_all,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=DecompositionCommonFunction.DIMENSIONALITY_REDUCTION_CONTOUR_PLOT.value,
graph_name=DecompositionCommonFunction.DIMENSIONALITY_REDUCTION_CONTOUR_PLOT.value,
)


Expand Down
29 changes: 29 additions & 0 deletions geochemistrypi/data_mining/model/func/algo_regression/_enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,54 @@ class RegressionSpecialFunction(Enum):
THREE_DIMENSIONAL_SURFACE_DIAGRAM = "3D Surface Diagram"


class XGBoostSpecialFunction(Enum):
    """Human-readable labels for the XGBoost-specific functionality.

    Each member's ``value`` is the display string for one output.
    """

    FEATURE_IMPORTANCE_DIAGRAM = "Feature Importance Diagram"


class DecisionTreeSpecialFunction(Enum):
    """Human-readable labels for the decision-tree-specific functionality.

    Each member's ``value`` is the display string for one output.
    """

    FEATURE_IMPORTANCE_DIAGRAM = "Feature Importance Diagram"
    SINGLE_TREE_DIAGRAM = "Single Tree Diagram"


class MLPSpecialFunction(Enum):
    """Human-readable labels for the MLP-specific functionality.

    Each member's ``value`` is the display string for one output.
    """

    LOSS_CURVE_DIAGRAM = "Loss Curve Diagram"


class ClassicalLinearSpecialFunction(Enum):
    """Human-readable labels for the classical linear regression functionality.

    Each member's ``value`` is the display string for one output: the fitted
    formula plus the 2D/3D scatter, line and surface diagrams.
    """

    LINEAR_REGRESSION_FORMULA = "Linear Regression Formula"
    TWO_DIMENSIONAL_SCATTER_DIAGRAM = "2D Scatter Diagram"
    THREE_DIMENSIONAL_SCATTER_DIAGRAM = "3D Scatter Diagram"
    TWO_DIMENSIONAL_LINE_DIAGRAM = "2D Line Diagram"
    THREE_DIMENSIONAL_SURFACE_DIAGRAM = "3D Surface Diagram"


class LassoSpecialFunction(Enum):
    """Human-readable labels for the Lasso regression functionality.

    Each member's ``value`` is the display string for one output: the fitted
    formula plus the 2D/3D scatter, line and surface diagrams.
    """

    LASSO_REGRESSION_FORMULA = "Lasso Regression Formula"
    TWO_DIMENSIONAL_SCATTER_DIAGRAM = "2D Scatter Diagram"
    THREE_DIMENSIONAL_SCATTER_DIAGRAM = "3D Scatter Diagram"
    TWO_DIMENSIONAL_LINE_DIAGRAM = "2D Line Diagram"
    THREE_DIMENSIONAL_SURFACE_DIAGRAM = "3D Surface Diagram"


class ElasticNetSpecialFunction(Enum):
    """Human-readable labels for the Elastic Net functionality.

    Each member's ``value`` is the display string for one output: the fitted
    formula plus the 2D/3D scatter, line and surface diagrams.
    """

    ELASTIC_NET_FORMULA = "Elastic Net Formula"
    TWO_DIMENSIONAL_SCATTER_DIAGRAM = "2D Scatter Diagram"
    THREE_DIMENSIONAL_SCATTER_DIAGRAM = "3D Scatter Diagram"
    TWO_DIMENSIONAL_LINE_DIAGRAM = "2D Line Diagram"
    THREE_DIMENSIONAL_SURFACE_DIAGRAM = "3D Surface Diagram"


class SGDSpecialFunction(Enum):
    """Human-readable labels for the SGD regression functionality.

    Each member's ``value`` is the display string for one output: the fitted
    formula plus the 2D/3D scatter, line and surface diagrams.
    """

    SGD_REGRESSION_FORMULA = "SGD Regression Formula"
    TWO_DIMENSIONAL_SCATTER_DIAGRAM = "2D Scatter Diagram"
    THREE_DIMENSIONAL_SCATTER_DIAGRAM = "3D Scatter Diagram"
    TWO_DIMENSIONAL_LINE_DIAGRAM = "2D Line Diagram"
    THREE_DIMENSIONAL_SURFACE_DIAGRAM = "3D Surface Diagram"


class RidgeSpecialFunction(Enum):
    """Human-readable labels for the Ridge regression functionality.

    Each member's ``value`` is the display string for one output: the fitted
    formula plus the 2D/3D scatter, line and surface diagrams.
    """

    RIDGE_REGRESSION_FORMULA = "Ridge Regression Formula"
    TWO_DIMENSIONAL_SCATTER_DIAGRAM = "2D Scatter Diagram"
    THREE_DIMENSIONAL_SCATTER_DIAGRAM = "3D Scatter Diagram"
    TWO_DIMENSIONAL_LINE_DIAGRAM = "2D Line Diagram"
    THREE_DIMENSIONAL_SURFACE_DIAGRAM = "3D Surface Diagram"
Loading

0 comments on commit 9de77e5

Please sign in to comment.