From 1ed458c77a73ba14baeff19e6ce3912a3b775f13 Mon Sep 17 00:00:00 2001 From: clara-sq Date: Wed, 18 Sep 2024 13:57:35 +0000 Subject: [PATCH 1/2] test:use docker for push and test branch --- Dockerfile | 78 +++++++++++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/Dockerfile b/Dockerfile index ef625371..1957eb9b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,39 +1,39 @@ -# Stage 1: Build the frontend -FROM node:latest as frontend-builder - -# Set the working directory -WORKDIR /app - -# Install frontent dependencies -COPY geochemistrypi/frontend/package.json /app/ -RUN yarn install - -# Stage 2: Build the backend -FROM python:3.9-slim AS backend-builder - -# Set the working directory -WORKDIR /app - -# Install backend dependencies -COPY requirements/production.txt /app/ -RUN pip install -r production.txt - -# Special case for Debian OS, update package lists and install Git and Node.js -RUN apt-get update && apt-get install -y libgomp1 git -RUN apt-get update && apt-get install -y nodejs -RUN apt-get update && apt-get install -y npm - -# Install Yarn -RUN npm install -g yarn - -# Copy the rest of the code -COPY . . 
- -# Expose the port -EXPOSE 8000 3001 - -# Mount the volume -VOLUME /app - -# Dummy CMD to prevent container from exiting immediately -CMD ["tail", "-f", "/dev/null"] +# Stage 1: Build the frontend +FROM node:latest as frontend-builder + +# Set the working directory +WORKDIR /app + +# Install frontent dependencies +COPY geochemistrypi/frontend/package.json /app/ +RUN yarn install + +# Stage 2: Build the backend +FROM python:3.9-slim AS backend-builder + +# Set the working directory +WORKDIR /app + +# Install backend dependencies +COPY requirements/production.txt /app/ +RUN pip install -r production.txt + +# Special case for Debian OS, update package lists and install Git and Node.js +RUN apt-get update && apt-get install -y libgomp1 git +RUN apt-get update && apt-get install -y nodejs +RUN apt-get update && apt-get install -y npm + +# Install Yarn +RUN npm install -g yarn + +# Copy the rest of the code +COPY . . + +# Expose the port +EXPOSE 8000 3001 + +# Mount the volume +VOLUME /app +# test test test +# Dummy CMD to prevent container from exiting immediately +CMD ["tail", "-f", "/dev/null"] From 660de67b65692b5fcaf5f2d7e17b3f274cf5c979 Mon Sep 17 00:00:00 2001 From: clara-sq Date: Wed, 18 Sep 2024 14:28:18 +0000 Subject: [PATCH 2/2] refactor:replace code with enum class --- Dockerfile | 2 +- .../data_mining/model/clustering.py | 43 +++++++++++-------- .../data_mining/model/decomposition.py | 27 ++++++------ .../model/func/algo_clustering/_enum.py | 4 ++ .../model/func/algo_decomposition/_enum.py | 3 ++ 5 files changed, 49 insertions(+), 30 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1957eb9b..dd2fdcf9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,6 +34,6 @@ EXPOSE 8000 3001 # Mount the volume VOLUME /app -# test test test + # Dummy CMD to prevent container from exiting immediately CMD ["tail", "-f", "/dev/null"] diff --git a/geochemistrypi/data_mining/model/clustering.py b/geochemistrypi/data_mining/model/clustering.py index 109da3a1..e646d548 
100644 --- a/geochemistrypi/data_mining/model/clustering.py +++ b/geochemistrypi/data_mining/model/clustering.py @@ -80,43 +80,45 @@ def _score(data: pd.DataFrame, labels: pd.Series, func_name: str, algorithm_name mlflow.log_metrics(scores) @staticmethod - def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None: """Plot the two-dimensional diagram of the clustering result.""" - print("-----* Cluster Two-Dimensional Diagram *-----") + print(f"-----* {grah_name} *-----") scatter2d(data, labels, cluster_centers_, algorithm_name) - save_fig(f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path) data_with_labels = pd.concat([data, labels], axis=1) - save_data(data_with_labels, name_column, f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path) + save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path) @staticmethod - def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None: """Plot the three-dimensional diagram of the clustering result.""" - print("-----* Cluster Three-Dimensional Diagram *-----") + print(f"-----* {grah_name} *-----") scatter3d(data, labels, algorithm_name) - save_fig(f"Cluster Three-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path) data_with_labels = pd.concat([data, labels], 
axis=1) - save_data(data_with_labels, name_column, f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path) + save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path) @staticmethod - def _plot_silhouette_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_silhouette_diagram( + data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str + ) -> None: """Plot the silhouette diagram of the clustering result.""" - print("-----* Silhouette Diagram *-----") + print(f"-----* {grah_name} *-----") plot_silhouette_diagram(data, labels, cluster_centers_, model, algorithm_name) - save_fig(f"Silhouette Diagram - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path) data_with_labels = pd.concat([data, labels], axis=1) - save_data(data_with_labels, name_column, "Silhouette Diagram - Data With Labels", local_path, mlflow_path) + save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path) if not isinstance(cluster_centers_, str): cluster_center_data = pd.DataFrame(cluster_centers_, columns=data.columns) - save_data(cluster_center_data, name_column, "Silhouette Diagram - Cluster Centers", local_path, mlflow_path) + save_data(cluster_center_data, name_column, f"{grah_name} - Cluster Centers", local_path, mlflow_path) @staticmethod - def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None: """Plot the 
silhouette value diagram of the clustering result.""" - print("-----* Silhouette value Diagram *-----") + print(f"-----* {grah_name} *-----") plot_silhouette_value_diagram(data, labels, algorithm_name) - save_fig(f"Silhouette value Diagram - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path) data_with_labels = pd.concat([data, labels], axis=1) - save_data(data_with_labels, name_column, "Silhouette value Diagram - Data With Labels", local_path, mlflow_path) + save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path) def common_components(self) -> None: """Invoke all common application functions for clustering algorithms.""" @@ -157,6 +159,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value, ) # choose three of dimensions to draw @@ -168,6 +171,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value, ) elif self.X.shape[1] == 3: # choose two of dimensions to draw @@ -180,6 +184,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value, ) # no need to choose @@ -190,6 +195,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value, ) elif self.X.shape[1] == 2: 
self._scatter2d( @@ -200,6 +206,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value, ) else: pass @@ -213,6 +220,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=ClusteringCommonFunction.SILHOUETTE_DIAGRAM.value, ) self._plot_silhouette_value_diagram( data=self.X, @@ -221,6 +229,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=ClusteringCommonFunction.SILHOUETTE_VALUE_DIAGRAM.value, ) diff --git a/geochemistrypi/data_mining/model/decomposition.py b/geochemistrypi/data_mining/model/decomposition.py index 95e60d74..c1cd8a26 100644 --- a/geochemistrypi/data_mining/model/decomposition.py +++ b/geochemistrypi/data_mining/model/decomposition.py @@ -67,28 +67,28 @@ def _reduced_data2pd(self, reduced_data: np.ndarray, components_num: int) -> Non self.X_reduced.columns = pa_name @staticmethod - def _plot_2d_scatter_diagram(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_2d_scatter_diagram(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None: """Plot the two-dimensional diagram of the decomposition result.""" - print("-----* Decomposition Two-Dimensional Diagram *-----") + print(f"-----* {grah_name} *-----") plot_2d_scatter_diagram(data, algorithm_name) - save_fig(f"Decomposition Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path) - save_data(data, name_column, f"Decomposition Two-Dimensional Data - {algorithm_name}", local_path, mlflow_path) 
+ save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path) + save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path) @staticmethod - def _plot_heatmap(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_heatmap(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None: """Plot a heatmap for the decomposition result.""" - print("-----* Decomposition Heatmap *-----") + print(f"-----* {grah_name} *-----") plot_heatmap(data, algorithm_name) - save_fig(f"Decomposition Heatmap - {algorithm_name}", local_path, mlflow_path) - save_data(data, name_column, f"Decomposition Heatmap Data - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path) + save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path) @staticmethod - def _plot_contour(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_contour(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None: """Plot a contour plot for dimensionality reduction results.""" - print("-----* Dimensionality Reduction Contour Plot *-----") + print(f"-----* {grah_name} *-----") plot_contour(data, algorithm_name) - save_fig(f"Dimensionality Reduction Contour Plot - {algorithm_name}", local_path, mlflow_path) - save_data(data, name_column, f"Dimensionality Reduction Contour Plot Data - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path) + save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path) def common_components(self) -> None: """Invoke all common application functions for decomposition algorithms by Scikit-learn framework.""" @@ -100,6 +100,7 @@ def 
common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=DecompositionCommonFunction.DECOMPOSITION_TWO_DIMENSIONAL_DIAGRAM.value, ) self._plot_heatmap( data=self.X, @@ -107,6 +108,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=DecompositionCommonFunction.DECOMPOSITION_HEATMAP.value, ) self._plot_contour( data=self.X, @@ -114,6 +116,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=DecompositionCommonFunction.DIMENSIONALITY_REDUCTION_CONTOUR_PLOT.value, ) diff --git a/geochemistrypi/data_mining/model/func/algo_clustering/_enum.py b/geochemistrypi/data_mining/model/func/algo_clustering/_enum.py index 307a31aa..87b3ba95 100644 --- a/geochemistrypi/data_mining/model/func/algo_clustering/_enum.py +++ b/geochemistrypi/data_mining/model/func/algo_clustering/_enum.py @@ -6,6 +6,10 @@ class ClusteringCommonFunction(Enum): CLUSTER_LABELS = "Cluster Labels" MODEL_PERSISTENCE = "Model Persistence" MODEL_SCORE = "Model Score" + CLUSTER_TWO_DIMENSIONAL_DIAGRAM = "Cluster Two-Dimensional Diagram" + CLUSTER_THREE_DIMENSIONAL_DIAGRAM = "Cluster Three-Dimensional Diagram" + SILHOUETTE_DIAGRAM = "Silhouette Diagram" + SILHOUETTE_VALUE_DIAGRAM = "Silhouette value Diagram" class KMeansSpecialFunction(Enum): diff --git a/geochemistrypi/data_mining/model/func/algo_decomposition/_enum.py b/geochemistrypi/data_mining/model/func/algo_decomposition/_enum.py index 2cc06785..03dd0175 100644 --- a/geochemistrypi/data_mining/model/func/algo_decomposition/_enum.py +++ b/geochemistrypi/data_mining/model/func/algo_decomposition/_enum.py @@ -3,6 +3,9 @@ class 
DecompositionCommonFunction(Enum): MODEL_PERSISTENCE = "Model Persistence" + DECOMPOSITION_TWO_DIMENSIONAL_DIAGRAM = "Decomposition Two-Dimensional Diagram" + DECOMPOSITION_HEATMAP = "Decomposition Heatmap" + DIMENSIONALITY_REDUCTION_CONTOUR_PLOT = "Dimensionality Reduction Contour Plot" class PCASpecialFunction(Enum):