Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: replace code with enum class #386

Merged
merged 2 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 39 additions & 39 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
# Stage 1: Build the frontend
# NOTE(review): no later instruction in this file copies from this stage
# (there is no `COPY --from=frontend-builder`) — confirm it is still needed.
FROM node:latest as frontend-builder

# Set the working directory
WORKDIR /app

# Install frontend dependencies
# Only package.json is copied before `yarn install`.
COPY geochemistrypi/frontend/package.json /app/
RUN yarn install

# Stage 2: Build the backend
# This is the last stage in the file, so it is the one the final image is built from.
FROM python:3.9-slim AS backend-builder

# Set the working directory
WORKDIR /app

# Install backend dependencies
# Only the production requirements file is copied before `pip install`.
COPY requirements/production.txt /app/
RUN pip install -r production.txt

# Special case for Debian OS: update package lists and install libgomp1, Git, Node.js and npm
RUN apt-get update && apt-get install -y libgomp1 git
RUN apt-get update && apt-get install -y nodejs
RUN apt-get update && apt-get install -y npm

# Install Yarn globally via npm
RUN npm install -g yarn

# Copy the rest of the code from the build context into /app
COPY . .

# Expose ports 8000 and 3001
# NOTE(review): presumably one port per service (backend / frontend) — confirm which is which.
EXPOSE 8000 3001

# Declare /app as a volume
VOLUME /app

# Dummy CMD to prevent container from exiting immediately
# (`tail -f /dev/null` blocks forever; no application process is started here)
CMD ["tail", "-f", "/dev/null"]
# Stage 1: Build the frontend
# NOTE(review): no later instruction in this file copies from this stage
# (there is no `COPY --from=frontend-builder`) — confirm it is still needed.
FROM node:latest as frontend-builder

# Set the working directory
WORKDIR /app

# Install frontend dependencies
# Only package.json is copied before `yarn install`.
COPY geochemistrypi/frontend/package.json /app/
RUN yarn install

# Stage 2: Build the backend
# This is the last stage in the file, so it is the one the final image is built from.
FROM python:3.9-slim AS backend-builder

# Set the working directory
WORKDIR /app

# Install backend dependencies
# Only the production requirements file is copied before `pip install`.
COPY requirements/production.txt /app/
RUN pip install -r production.txt

# Special case for Debian OS: update package lists and install libgomp1, Git, Node.js and npm
RUN apt-get update && apt-get install -y libgomp1 git
RUN apt-get update && apt-get install -y nodejs
RUN apt-get update && apt-get install -y npm

# Install Yarn globally via npm
RUN npm install -g yarn

# Copy the rest of the code from the build context into /app
COPY . .

# Expose ports 8000 and 3001
# NOTE(review): presumably one port per service (backend / frontend) — confirm which is which.
EXPOSE 8000 3001

# Declare /app as a volume
VOLUME /app

# Dummy CMD to prevent container from exiting immediately
# (`tail -f /dev/null` blocks forever; no application process is started here)
CMD ["tail", "-f", "/dev/null"]
43 changes: 26 additions & 17 deletions geochemistrypi/data_mining/model/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,43 +80,45 @@ def _score(data: pd.DataFrame, labels: pd.Series, func_name: str, algorithm_name
mlflow.log_metrics(scores)

@staticmethod
def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _scatter2d(data: pd.DataFrame, labels: pd.Series, name_column: str, cluster_centers_: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
"""Plot the two-dimensional diagram of the clustering result."""
print("-----* Cluster Two-Dimensional Diagram *-----")
print(f"-----* {grah_name} *-----")
scatter2d(data, labels, cluster_centers_, algorithm_name)
save_fig(f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _scatter3d(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
"""Plot the three-dimensional diagram of the clustering result."""
print("-----* Cluster Three-Dimensional Diagram *-----")
print(f"-----* {grah_name} *-----")
scatter3d(data, labels, algorithm_name)
save_fig(f"Cluster Three-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, f"Cluster Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_silhouette_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_silhouette_diagram(
data: pd.DataFrame, labels: pd.Series, name_column: str, model: object, cluster_centers_: np.ndarray, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str
) -> None:
"""Plot the silhouette diagram of the clustering result."""
print("-----* Silhouette Diagram *-----")
print(f"-----* {grah_name} *-----")
plot_silhouette_diagram(data, labels, cluster_centers_, model, algorithm_name)
save_fig(f"Silhouette Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, "Silhouette Diagram - Data With Labels", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path)
if not isinstance(cluster_centers_, str):
cluster_center_data = pd.DataFrame(cluster_centers_, columns=data.columns)
save_data(cluster_center_data, name_column, "Silhouette Diagram - Cluster Centers", local_path, mlflow_path)
save_data(cluster_center_data, name_column, f"{grah_name} - Cluster Centers", local_path, mlflow_path)

@staticmethod
def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_silhouette_value_diagram(data: pd.DataFrame, labels: pd.Series, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
"""Plot the silhouette value diagram of the clustering result."""
print("-----* Silhouette value Diagram *-----")
print(f"-----* {grah_name} *-----")
plot_silhouette_value_diagram(data, labels, algorithm_name)
save_fig(f"Silhouette value Diagram - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
data_with_labels = pd.concat([data, labels], axis=1)
save_data(data_with_labels, name_column, "Silhouette value Diagram - Data With Labels", local_path, mlflow_path)
save_data(data_with_labels, name_column, f"{grah_name} - Data With Labels", local_path, mlflow_path)

def common_components(self) -> None:
"""Invoke all common application functions for clustering algorithms."""
Expand Down Expand Up @@ -157,6 +159,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)

# choose three of dimensions to draw
Expand All @@ -168,6 +171,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
)
elif self.X.shape[1] == 3:
# choose two of dimensions to draw
Expand All @@ -180,6 +184,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)

# no need to choose
Expand All @@ -190,6 +195,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_THREE_DIMENSIONAL_DIAGRAM.value,
)
elif self.X.shape[1] == 2:
self._scatter2d(
Expand All @@ -200,6 +206,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.CLUSTER_TWO_DIMENSIONAL_DIAGRAM.value,
)
else:
pass
Expand All @@ -213,6 +220,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.SILHOUETTE_DIAGRAM.value,
)
self._plot_silhouette_value_diagram(
data=self.X,
Expand All @@ -221,6 +229,7 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=ClusteringCommonFunction.SILHOUETTE_VALUE_DIAGRAM.value,
)


Expand Down
27 changes: 15 additions & 12 deletions geochemistrypi/data_mining/model/decomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,28 +67,28 @@ def _reduced_data2pd(self, reduced_data: np.ndarray, components_num: int) -> Non
self.X_reduced.columns = pa_name

@staticmethod
def _plot_2d_scatter_diagram(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_2d_scatter_diagram(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
"""Plot the two-dimensional diagram of the decomposition result."""
print("-----* Decomposition Two-Dimensional Diagram *-----")
print(f"-----* {grah_name} *-----")
plot_2d_scatter_diagram(data, algorithm_name)
save_fig(f"Decomposition Two-Dimensional Diagram - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"Decomposition Two-Dimensional Data - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_heatmap(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_heatmap(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
"""Plot a heatmap for the decomposition result."""
print("-----* Decomposition Heatmap *-----")
print(f"-----* {grah_name} *-----")
plot_heatmap(data, algorithm_name)
save_fig(f"Decomposition Heatmap - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"Decomposition Heatmap Data - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)

@staticmethod
def _plot_contour(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _plot_contour(data: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None:
"""Plot a contour plot for dimensionality reduction results."""
print("-----* Dimensionality Reduction Contour Plot *-----")
print(f"-----* {grah_name} *-----")
plot_contour(data, algorithm_name)
save_fig(f"Dimensionality Reduction Contour Plot - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"Dimensionality Reduction Contour Plot Data - {algorithm_name}", local_path, mlflow_path)
save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path)
save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path)

def common_components(self) -> None:
"""Invoke all common application functions for decomposition algorithms by Scikit-learn framework."""
Expand All @@ -100,20 +100,23 @@ def common_components(self) -> None:
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=DecompositionCommonFunction.DECOMPOSITION_TWO_DIMENSIONAL_DIAGRAM.value,
)
self._plot_heatmap(
data=self.X,
name_column=self.name_all,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=DecompositionCommonFunction.DECOMPOSITION_HEATMAP.value,
)
self._plot_contour(
data=self.X,
name_column=self.name_all,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
grah_name=DecompositionCommonFunction.DIMENSIONALITY_REDUCTION_CONTOUR_PLOT.value,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ class ClusteringCommonFunction(Enum):
CLUSTER_LABELS = "Cluster Labels"
MODEL_PERSISTENCE = "Model Persistence"
MODEL_SCORE = "Model Score"
CLUSTER_TWO_DIMENSIONAL_DIAGRAM = "Cluster Two-Dimensional Diagram"
CLUSTER_THREE_DIMENSIONAL_DIAGRAM = "Cluster Three-Dimensional Diagram"
SILHOUETTE_DIAGRAM = "Silhouette Diagram"
SILHOUETTE_VALUE_DIAGRAM = "Silhouette value Diagram"


class KMeansSpecialFunction(Enum):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

class DecompositionCommonFunction(Enum):
    """Display names of the functionalities common to decomposition algorithms.

    Each member's ``value`` is the human-readable label of one functionality;
    the diagram-related values are passed as the graph name when plotting and
    saving decomposition results.
    """

    MODEL_PERSISTENCE = "Model Persistence"
    DECOMPOSITION_TWO_DIMENSIONAL = "Decomposition Two-Dimensional Diagram"
    # Alias of DECOMPOSITION_TWO_DIMENSIONAL (same value, so Enum treats it as
    # the same member). Callers refer to the member by this "_DIAGRAM" name,
    # which would otherwise raise AttributeError; keeping the original name
    # first leaves `.name` and iteration order unchanged.
    DECOMPOSITION_TWO_DIMENSIONAL_DIAGRAM = "Decomposition Two-Dimensional Diagram"
    DECOMPOSITION_HEATMAP = "Decomposition Heatmap"
    DIMENSIONALITY_REDUCTION_CONTOUR_PLOT = "Dimensionality Reduction Contour Plot"


class PCASpecialFunction(Enum):
Expand Down
Loading