Skip to content

Commit

Permalink
Merge pull request #322 from ZJUEarthData/dev/Yongkang
Browse files (browse the repository at this point in the history)
feat: store the predicted value of the training set
  • Loading branch information
SanyHe authored Mar 16, 2024
2 parents 7bba815 + 83b07b4 commit b7c5fd4
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 4 deletions.
3 changes: 3 additions & 0 deletions geochemistrypi/data_mining/model/_base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -202,6 +202,7 @@ def data_upload(
X_test: Optional[pd.DataFrame] = None,
y_train: Optional[pd.DataFrame] = None,
y_test: Optional[pd.DataFrame] = None,
y_train_predict: Optional[pd.DataFrame] = None,
y_test_predict: Optional[pd.DataFrame] = None,
) -> None:
"""This method loads the required data into the base class's attributes."""
Expand All @@ -219,6 +220,8 @@ def data_upload(
WorkflowBase.y_test = y_test
if y_test_predict is not None:
WorkflowBase.y_test_predict = y_test_predict
if y_train_predict is not None:
WorkflowBase.y_train_predict = y_train_predict

@staticmethod
def data_save(df: pd.DataFrame, df_name: str, local_path: str, mlflow_path: str, slogan: str) -> None:
Expand Down
12 changes: 10 additions & 2 deletions geochemistrypi/data_mining/process/classify.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -167,6 +167,9 @@ def activate(

# Use Scikit-learn style API to process input data
self.clf_workflow.fit(X_train, y_train)
y_train_predict = self.clf_workflow.predict(X_train)
y_train_predict = self.clf_workflow.np2pd(y_train_predict, y_train.columns)
self.clf_workflow.data_upload(y_train_predict=y_train_predict)
y_test_predict = self.clf_workflow.predict(X_test)
y_test_predict = self.clf_workflow.np2pd(y_test_predict, y_test.columns)
self.clf_workflow.data_upload(X=X, y=y, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test, y_test_predict=y_test_predict)
Expand All @@ -181,7 +184,8 @@ def activate(
self.clf_workflow.special_components()

# Save the prediction result
self.clf_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Prediction")
self.clf_workflow.data_save(y_train_predict, "Y Train Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Train Prediction")
self.clf_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Test Prediction")

# Save the trained model
self.clf_workflow.model_save()
Expand Down Expand Up @@ -233,6 +237,9 @@ def activate(

# Use Scikit-learn style API to process input data
self.clf_workflow.fit(X_train, y_train, is_automl)
y_train_predict = self.clf_workflow.predict(X_train, is_automl)
y_train_predict = self.clf_workflow.np2pd(y_train_predict, y_train.columns)
self.clf_workflow.data_upload(y_train_predict=y_train_predict)
y_test_predict = self.clf_workflow.predict(X_test, is_automl)
y_test_predict = self.clf_workflow.np2pd(y_test_predict, y_test.columns)
self.clf_workflow.data_upload(X=X, y=y, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test, y_test_predict=y_test_predict)
Expand All @@ -250,7 +257,8 @@ def activate(
self.clf_workflow.special_components(is_automl)

# Save the prediction result
self.clf_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Prediction")
self.clf_workflow.data_save(y_train_predict, "Y Train Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Train Prediction")
self.clf_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Test Prediction")

# Save the trained model
self.clf_workflow.model_save(is_automl)
12 changes: 10 additions & 2 deletions geochemistrypi/data_mining/process/regress.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -211,6 +211,9 @@ def activate(

# Use Scikit-learn style API to process input data
self.reg_workflow.fit(X_train, y_train)
y_train_predict = self.reg_workflow.predict(X_train)
y_train_predict = self.reg_workflow.np2pd(y_train_predict, y_train.columns)
self.reg_workflow.data_upload(y_train_predict=y_train_predict)
y_test_predict = self.reg_workflow.predict(X_test)
y_test_predict = self.reg_workflow.np2pd(y_test_predict, y_test.columns)
self.reg_workflow.data_upload(y_test_predict=y_test_predict)
Expand All @@ -225,7 +228,8 @@ def activate(
self.reg_workflow.special_components()

# Save the prediction result
self.reg_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Prediction")
self.reg_workflow.data_save(y_train_predict, "Y Train Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Train Prediction")
self.reg_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Test Prediction")

# Save the trained model
self.reg_workflow.model_save()
Expand Down Expand Up @@ -286,6 +290,9 @@ def activate(

# Use Scikit-learn style API to process input data
self.reg_workflow.fit(X_train, y_train, is_automl)
y_train_predict = self.reg_workflow.predict(X_train, is_automl)
y_train_predict = self.reg_workflow.np2pd(y_train_predict, y_train.columns)
self.reg_workflow.data_upload(y_train_predict=y_train_predict)
y_test_predict = self.reg_workflow.predict(X_test, is_automl)
y_test_predict = self.reg_workflow.np2pd(y_test_predict, y_test.columns)
self.reg_workflow.data_upload(y_test_predict=y_test_predict)
Expand All @@ -303,7 +310,8 @@ def activate(
self.reg_workflow.special_components(is_automl)

# Save the prediction result
self.reg_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Prediction")
self.reg_workflow.data_save(y_train_predict, "Y Train Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Train Prediction")
self.reg_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Test Prediction")

# Save the trained model
self.reg_workflow.model_save(is_automl)

0 comments on commit b7c5fd4

Please sign in to comment.