From 83b07b43218e010eaced6b36880f6563d60a2158 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 16 Mar 2024 16:16:07 +0800 Subject: [PATCH] feat: save the training-set predictions alongside the test-set predictions --- geochemistrypi/data_mining/model/_base.py | 3 +++ geochemistrypi/data_mining/process/classify.py | 12 ++++++++++-- geochemistrypi/data_mining/process/regress.py | 12 ++++++++++-- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/geochemistrypi/data_mining/model/_base.py b/geochemistrypi/data_mining/model/_base.py index 42cb4817..c57d14c8 100644 --- a/geochemistrypi/data_mining/model/_base.py +++ b/geochemistrypi/data_mining/model/_base.py @@ -202,6 +202,7 @@ def data_upload( X_test: Optional[pd.DataFrame] = None, y_train: Optional[pd.DataFrame] = None, y_test: Optional[pd.DataFrame] = None, + y_train_predict: Optional[pd.DataFrame] = None, y_test_predict: Optional[pd.DataFrame] = None, ) -> None: """This method loads the required data into the base class's attributes.""" @@ -219,6 +220,8 @@ def data_upload( WorkflowBase.y_test = y_test if y_test_predict is not None: WorkflowBase.y_test_predict = y_test_predict + if y_train_predict is not None: + WorkflowBase.y_train_predict = y_train_predict @staticmethod def data_save(df: pd.DataFrame, df_name: str, local_path: str, mlflow_path: str, slogan: str) -> None: diff --git a/geochemistrypi/data_mining/process/classify.py b/geochemistrypi/data_mining/process/classify.py index 8e6b6dfe..42768353 100644 --- a/geochemistrypi/data_mining/process/classify.py +++ b/geochemistrypi/data_mining/process/classify.py @@ -167,6 +167,9 @@ def activate( # Use Scikit-learn style API to process input data self.clf_workflow.fit(X_train, y_train) + y_train_predict = self.clf_workflow.predict(X_train) + y_train_predict = self.clf_workflow.np2pd(y_train_predict, y_train.columns) + self.clf_workflow.data_upload(y_train_predict=y_train_predict) y_test_predict = self.clf_workflow.predict(X_test) y_test_predict = 
self.clf_workflow.np2pd(y_test_predict, y_test.columns) self.clf_workflow.data_upload(X=X, y=y, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test, y_test_predict=y_test_predict) @@ -181,7 +184,8 @@ def activate( self.clf_workflow.special_components() # Save the prediction result - self.clf_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Prediction") + self.clf_workflow.data_save(y_train_predict, "Y Train Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Train Prediction") + self.clf_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Test Prediction") # Save the trained model self.clf_workflow.model_save() @@ -233,6 +237,9 @@ def activate( # Use Scikit-learn style API to process input data self.clf_workflow.fit(X_train, y_train, is_automl) + y_train_predict = self.clf_workflow.predict(X_train, is_automl) + y_train_predict = self.clf_workflow.np2pd(y_train_predict, y_train.columns) + self.clf_workflow.data_upload(y_train_predict=y_train_predict) y_test_predict = self.clf_workflow.predict(X_test, is_automl) y_test_predict = self.clf_workflow.np2pd(y_test_predict, y_test.columns) self.clf_workflow.data_upload(X=X, y=y, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test, y_test_predict=y_test_predict) @@ -250,7 +257,8 @@ def activate( self.clf_workflow.special_components(is_automl) # Save the prediction result - self.clf_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Prediction") + self.clf_workflow.data_save(y_train_predict, "Y Train Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Train Prediction") + self.clf_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), 
MLFLOW_ARTIFACT_DATA_PATH, "Model Test Prediction") # Save the trained model self.clf_workflow.model_save(is_automl) diff --git a/geochemistrypi/data_mining/process/regress.py b/geochemistrypi/data_mining/process/regress.py index 36d2a7fa..2ee78251 100644 --- a/geochemistrypi/data_mining/process/regress.py +++ b/geochemistrypi/data_mining/process/regress.py @@ -211,6 +211,9 @@ def activate( # Use Scikit-learn style API to process input data self.reg_workflow.fit(X_train, y_train) + y_train_predict = self.reg_workflow.predict(X_train) + y_train_predict = self.reg_workflow.np2pd(y_train_predict, y_train.columns) + self.reg_workflow.data_upload(y_train_predict=y_train_predict) y_test_predict = self.reg_workflow.predict(X_test) y_test_predict = self.reg_workflow.np2pd(y_test_predict, y_test.columns) self.reg_workflow.data_upload(y_test_predict=y_test_predict) @@ -225,7 +228,8 @@ def activate( self.reg_workflow.special_components() # Save the prediction result - self.reg_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Prediction") + self.reg_workflow.data_save(y_train_predict, "Y Train Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Train Prediction") + self.reg_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Test Prediction") # Save the trained model self.reg_workflow.model_save() @@ -286,6 +290,9 @@ def activate( # Use Scikit-learn style API to process input data self.reg_workflow.fit(X_train, y_train, is_automl) + y_train_predict = self.reg_workflow.predict(X_train, is_automl) + y_train_predict = self.reg_workflow.np2pd(y_train_predict, y_train.columns) + self.reg_workflow.data_upload(y_train_predict=y_train_predict) y_test_predict = self.reg_workflow.predict(X_test, is_automl) y_test_predict = self.reg_workflow.np2pd(y_test_predict, y_test.columns) 
self.reg_workflow.data_upload(y_test_predict=y_test_predict) @@ -303,7 +310,8 @@ def activate( self.reg_workflow.special_components(is_automl) # Save the prediction result - self.reg_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Prediction") + self.reg_workflow.data_save(y_train_predict, "Y Train Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Train Prediction") + self.reg_workflow.data_save(y_test_predict, "Y Test Predict", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Model Test Prediction") # Save the trained model self.reg_workflow.model_save(is_automl)