From c2322ba49fece35eb4fd0b0f2bdca83f63a9e341 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 8 Apr 2024 16:19:37 +0800 Subject: [PATCH] fix: Improved the functionality for dropping missing values. --- geochemistrypi/data_mining/cli_pipeline.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/geochemistrypi/data_mining/cli_pipeline.py b/geochemistrypi/data_mining/cli_pipeline.py index e23320e5..55be38f5 100644 --- a/geochemistrypi/data_mining/cli_pipeline.py +++ b/geochemistrypi/data_mining/cli_pipeline.py @@ -295,12 +295,6 @@ def cli_pipeline(training_data_path: str, application_data_path: Optional[str] = print("Notice: Drop the rows with missing values may lead to a significant loss of data if too many features are chosen.") print("Which strategy do you want to apply?") drop_missing_value_strategy_num = limit_num_input(DROP_MISSING_VALUE_STRATEGY, SECTION[1], num_input) - if drop_missing_value_strategy_num == 1: - print("-*-*- Drop the rows with Missing Values -*-*-") - num2option(DROP_MISSING_VALUE_STRATEGY) - print("Notice: Drop the rows with missing values may lead to a significant loss of data if too many features are chosen.") - print("Which strategy do you want to apply?") - drop_missing_value_strategy_num = limit_num_input(DROP_MISSING_VALUE_STRATEGY, SECTION[1], num_input) if drop_missing_value_strategy_num == 1: # Drop the rows with missing values data_selected_dropped = data_selected.dropna() @@ -313,21 +307,6 @@ def cli_pipeline(training_data_path: str, application_data_path: Optional[str] = save_data(data_selected_dropped, "Data Selected Dropped-Imputed", GEOPI_OUTPUT_ARTIFACTS_DATA_PATH, MLFLOW_ARTIFACT_DATA_PATH) drop_rows_with_missing_value_flag = True imputed_flag = False - elif drop_missing_value_strategy_num == 2: - show_data_columns(data_selected.columns) - drop_data_selected = create_sub_data_set(data_selected) - for column_name in drop_data_selected.columns: - # Drop the rows with missing values - data_selected_dropped = data_selected.dropna(subset=[column_name]) - # Reset the index of the data set after dropping the rows with missing values. - data_selected_dropped = data_selected_dropped.reset_index(drop=True) - print("Successfully drop the rows with missing values.") - print("The Selected Data Set After Dropping:") - print(data_selected_dropped) - print("Basic Statistical Information:") - save_data(data_selected_dropped, "Data Selected Dropped-Imputed", GEOPI_OUTPUT_ARTIFACTS_DATA_PATH, MLFLOW_ARTIFACT_DATA_PATH) - drop_rows_with_missing_value_flag = True - imputed_flag = False elif drop_missing_value_strategy_num == 2: show_data_columns(data_selected.columns) drop_data_selected = create_sub_data_set(data_selected)