Skip to content

Commit

Permalink
fix: Improved the functionality for dropping missing values.
Browse files: browse the repository at this point in the history.
  • Loading branch information
unknown authored and unknown committed Apr 8, 2024
1 parent feb25f7 commit c2322ba
Showing 1 changed file with 0 additions and 21 deletions.
21 changes: 0 additions & 21 deletions geochemistrypi/data_mining/cli_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,6 @@ def cli_pipeline(training_data_path: str, application_data_path: Optional[str] =
print("Notice: Drop the rows with missing values may lead to a significant loss of data if too many features are chosen.")
print("Which strategy do you want to apply?")
drop_missing_value_strategy_num = limit_num_input(DROP_MISSING_VALUE_STRATEGY, SECTION[1], num_input)
if drop_missing_value_strategy_num == 1:
print("-*-*- Drop the rows with Missing Values -*-*-")
num2option(DROP_MISSING_VALUE_STRATEGY)
print("Notice: Drop the rows with missing values may lead to a significant loss of data if too many features are chosen.")
print("Which strategy do you want to apply?")
drop_missing_value_strategy_num = limit_num_input(DROP_MISSING_VALUE_STRATEGY, SECTION[1], num_input)
if drop_missing_value_strategy_num == 1:
# Drop the rows with missing values
data_selected_dropped = data_selected.dropna()
Expand All @@ -313,21 +307,6 @@ def cli_pipeline(training_data_path: str, application_data_path: Optional[str] =
save_data(data_selected_dropped, "Data Selected Dropped-Imputed", GEOPI_OUTPUT_ARTIFACTS_DATA_PATH, MLFLOW_ARTIFACT_DATA_PATH)
drop_rows_with_missing_value_flag = True
imputed_flag = False
elif drop_missing_value_strategy_num == 2:
show_data_columns(data_selected.columns)
drop_data_selected = create_sub_data_set(data_selected)
for column_name in drop_data_selected.columns:
# Drop the rows with missing values
data_selected_dropped = data_selected.dropna(subset=[column_name])
# Reset the index of the data set after dropping the rows with missing values.
data_selected_dropped = data_selected_dropped.reset_index(drop=True)
print("Successfully drop the rows with missing values.")
print("The Selected Data Set After Dropping:")
print(data_selected_dropped)
print("Basic Statistical Information:")
save_data(data_selected_dropped, "Data Selected Dropped-Imputed", GEOPI_OUTPUT_ARTIFACTS_DATA_PATH, MLFLOW_ARTIFACT_DATA_PATH)
drop_rows_with_missing_value_flag = True
imputed_flag = False
elif drop_missing_value_strategy_num == 2:
show_data_columns(data_selected.columns)
drop_data_selected = create_sub_data_set(data_selected)
Expand Down

0 comments on commit c2322ba

Please sign in to comment.