From c669eb42e3df771e1b8b42d6882879c52216df99 Mon Sep 17 00:00:00 2001 From: Sunjianhao <97781484+Jin-sjh@users.noreply.github.com> Date: Sun, 5 Nov 2023 21:42:19 +0800 Subject: [PATCH] perf: Added the recognition of Chinese parentheses and Spaces --- .../data_mining/data/data_readiness.py | 112 ++++++++++-------- geochemistrypi/data_mining/plot/map_plot.py | 4 +- 2 files changed, 67 insertions(+), 49 deletions(-) diff --git a/geochemistrypi/data_mining/data/data_readiness.py b/geochemistrypi/data_mining/data/data_readiness.py index ef23ea3b..133c1289 100644 --- a/geochemistrypi/data_mining/data/data_readiness.py +++ b/geochemistrypi/data_mining/data/data_readiness.py @@ -144,51 +144,69 @@ def create_sub_data_set(data: pd.DataFrame) -> pd.DataFrame: pd.DataFrame The sub data set. """ - sub_data_set_columns_range = input( - "Select the data range you want to process.\n" - "Input format:\n" - 'Format 1: "[**, **]; **; [**, **]", such as "[1, 3]; 7; [10, 13]" ' - "--> you want to deal with the columns 1, 2, 3, 7, 10, 11, 12, 13 \n" - 'Format 2: "xx", such as "7" --> you want to deal with the columns 7 \n' - "@input: " + sub_data_set_columns_range = str( + input( + "Select the data range you want to process.\n" + "Input format:\n" + 'Format 1: "[**, **]; **; [**, **]", such as "[1, 3]; 7; [10, 13]" ' + "--> you want to deal with the columns 1, 2, 3, 7, 10, 11, 12, 13 \n" + 'Format 2: "xx", such as "7" --> you want to deal with the columns 7 \n' + "@input: " + ) ) while True: - temp = sub_data_set_columns_range.split(";") - if len(sub_data_set_columns_range) != 0: - for i in range(len(temp)): - if isinstance(eval(temp[i]), int): - if int(temp[i]) > int(data.shape[1]): - print("The input {} is incorrect!".format(temp[i])) - print("The number you entered is out of the range of options: 1 - {}".format(data.shape[1])) - time.sleep(0.5) - sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ") - judge = True - break - else: - judge = False - else: - min_max = eval(temp[i]) - if int(min_max[0]) >= int(min_max[1]): - print("There is a problem with the format of the data you entered!") - time.sleep(0.5) - sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ") - judge = True - break - elif int(min_max[1]) > int(data.shape[1]): - print("The input {} is incorrect!".format(temp[i])) - print("The number you entered is out of the range of options: 1 - {}".format(data.shape[1])) - time.sleep(0.5) - sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ") - judge = True - break - else: - judge = False - else: - print("You have not entered the sequence number of the selected data!") - print("The number you entered should be in the range of options: 1 - {}".format(data.shape[1])) + if ("【" in sub_data_set_columns_range) or ("】" in sub_data_set_columns_range): + print("There is a problem with the format of the parentheses entered !") time.sleep(0.5) - sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ") + sub_data_set_columns_range = str(input("-----* Please enter again *-----\n@input: ")) judge = True + else: + monitor_number = 0 + for i in ["[", "]"]: + if i in sub_data_set_columns_range: + monitor_number = monitor_number + 1 + if monitor_number % 2 != 0: + print("There is a problem with the format of the parentheses entered !") + time.sleep(0.5) + sub_data_set_columns_range = str(input("-----* Please enter again *-----\n@input: ")) + judge = True + sub_data_set_columns_range = sub_data_set_columns_range.replace(" ", "") + temp = sub_data_set_columns_range.split(";") + if len(sub_data_set_columns_range) != 0: + for i in range(len(temp)): + if isinstance(eval(temp[i]), int): + if int(temp[i]) > int(data.shape[1]): + print("The input {} is incorrect!".format(temp[i])) + print("The number you entered is out of the range of options: 1 - {}".format(data.shape[1])) + time.sleep(0.5) + sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ") + judge = True + break + else: + judge = False + else: + min_max = eval(temp[i]) + if int(min_max[0]) >= int(min_max[1]): + print("There is a problem with the format of the data you entered!") + time.sleep(0.5) + sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ") + judge = True + break + elif int(min_max[1]) > int(data.shape[1]): + print("The input {} is incorrect!".format(temp[i])) + print("The number you entered is out of the range of options: 1 - {}".format(data.shape[1])) + time.sleep(0.5) + sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ") + judge = True + break + else: + judge = False + else: + print("You have not entered the sequence number of the selected data!") + print("The number you entered should be in the range of options: 1 - {}".format(data.shape[1])) + time.sleep(0.5) + sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ") + judge = True if judge is False: break @@ -201,23 +219,23 @@ def create_sub_data_set(data: pd.DataFrame) -> pd.DataFrame: except SyntaxError: print("Warning: Please use English input method editor.") judge = True - sub_data_set_columns_range = input("@input: ") + sub_data_set_columns_range = str(input("@input: ")) except NameError: print("Warning: Please follow the rules and re-enter.") judge = True - sub_data_set_columns_range = input("@input: ") + sub_data_set_columns_range = str(input("@input: ")) except UnicodeDecodeError: print("Warning: Please use English input method editor.") judge = True - sub_data_set_columns_range = input("@input: ") + sub_data_set_columns_range = str(input("@input: ")) except IndexError: print("Warning: Please follow the rules and re-enter.") judge = True - sub_data_set_columns_range = input("@input: ") + sub_data_set_columns_range = str(input("@input: ")) except TypeError: print("Warning: Please follow the rules and re-enter.") judge = True - sub_data_set_columns_range = input("@input: ") + sub_data_set_columns_range = str(input("@input: ")) else: data_checking = data.iloc[:, sub_data_set_columns_selected] for i in data_checking.columns.values: @@ -233,7 +251,7 @@ def create_sub_data_set(data: pd.DataFrame) -> pd.DataFrame: print(f"Warning: The data type of selected column {df_test.columns.values} is not numeric!" " Please make sure that the selected data type is numeric and re-enter.") judge = True if judge is True: - sub_data_set_columns_range = input("@input: ") + sub_data_set_columns_range = str(input("@input: ")) if judge is False: break diff --git a/geochemistrypi/data_mining/plot/map_plot.py b/geochemistrypi/data_mining/plot/map_plot.py index 88c232a0..a7275c98 100644 --- a/geochemistrypi/data_mining/plot/map_plot.py +++ b/geochemistrypi/data_mining/plot/map_plot.py @@ -133,8 +133,8 @@ def process_world_map(data: pd.DataFrame) -> None: map_flag = 0 is_map_projection = 0 detection_index = 0 - lon = ["LONGITUDE", "Longitude (°E)", "longitude", "Longitude", "经度 (°N)", "经度"] - lat = ["LATITUDE", "Latitude (°N)", "latitude", "Latitude", "纬度 (°E)", "纬度"] + lon = ["LONGITUDE", "Longitude (°E)", "longitude", "Longitude", "经度 (°N)", "经度", "lng"] + lat = ["LATITUDE", "Latitude (°N)", "latitude", "Latitude", "纬度 (°E)", "纬度", "lat"] j = [j for j in lat if j in data.columns] i = [i for i in lon if i in data.columns] if bool(len(j) > 0):