Skip to content

Commit

Permalink
Merge pull request #277 from ZJUEarthData/dev/Jin
Browse files Browse the repository at this point in the history
perf: Added recognition of Chinese parentheses and spaces
  • Loading branch information
SanyHe authored Nov 7, 2023
2 parents 56159c5 + c669eb4 commit aa8a5f4
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 49 deletions.
112 changes: 65 additions & 47 deletions geochemistrypi/data_mining/data/data_readiness.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,51 +144,69 @@ def create_sub_data_set(data: pd.DataFrame) -> pd.DataFrame:
pd.DataFrame
The sub data set.
"""
sub_data_set_columns_range = input(
"Select the data range you want to process.\n"
"Input format:\n"
'Format 1: "[**, **]; **; [**, **]", such as "[1, 3]; 7; [10, 13]" '
"--> you want to deal with the columns 1, 2, 3, 7, 10, 11, 12, 13 \n"
'Format 2: "xx", such as "7" --> you want to deal with the columns 7 \n'
"@input: "
sub_data_set_columns_range = str(
input(
"Select the data range you want to process.\n"
"Input format:\n"
'Format 1: "[**, **]; **; [**, **]", such as "[1, 3]; 7; [10, 13]" '
"--> you want to deal with the columns 1, 2, 3, 7, 10, 11, 12, 13 \n"
'Format 2: "xx", such as "7" --> you want to deal with the columns 7 \n'
"@input: "
)
)
while True:
temp = sub_data_set_columns_range.split(";")
if len(sub_data_set_columns_range) != 0:
for i in range(len(temp)):
if isinstance(eval(temp[i]), int):
if int(temp[i]) > int(data.shape[1]):
print("The input {} is incorrect!".format(temp[i]))
print("The number you entered is out of the range of options: 1 - {}".format(data.shape[1]))
time.sleep(0.5)
sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ")
judge = True
break
else:
judge = False
else:
min_max = eval(temp[i])
if int(min_max[0]) >= int(min_max[1]):
print("There is a problem with the format of the data you entered!")
time.sleep(0.5)
sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ")
judge = True
break
elif int(min_max[1]) > int(data.shape[1]):
print("The input {} is incorrect!".format(temp[i]))
print("The number you entered is out of the range of options: 1 - {}".format(data.shape[1]))
time.sleep(0.5)
sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ")
judge = True
break
else:
judge = False
else:
print("You have not entered the sequence number of the selected data!")
print("The number you entered should be in the range of options: 1 - {}".format(data.shape[1]))
if ("【" in sub_data_set_columns_range) or ("】" in sub_data_set_columns_range):
print("There is a problem with the format of the parentheses entered !")
time.sleep(0.5)
sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ")
sub_data_set_columns_range = str(input("-----* Please enter again *-----\n@input: "))
judge = True
else:
monitor_number = 0
for i in ["[", "]"]:
if i in sub_data_set_columns_range:
monitor_number = monitor_number + 1
if monitor_number % 2 != 0:
print("There is a problem with the format of the parentheses entered !")
time.sleep(0.5)
sub_data_set_columns_range = str(input("-----* Please enter again *-----\n@input: "))
judge = True
sub_data_set_columns_range = sub_data_set_columns_range.replace(" ", "")
temp = sub_data_set_columns_range.split(";")
if len(sub_data_set_columns_range) != 0:
for i in range(len(temp)):
if isinstance(eval(temp[i]), int):
if int(temp[i]) > int(data.shape[1]):
print("The input {} is incorrect!".format(temp[i]))
print("The number you entered is out of the range of options: 1 - {}".format(data.shape[1]))
time.sleep(0.5)
sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ")
judge = True
break
else:
judge = False
else:
min_max = eval(temp[i])
if int(min_max[0]) >= int(min_max[1]):
print("There is a problem with the format of the data you entered!")
time.sleep(0.5)
sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ")
judge = True
break
elif int(min_max[1]) > int(data.shape[1]):
print("The input {} is incorrect!".format(temp[i]))
print("The number you entered is out of the range of options: 1 - {}".format(data.shape[1]))
time.sleep(0.5)
sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ")
judge = True
break
else:
judge = False
else:
print("You have not entered the sequence number of the selected data!")
print("The number you entered should be in the range of options: 1 - {}".format(data.shape[1]))
time.sleep(0.5)
sub_data_set_columns_range = input("-----* Please enter again *-----\n@input: ")
judge = True

if judge is False:
break
Expand All @@ -201,23 +219,23 @@ def create_sub_data_set(data: pd.DataFrame) -> pd.DataFrame:
except SyntaxError:
print("Warning: Please use English input method editor.")
judge = True
sub_data_set_columns_range = input("@input: ")
sub_data_set_columns_range = str(input("@input: "))
except NameError:
print("Warning: Please follow the rules and re-enter.")
judge = True
sub_data_set_columns_range = input("@input: ")
sub_data_set_columns_range = str(input("@input: "))
except UnicodeDecodeError:
print("Warning: Please use English input method editor.")
judge = True
sub_data_set_columns_range = input("@input: ")
sub_data_set_columns_range = str(input("@input: "))
except IndexError:
print("Warning: Please follow the rules and re-enter.")
judge = True
sub_data_set_columns_range = input("@input: ")
sub_data_set_columns_range = str(input("@input: "))
except TypeError:
print("Warning: Please follow the rules and re-enter.")
judge = True
sub_data_set_columns_range = input("@input: ")
sub_data_set_columns_range = str(input("@input: "))
else:
data_checking = data.iloc[:, sub_data_set_columns_selected]
for i in data_checking.columns.values:
Expand All @@ -233,7 +251,7 @@ def create_sub_data_set(data: pd.DataFrame) -> pd.DataFrame:
print(f"Warning: The data type of selected column {df_test.columns.values} is not numeric!" " Please make sure that the selected data type is numeric and re-enter.")
judge = True
if judge is True:
sub_data_set_columns_range = input("@input: ")
sub_data_set_columns_range = str(input("@input: "))
if judge is False:
break

Expand Down
4 changes: 2 additions & 2 deletions geochemistrypi/data_mining/plot/map_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ def process_world_map(data: pd.DataFrame) -> None:
map_flag = 0
is_map_projection = 0
detection_index = 0
lon = ["LONGITUDE", "Longitude (°E)", "longitude", "Longitude", "经度 (°N)", "经度"]
lat = ["LATITUDE", "Latitude (°N)", "latitude", "Latitude", "纬度 (°E)", "纬度"]
lon = ["LONGITUDE", "Longitude (°E)", "longitude", "Longitude", "经度 (°N)", "经度", "lng"]
lat = ["LATITUDE", "Latitude (°N)", "latitude", "Latitude", "纬度 (°E)", "纬度", "lat"]
j = [j for j in lat if j in data.columns]
i = [i for i in lon if i in data.columns]
if bool(len(j) > 0):
Expand Down

0 comments on commit aa8a5f4

Please sign in to comment.