Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update timestamp to identify all duplicates with information flag, rather than just non-equal ones #184

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def flag_write(outfilename: str, df: pd.DataFrame, diagnostics: bool = False) ->
this_var_data = df[var].fillna(MDI).to_numpy().astype(float)
this_var_data = np.ma.masked_where(this_var_data == MDI, this_var_data)

# write out for all tests, regardless of whether they are set for this variable or not
for test in QC_TESTS.keys():
locs = flags[flags.str.contains(test)]

Expand All @@ -219,7 +220,7 @@ def flag_write(outfilename: str, df: pd.DataFrame, diagnostics: bool = False) ->
logging.info(f"{var} - {flagged.shape[0]}")
if diagnostics:
print(f"{var} - {flagged.shape[0]}")
print(f"{var} - {flagged.shape[0]/np.ma.count(this_var_data)}")
print(f"{var} - {100*flagged.shape[0]/np.ma.count(this_var_data):.1f}%")


return # flag_write
Expand Down
12 changes: 6 additions & 6 deletions plot_map_of_flagging_rates.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@


TESTS_FOR_VARS = {"temperature": ["All", "C", "D", "E", "F", "H", "K", "L", "N", "T",
"S", "U", "V", "W", "d", "o", "x", "y",],
"S", "U", "V", "W", "d", "o", "x", "y", "2",],
"dew_point_temperature": ['All', 'C', 'D', 'E', 'F', 'H', 'K', 'L', 'N', 'S',
'T', 'V', 'W', 'd', 'h', 'n', 'o', 'x', 'y'],
'T', 'V', 'W', 'd', 'h', 'n', 'o', 'x', 'y', "2",],
"sea_level_pressure" : ['All', 'D', 'E', 'F', 'H', 'K', 'L', 'N', 'S',
'T', 'V', 'W', 'd', 'o', 'p', 'x', 'y'],
'T', 'V', 'W', 'd', 'o', 'p', 'x', 'y', "2",],
"station_level_pressure" : ['All', 'D', 'E', 'F', 'H', 'K', 'L', 'N',
'S', 'T', 'V', 'd', 'o', 'p', 'x', 'y'],
'S', 'T', 'V', 'd', 'o', 'p', 'x', 'y', "2",],
"wind_speed" : ['All', 'E', 'H', 'K', 'L', 'N', 'S', 'T', 'V',
'W', 'o', 'w', 'x', 'y'],
"wind_direction" : ['All', 'E', 'H', 'K', 'L', 'w', 'x', 'y', "1"]}
'W', 'o', 'w', 'x', 'y', "2",],
"wind_direction" : ['All', 'E', 'H', 'K', 'L', 'w', 'x', 'y', "1", "2",]}

UNITS = {"" : "%", "_counts" : "cts"}

Expand Down
14 changes: 11 additions & 3 deletions qc_tests/timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,19 @@ def identify_multiple_values(obs_var: utils.Meteorological_Variable, times: np.n
print(f" Number of identical timestamps in {obs_var.name}: {multiple_obs_at_time.shape[0]}")

if len(multiple_obs_at_time) != 0:
# do the observations differ for the entries
suspect_locs, = np.ma.where(value_diffs[multiple_obs_at_time] != 0)

# set the first of the obs, then the second which make the diff
compressed_flags[multiple_obs_at_time[suspect_locs]] = "T"
compressed_flags[multiple_obs_at_time[suspect_locs]+1] = "T"
if len(suspect_locs) > 0:
# Observations have different values, so not clear which is correct.
# Flag both
# set the first of the obs, then the second which make the diff
compressed_flags[multiple_obs_at_time[suspect_locs]] = "T"
compressed_flags[multiple_obs_at_time[suspect_locs]+1] = "T"
else:
# Observations have the _same_ value, so add information flag only
compressed_flags[multiple_obs_at_time] = "2"
compressed_flags[multiple_obs_at_time+1] = "2"

# Uncompress the flags & insert
flags = np.array(["" for i in range(obs_var.data.shape[0])])
Expand Down
1 change: 1 addition & 0 deletions qc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"x" : "Excess streak proportion",
"y" : "Repeated Day streaks",
"1" : "Wind logical - calm, masked direction",
"2" : "Timestamp - identical observation qvalues",
}


Expand Down
4 changes: 4 additions & 0 deletions tests/example_data/Example_flag_file.flg
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,9 @@ temperature : x : 0.0
temperature : x_counts : 0
temperature : y : 0.0
temperature : y_counts : 0
temperature : 1 : 0.0
temperature : 1_counts : 0
temperature : 2 : 0.0
temperature : 2_counts : 0
temperature : All : 0.8
temperature : All_counts : 4
5 changes: 3 additions & 2 deletions tests/test_qc_tests/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,11 @@ def test_identify_multiple_values_same():
station.times[3] = station.times[2]

# Even though two time stamps are the same, the values are identical
# (np.ones) so no flag set
# (np.ones) but an information flag is still set
expected = np.array(["" for _ in range(10)])
expected[2:4] = "2"

timestamp.identify_multiple_values(obs_var, station. times, {})
timestamp.identify_multiple_values(obs_var, station.times, {})

np.testing.assert_array_equal(obs_var.flags, expected)

Expand Down