From d3f51c1885a757be0b557743a10ea1f62cb4b526 Mon Sep 17 00:00:00 2001 From: Robert Dunn Date: Fri, 8 Nov 2024 16:29:46 +0000 Subject: [PATCH 1/2] #182: adding information flags to check. Test runs, but not checked in real life --- plot_map_of_flagging_rates.py | 12 ++++++------ qc_tests/timestamp.py | 14 +++++++++++--- qc_utils.py | 1 + tests/test_qc_tests/test_timestamp.py | 5 +++-- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/plot_map_of_flagging_rates.py b/plot_map_of_flagging_rates.py index ed2835b..175860c 100644 --- a/plot_map_of_flagging_rates.py +++ b/plot_map_of_flagging_rates.py @@ -14,16 +14,16 @@ TESTS_FOR_VARS = {"temperature": ["All", "C", "D", "E", "F", "H", "K", "L", "N", "T", - "S", "U", "V", "W", "d", "o", "x", "y",], + "S", "U", "V", "W", "d", "o", "x", "y", "2",], "dew_point_temperature": ['All', 'C', 'D', 'E', 'F', 'H', 'K', 'L', 'N', 'S', - 'T', 'V', 'W', 'd', 'h', 'n', 'o', 'x', 'y'], + 'T', 'V', 'W', 'd', 'h', 'n', 'o', 'x', 'y', "2",], "sea_level_pressure" : ['All', 'D', 'E', 'F', 'H', 'K', 'L', 'N', 'S', - 'T', 'V', 'W', 'd', 'o', 'p', 'x', 'y'], + 'T', 'V', 'W', 'd', 'o', 'p', 'x', 'y', "2",], "station_level_pressure" : ['All', 'D', 'E', 'F', 'H', 'K', 'L', 'N', - 'S', 'T', 'V', 'd', 'o', 'p', 'x', 'y'], + 'S', 'T', 'V', 'd', 'o', 'p', 'x', 'y', "2",], "wind_speed" : ['All', 'E', 'H', 'K', 'L', 'N', 'S', 'T', 'V', - 'W', 'o', 'w', 'x', 'y'], - "wind_direction" : ['All', 'E', 'H', 'K', 'L', 'w', 'x', 'y', "1"]} + 'W', 'o', 'w', 'x', 'y', "2",], + "wind_direction" : ['All', 'E', 'H', 'K', 'L', 'w', 'x', 'y', "1", "2",]} UNITS = {"" : "%", "_counts" : "cts"} diff --git a/qc_tests/timestamp.py b/qc_tests/timestamp.py index 31a24f2..bab9783 100644 --- a/qc_tests/timestamp.py +++ b/qc_tests/timestamp.py @@ -69,11 +69,19 @@ def identify_multiple_values(obs_var: utils.Meteorological_Variable, times: np.n print(f" Number of identical timestamps in {obs_var.name}: {multiple_obs_at_time.shape[0]}") if len(multiple_obs_at_time) != 
0: + # do the observations differ for the entries suspect_locs, = np.ma.where(value_diffs[multiple_obs_at_time] != 0) - # set the first of the obs, then the second which make the diff - compressed_flags[multiple_obs_at_time[suspect_locs]] = "T" - compressed_flags[multiple_obs_at_time[suspect_locs]+1] = "T" + if len(suspect_locs) > 0: + # Observations have different values, so not clear which is correct. + # Flag both + # set the first of the obs, then the second which make the diff + compressed_flags[multiple_obs_at_time[suspect_locs]] = "T" + compressed_flags[multiple_obs_at_time[suspect_locs]+1] = "T" + else: + # Observations have the _same_ value, so add information flag only + compressed_flags[multiple_obs_at_time] = "2" + compressed_flags[multiple_obs_at_time+1] = "2" # Uncompress the flags & insert flags = np.array(["" for i in range(obs_var.data.shape[0])]) diff --git a/qc_utils.py b/qc_utils.py index 07627bc..0eab133 100644 --- a/qc_utils.py +++ b/qc_utils.py @@ -49,6 +49,7 @@ "x" : "Excess streak proportion", "y" : "Repeated Day streaks", "1" : "Wind logical - calm, masked direction", + "2" : "Timestamp - identical observation qvalues", } diff --git a/tests/test_qc_tests/test_timestamp.py b/tests/test_qc_tests/test_timestamp.py index dc51f7d..aeea3eb 100644 --- a/tests/test_qc_tests/test_timestamp.py +++ b/tests/test_qc_tests/test_timestamp.py @@ -88,10 +88,11 @@ def test_identify_multiple_values_same(): station.times[3] = station.times[2] # Even though two time stamps are the same, the values are identical - # (np.ones) so no flag set + # (np.ones) but an information flag still set expected = np.array(["" for _ in range(10)]) + expected[2:4] = "2" - timestamp.identify_multiple_values(obs_var, station. 
times, {}) + timestamp.identify_multiple_values(obs_var, station.times, {}) np.testing.assert_array_equal(obs_var.flags, expected) From 38206bec4f4172d0d6bee3774dd4b8221d1111b4 Mon Sep 17 00:00:00 2001 From: Robert Dunn Date: Fri, 8 Nov 2024 17:31:49 +0000 Subject: [PATCH 2/2] #182: updated flag file, improved verbose output --- io_utils.py | 3 ++- tests/example_data/Example_flag_file.flg | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/io_utils.py b/io_utils.py index 54fe990..248dddd 100644 --- a/io_utils.py +++ b/io_utils.py @@ -196,6 +196,7 @@ def flag_write(outfilename: str, df: pd.DataFrame, diagnostics: bool = False) -> this_var_data = df[var].fillna(MDI).to_numpy().astype(float) this_var_data = np.ma.masked_where(this_var_data == MDI, this_var_data) + # write out for all tests, regardless if set for this variable or not for test in QC_TESTS.keys(): locs = flags[flags.str.contains(test)] @@ -219,7 +220,7 @@ def flag_write(outfilename: str, df: pd.DataFrame, diagnostics: bool = False) -> logging.info(f"{var} - {flagged.shape[0]}") if diagnostics: print(f"{var} - {flagged.shape[0]}") - print(f"{var} - {flagged.shape[0]/np.ma.count(this_var_data)}") + print(f"{var} - {100*flagged.shape[0]/np.ma.count(this_var_data):.1f}%") return # flag_write diff --git a/tests/example_data/Example_flag_file.flg b/tests/example_data/Example_flag_file.flg index 54c5475..3e1cfc0 100644 --- a/tests/example_data/Example_flag_file.flg +++ b/tests/example_data/Example_flag_file.flg @@ -40,5 +40,9 @@ temperature : x : 0.0 temperature : x_counts : 0 temperature : y : 0.0 temperature : y_counts : 0 +temperature : 1 : 0.0 +temperature : 1_counts : 0 +temperature : 2 : 0.0 +temperature : 2_counts : 0 temperature : All : 0.8 temperature : All_counts : 4