From d3f51c1885a757be0b557743a10ea1f62cb4b526 Mon Sep 17 00:00:00 2001
From: Robert Dunn <robert.dunn@metoffice.gov.uk>
Date: Fri, 8 Nov 2024 16:29:46 +0000
Subject: [PATCH 1/2] #182: adding information flags to check.  Test runs, but
 not checked in real life

---
 plot_map_of_flagging_rates.py         | 12 ++++++------
 qc_tests/timestamp.py                 | 14 +++++++++++---
 qc_utils.py                           |  1 +
 tests/test_qc_tests/test_timestamp.py |  5 +++--
 4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/plot_map_of_flagging_rates.py b/plot_map_of_flagging_rates.py
index ed2835b..175860c 100644
--- a/plot_map_of_flagging_rates.py
+++ b/plot_map_of_flagging_rates.py
@@ -14,16 +14,16 @@
 
 
 TESTS_FOR_VARS = {"temperature": ["All", "C", "D", "E", "F", "H", "K", "L", "N", "T",
-                                  "S", "U", "V", "W", "d", "o", "x", "y",],
+                                  "S", "U", "V", "W", "d", "o", "x", "y", "2",],
                   "dew_point_temperature": ['All', 'C', 'D', 'E', 'F', 'H', 'K', 'L', 'N', 'S',
-                                            'T', 'V', 'W', 'd', 'h', 'n', 'o', 'x', 'y'],
+                                            'T', 'V', 'W', 'd', 'h', 'n', 'o', 'x', 'y', "2",],
                   "sea_level_pressure" : ['All', 'D', 'E', 'F', 'H', 'K', 'L', 'N', 'S',
-                                          'T', 'V', 'W', 'd', 'o', 'p', 'x', 'y'],
+                                          'T', 'V', 'W', 'd', 'o', 'p', 'x', 'y', "2",],
                   "station_level_pressure" : ['All', 'D', 'E', 'F', 'H', 'K', 'L', 'N',
-                                              'S', 'T', 'V', 'd', 'o', 'p', 'x', 'y'],
+                                              'S', 'T', 'V', 'd', 'o', 'p', 'x', 'y', "2",],
                   "wind_speed" : ['All', 'E', 'H', 'K', 'L', 'N', 'S', 'T', 'V',
-                                  'W', 'o', 'w', 'x', 'y'],
-                  "wind_direction" : ['All', 'E', 'H', 'K', 'L', 'w', 'x', 'y', "1"]}
+                                  'W', 'o', 'w', 'x', 'y', "2",],
+                  "wind_direction" : ['All', 'E', 'H', 'K', 'L', 'w', 'x', 'y', "1", "2",]}
 
 UNITS = {"" : "%", "_counts" : "cts"}
 
diff --git a/qc_tests/timestamp.py b/qc_tests/timestamp.py
index 31a24f2..bab9783 100644
--- a/qc_tests/timestamp.py
+++ b/qc_tests/timestamp.py
@@ -69,11 +69,19 @@ def identify_multiple_values(obs_var: utils.Meteorological_Variable, times: np.n
         print(f" Number of identical timestamps in {obs_var.name}: {multiple_obs_at_time.shape[0]}")
 
     if len(multiple_obs_at_time) != 0:
+        # do the observations differ for the entries?
         suspect_locs, = np.ma.where(value_diffs[multiple_obs_at_time] != 0)
 
-        # set the first of the obs, then the second which make the diff
-        compressed_flags[multiple_obs_at_time[suspect_locs]] = "T"
-        compressed_flags[multiple_obs_at_time[suspect_locs]+1] = "T"
+        if len(suspect_locs) > 0:
+            # Observations have different values, so not clear which is correct.
+            #   Flag both
+            # set the first of the obs, then the second which make the diff
+            compressed_flags[multiple_obs_at_time[suspect_locs]] = "T"
+            compressed_flags[multiple_obs_at_time[suspect_locs]+1] = "T"
+        else:
+            # Observations have the _same_ value, so add information flag only
+            compressed_flags[multiple_obs_at_time] = "2"
+            compressed_flags[multiple_obs_at_time+1] = "2"
 
         # Uncompress the flags & insert
         flags = np.array(["" for i in range(obs_var.data.shape[0])])
diff --git a/qc_utils.py b/qc_utils.py
index 07627bc..0eab133 100644
--- a/qc_utils.py
+++ b/qc_utils.py
@@ -49,6 +49,7 @@
             "x" : "Excess streak proportion",
             "y" : "Repeated Day streaks",
             "1" : "Wind logical - calm, masked direction",
+            "2" : "Timestamp - identical observation qvalues",
             }
 
 
diff --git a/tests/test_qc_tests/test_timestamp.py b/tests/test_qc_tests/test_timestamp.py
index dc51f7d..aeea3eb 100644
--- a/tests/test_qc_tests/test_timestamp.py
+++ b/tests/test_qc_tests/test_timestamp.py
@@ -88,10 +88,11 @@ def test_identify_multiple_values_same():
     station.times[3] = station.times[2]
 
     # Even though two time stamps are the same, the values are identical
-    #   (np.ones) so no flag set
+    #   (np.ones), so only an information flag is set
     expected = np.array(["" for _ in range(10)])
+    expected[2:4] = "2"
 
-    timestamp.identify_multiple_values(obs_var, station. times, {})
+    timestamp.identify_multiple_values(obs_var, station.times, {})
 
     np.testing.assert_array_equal(obs_var.flags, expected)
 

From 38206bec4f4172d0d6bee3774dd4b8221d1111b4 Mon Sep 17 00:00:00 2001
From: Robert Dunn <robert.dunn@metoffice.gov.uk>
Date: Fri, 8 Nov 2024 17:31:49 +0000
Subject: [PATCH 2/2] #182: updated flag file, improved verbose output

---
 io_utils.py                              | 3 ++-
 tests/example_data/Example_flag_file.flg | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/io_utils.py b/io_utils.py
index 54fe990..248dddd 100644
--- a/io_utils.py
+++ b/io_utils.py
@@ -196,6 +196,7 @@ def flag_write(outfilename: str, df: pd.DataFrame, diagnostics: bool = False) ->
             this_var_data = df[var].fillna(MDI).to_numpy().astype(float)
             this_var_data = np.ma.masked_where(this_var_data == MDI, this_var_data)
 
+            # write out for all tests, whether or not they are set for this variable
             for test in QC_TESTS.keys():
                 locs = flags[flags.str.contains(test)]
 
@@ -219,7 +220,7 @@ def flag_write(outfilename: str, df: pd.DataFrame, diagnostics: bool = False) ->
             logging.info(f"{var} - {flagged.shape[0]}")
             if diagnostics:
                 print(f"{var} - {flagged.shape[0]}")
-                print(f"{var} - {flagged.shape[0]/np.ma.count(this_var_data)}")
+                print(f"{var} - {100*flagged.shape[0]/np.ma.count(this_var_data):.1f}%")
 
 
     return # flag_write
diff --git a/tests/example_data/Example_flag_file.flg b/tests/example_data/Example_flag_file.flg
index 54c5475..3e1cfc0 100644
--- a/tests/example_data/Example_flag_file.flg
+++ b/tests/example_data/Example_flag_file.flg
@@ -40,5 +40,9 @@ temperature : x : 0.0
 temperature : x_counts : 0
 temperature : y : 0.0
 temperature : y_counts : 0
+temperature : 1 : 0.0
+temperature : 1_counts : 0
+temperature : 2 : 0.0
+temperature : 2_counts : 0
 temperature : All : 0.8
 temperature : All_counts : 4