proper check from clean_dataframe (#123)

Deltares · Oct 10, 2024 · 4528df9 · 4528df9
1 parent 6a303fb
commit 4528df9
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 5 deletions.
diff --git a/ddlpy/ddlpy.py b/ddlpy/ddlpy.py
@@ -362,7 +362,7 @@ def _clean_dataframe(measurements):
     measurements = measurements.drop_duplicates()
 
     # remove Tijdstip column; this has to be done after drop_duplicates to avoid dropping too many rows
-    measurements = measurements.drop("Tijdstip", axis=1, errors='ignore')
+    measurements = measurements.drop("Tijdstip", axis=1)
 
     # sort dataframe on time, ddl returns non-sorted data
     measurements = measurements.sort_index()

diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py
@@ -215,6 +215,9 @@ def test_measurements_long(location):
 def test_measurements_sorted(measurements):
     """https://github.com/deltares/ddlpy/issues/27"""
 
+    # restore Tijdstip column to avoid error on removal
+    measurements = measurements.copy()
+    measurements["Tijdstip"] = measurements.index
     # sort dataframe on values so it will not be sorted on time
     meas_wrongorder = measurements.sort_values("Meetwaarde.Waarde_Numeriek")
     assert meas_wrongorder.index.is_monotonic_increasing == False
@@ -231,17 +234,19 @@ def test_measurements_duplicated(measurements):
     """
     WALSODN 2010 contains all values three times, ddlpy drops duplicates
     https://github.com/deltares/ddlpy/issues/24
-    if the data is cleaned in ddl, this test will fail and can be removed or adjusted
     
-    length assertion of meas_clean is important, to prevent issue 
-    https://github.com/deltares/ddlpy/issues/53
+    The Tijdstip column and the length assertion of meas_clean are important
+    to prevent removal of too many duplicates: https://github.com/deltares/ddlpy/issues/53
     """
+    # restore Tijdstip column to avoid removing too many duplicates
+    measurements = measurements.copy()
+    measurements["Tijdstip"] = measurements.index
 
     # deliberately duplicate values in a measurements dataframe
     meas_duplicated = pd.concat([measurements, measurements, measurements], axis=0)
     meas_clean = ddlpy.ddlpy._clean_dataframe(meas_duplicated)
     assert len(meas_duplicated) == 3024
-    assert len(meas_clean) == 392
+    assert len(meas_clean) == len(measurements) == 1008
 
     # check whether indexes are DatetimeIndex
     assert isinstance(meas_duplicated.index, pd.DatetimeIndex)