Skip to content

Commit

Permalink
proper check from clean_dataframe (#123)
Browse files Browse the repository at this point in the history
  • Loading branch information
veenstrajelmer authored Oct 10, 2024
1 parent 6a303fb commit 4528df9
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
2 changes: 1 addition & 1 deletion ddlpy/ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def _clean_dataframe(measurements):
measurements = measurements.drop_duplicates()

# remove Tijdstip column; this has to be done after drop_duplicates to avoid dropping too many rows
measurements = measurements.drop("Tijdstip", axis=1, errors='ignore')
measurements = measurements.drop("Tijdstip", axis=1)

# sort dataframe on time, ddl returns non-sorted data
measurements = measurements.sort_index()
Expand Down
13 changes: 9 additions & 4 deletions tests/test_ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,9 @@ def test_measurements_long(location):
def test_measurements_sorted(measurements):
"""https://github.com/deltares/ddlpy/issues/27"""

# restore Tijdstip column to avoid error on removal
measurements = measurements.copy()
measurements["Tijdstip"] = measurements.index
# sort dataframe on values so it will not be sorted on time
meas_wrongorder = measurements.sort_values("Meetwaarde.Waarde_Numeriek")
assert meas_wrongorder.index.is_monotonic_increasing == False
Expand All @@ -231,17 +234,19 @@ def test_measurements_duplicated(measurements):
"""
WALSODN 2010 contains all values three times, ddlpy drops duplicates
https://github.com/deltares/ddlpy/issues/24
if the data is cleaned in ddl, this test will fail and can be removed or adjusted
length assertion of meas_clean is important, to prevent issue
https://github.com/deltares/ddlpy/issues/53
The Tijdstip column and the length assertion of meas_clean are important
to prevent removing too many duplicates, see https://github.com/deltares/ddlpy/issues/53
"""
# restore Tijdstip column to avoid removing too many duplicates
measurements = measurements.copy()
measurements["Tijdstip"] = measurements.index

# deliberately duplicate values in a measurements dataframe
meas_duplicated = pd.concat([measurements, measurements, measurements], axis=0)
meas_clean = ddlpy.ddlpy._clean_dataframe(meas_duplicated)
assert len(meas_duplicated) == 3024
assert len(meas_clean) == 392
assert len(meas_clean) == len(measurements) == 1008

# check whether indexes are DatetimeIndex
assert isinstance(meas_duplicated.index, pd.DatetimeIndex)
Expand Down

0 comments on commit 4528df9

Please sign in to comment.