From a1100a8dbc721e6f2c039e72ff8b4ac2985b1675 Mon Sep 17 00:00:00 2001 From: "N. L." Date: Mon, 4 Dec 2023 03:23:35 +0100 Subject: [PATCH] test(test_peaks_over_threshold): ensure get_anomaly function behaves correctly This function executes get_anomaly_threshold, then uses the resulting threshold as the comparison value to locate the anomalous data. --- src/anomalytics/__init__.py | 4 ++-- src/anomalytics/stats/__init__.py | 2 ++ tests/test_peaks_over_threshold.py | 34 +++++++++++++++++++++++------- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/anomalytics/__init__.py b/src/anomalytics/__init__.py index 07e9436..16f2a3e 100644 --- a/src/anomalytics/__init__.py +++ b/src/anomalytics/__init__.py @@ -1,7 +1,7 @@ __version__ = "0.1.0" -__all__ = ["get_anomaly_score", "get_exceedance_peaks_over_threshold", "read_ts", "set_time_window"] +__all__ = ["get_anomaly", "get_anomaly_score", "get_exceedance_peaks_over_threshold", "read_ts", "set_time_window"] -from anomalytics.stats import get_anomaly_score, get_exceedance_peaks_over_threshold +from anomalytics.stats import get_anomaly, get_anomaly_score, get_exceedance_peaks_over_threshold from anomalytics.time_series import read_ts from anomalytics.time_windows import set_time_window diff --git a/src/anomalytics/stats/__init__.py b/src/anomalytics/stats/__init__.py index 5755f01..285f19d 100644 --- a/src/anomalytics/stats/__init__.py +++ b/src/anomalytics/stats/__init__.py @@ -1,4 +1,5 @@ __all__ = [ + "get_anomaly", "get_anomaly_score", "get_anomaly_threshold", "get_threshold_peaks_over_threshold", @@ -6,6 +7,7 @@ ] from anomalytics.stats.peaks_over_threshold import ( + get_anomaly, get_anomaly_score, get_anomaly_threshold, get_exceedance_peaks_over_threshold, diff --git a/tests/test_peaks_over_threshold.py b/tests/test_peaks_over_threshold.py index 37764a1..1bad6f9 100644 --- a/tests/test_peaks_over_threshold.py +++ b/tests/test_peaks_over_threshold.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from anomalytics import 
get_anomaly_score, get_exceedance_peaks_over_threshold +from anomalytics import get_anomaly, get_anomaly_score, get_exceedance_peaks_over_threshold from anomalytics.stats import get_anomaly_threshold, get_threshold_peaks_over_threshold @@ -13,6 +13,7 @@ def setUp(self): data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], index=pd.date_range(start="2023-01-01", periods=10) ) self.sample_2_ts = pd.Series(np.random.rand(100), index=pd.date_range(start="2023-01-01", periods=100)) + self.sample_3_ts = pd.Series(np.random.rand(100) * 2, index=pd.date_range("2020-01-01", periods=100)) def test_calculate_threshold_for_high_anomaly_type(self): pot_threshold = get_threshold_peaks_over_threshold(ts=self.sample_1_ts, t0=3, anomaly_type="high", q=0.90) @@ -61,14 +62,13 @@ def test_invalid_t0_value_in_exceedance_extraction_function(self): get_exceedance_peaks_over_threshold(ts=self.sample_2_ts, t0=None, anomaly_type="high", q=0.90) # type: ignore def test_fit_exceedance_with_valid_input(self): - ts = pd.Series(np.random.rand(100) * 2, index=pd.date_range("2020-01-01", periods=100)) t0 = 10 gpd_params: dict = {} - exceedances = get_exceedance_peaks_over_threshold(ts=ts, t0=t0, anomaly_type="high", q=0.9) + exceedances = get_exceedance_peaks_over_threshold(ts=self.sample_3_ts, t0=t0, anomaly_type="high", q=0.9) anomaly_scores = get_anomaly_score(ts=exceedances, t0=t0, gpd_params=gpd_params) self.assertIsInstance(anomaly_scores, pd.Series) - self.assertEqual(len(anomaly_scores), len(ts.values) - t0) + self.assertEqual(len(anomaly_scores), len(self.sample_3_ts.values) - t0) self.assertTrue(all(isinstance(gpd_params[i], dict) for i in gpd_params)) def test_fit_exceedance_with_invalid_ts(self): @@ -79,17 +79,15 @@ def test_fit_exceedance_with_invalid_ts(self): get_anomaly_score(ts="not a series", t0=t0, gpd_params=gpd_params) def test_fit_exceedance_with_invalid_t0(self): - ts = pd.Series(np.random.rand(100) * 2, index=pd.date_range("2020-01-01", periods=100)) gpd_params: dict = {} with 
self.assertRaises(ValueError): - get_anomaly_score(ts=ts, t0=None, gpd_params=gpd_params) # type: ignore + get_anomaly_score(ts=self.sample_3_ts, t0=None, gpd_params=gpd_params) # type: ignore def test_get_anomaly_threshold_with_valid_input(self): - ts = pd.Series(np.random.rand(100), index=pd.date_range("2020-01-01", periods=100)) t1 = 50 q = 0.90 - anomaly_threshold = get_anomaly_threshold(ts=ts, t1=t1, q=q) + anomaly_threshold = get_anomaly_threshold(ts=self.sample_2_ts, t1=t1, q=q) self.assertIsInstance(anomaly_threshold, float) self.assertTrue(0 <= anomaly_threshold <= 1) @@ -114,5 +112,25 @@ def test_confirm_correct_quantile_calculation_for_anomaly_threshold(self): self.assertEqual(anomaly_threshold, expected_anomaly_threshold) + def test_get_anomaly_with_valid_input(self): + self.sample_2_ts.iloc[75:] = self.sample_2_ts.iloc[75:] * 5 + + t1 = 50 + q = 0.90 + anomalies = get_anomaly(ts=self.sample_2_ts, t1=t1, q=q) + + self.assertIsInstance(anomalies, pd.Series) + + expected_anomalies = self.sample_2_ts.iloc[t1:] > get_anomaly_threshold(ts=self.sample_2_ts, t1=t1, q=q) + + self.assertTrue((anomalies == expected_anomalies).all()) + + def test_get_anomaly_with_invalid_ts(self): + t1 = 50 + q = 0.90 + + with self.assertRaises(TypeError): + get_anomaly(ts="not a series", t1=t1, q=q) + def tearDown(self) -> None: return super().tearDown()