test(test_peaks_over_threshold): ensure get_anomaly function behaves …

…correctly. This function executes get_anomaly_threshold, then uses the result as the comparison threshold to locate the anomalous data.
Aeternalis-Ingenium · Dec 4, 2023 · a1100a8 · a1100a8
1 parent 6279757
commit a1100a8
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 10 deletions.
diff --git a/src/anomalytics/__init__.py b/src/anomalytics/__init__.py
@@ -1,7 +1,7 @@
 __version__ = "0.1.0"
 
-__all__ = ["get_anomaly_score", "get_exceedance_peaks_over_threshold", "read_ts", "set_time_window"]
+__all__ = ["get_anomaly", "get_anomaly_score", "get_exceedance_peaks_over_threshold", "read_ts", "set_time_window"]
 
-from anomalytics.stats import get_anomaly_score, get_exceedance_peaks_over_threshold
+from anomalytics.stats import get_anomaly, get_anomaly_score, get_exceedance_peaks_over_threshold
 from anomalytics.time_series import read_ts
 from anomalytics.time_windows import set_time_window
diff --git a/src/anomalytics/stats/__init__.py b/src/anomalytics/stats/__init__.py
@@ -1,11 +1,13 @@
 __all__ = [
+    "get_anomaly",
     "get_anomaly_score",
     "get_anomaly_threshold",
     "get_threshold_peaks_over_threshold",
     "get_exceedance_peaks_over_threshold",
 ]
 
 from anomalytics.stats.peaks_over_threshold import (
+    get_anomaly,
     get_anomaly_score,
     get_anomaly_threshold,
     get_exceedance_peaks_over_threshold,

diff --git a/tests/test_peaks_over_threshold.py b/tests/test_peaks_over_threshold.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 
-from anomalytics import get_anomaly_score, get_exceedance_peaks_over_threshold
+from anomalytics import get_anomaly, get_anomaly_score, get_exceedance_peaks_over_threshold
 from anomalytics.stats import get_anomaly_threshold, get_threshold_peaks_over_threshold
 
 
@@ -13,6 +13,7 @@ def setUp(self):
             data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], index=pd.date_range(start="2023-01-01", periods=10)
         )
         self.sample_2_ts = pd.Series(np.random.rand(100), index=pd.date_range(start="2023-01-01", periods=100))
+        self.sample_3_ts = pd.Series(np.random.rand(100) * 2, index=pd.date_range("2020-01-01", periods=100))
 
     def test_calculate_threshold_for_high_anomaly_type(self):
         pot_threshold = get_threshold_peaks_over_threshold(ts=self.sample_1_ts, t0=3, anomaly_type="high", q=0.90)
@@ -61,14 +62,13 @@ def test_invalid_t0_value_in_exceedance_extraction_function(self):
             get_exceedance_peaks_over_threshold(ts=self.sample_2_ts, t0=None, anomaly_type="high", q=0.90)  # type: ignore
 
     def test_fit_exceedance_with_valid_input(self):
-        ts = pd.Series(np.random.rand(100) * 2, index=pd.date_range("2020-01-01", periods=100))
         t0 = 10
         gpd_params: dict = {}
-        exceedances = get_exceedance_peaks_over_threshold(ts=ts, t0=t0, anomaly_type="high", q=0.9)
+        exceedances = get_exceedance_peaks_over_threshold(ts=self.sample_3_ts, t0=t0, anomaly_type="high", q=0.9)
         anomaly_scores = get_anomaly_score(ts=exceedances, t0=t0, gpd_params=gpd_params)
 
         self.assertIsInstance(anomaly_scores, pd.Series)
-        self.assertEqual(len(anomaly_scores), len(ts.values) - t0)
+        self.assertEqual(len(anomaly_scores), len(self.sample_3_ts.values) - t0)
         self.assertTrue(all(isinstance(gpd_params[i], dict) for i in gpd_params))
 
     def test_fit_exceedance_with_invalid_ts(self):
@@ -79,17 +79,15 @@ def test_fit_exceedance_with_invalid_ts(self):
             get_anomaly_score(ts="not a series", t0=t0, gpd_params=gpd_params)
 
     def test_fit_exceedance_with_invalid_t0(self):
-        ts = pd.Series(np.random.rand(100) * 2, index=pd.date_range("2020-01-01", periods=100))
         gpd_params: dict = {}
 
         with self.assertRaises(ValueError):
-            get_anomaly_score(ts=ts, t0=None, gpd_params=gpd_params)  # type: ignore
+            get_anomaly_score(ts=self.sample_3_ts, t0=None, gpd_params=gpd_params)  # type: ignore
 
     def test_get_anomaly_threshold_with_valid_input(self):
-        ts = pd.Series(np.random.rand(100), index=pd.date_range("2020-01-01", periods=100))
         t1 = 50
         q = 0.90
-        anomaly_threshold = get_anomaly_threshold(ts=ts, t1=t1, q=q)
+        anomaly_threshold = get_anomaly_threshold(ts=self.sample_2_ts, t1=t1, q=q)
 
         self.assertIsInstance(anomaly_threshold, float)
         self.assertTrue(0 <= anomaly_threshold <= 1)
@@ -114,5 +112,25 @@ def test_confirm_correct_quantile_calculation_for_anomaly_threshold(self):
 
         self.assertEqual(anomaly_threshold, expected_anomaly_threshold)
 
+    def test_get_anomaly_with_valid_input(self):
+        self.sample_2_ts.iloc[75:] = self.sample_2_ts.iloc[75:] * 5
+
+        t1 = 50
+        q = 0.90
+        anomalies = get_anomaly(ts=self.sample_2_ts, t1=t1, q=q)
+
+        self.assertIsInstance(anomalies, pd.Series)
+
+        expected_anomalies = self.sample_2_ts.iloc[t1:] > get_anomaly_threshold(ts=self.sample_2_ts, t1=t1, q=q)
+
+        self.assertTrue((anomalies == expected_anomalies).all())
+
+    def test_get_anomaly_with_invalid_ts(self):
+        t1 = 50
+        q = 0.90
+
+        with self.assertRaises(TypeError):
+            get_anomaly(ts="not a series", t1=t1, q=q)
+
     def tearDown(self) -> None:
         return super().tearDown()