From c056a5ef717f88d5b12545eb61ba4875fa9146a6 Mon Sep 17 00:00:00 2001 From: "N. L." Date: Sun, 3 Dec 2023 12:21:42 +0100 Subject: [PATCH 01/12] ci(python-version): change "3.12.0" into "3.12" --- .github/workflows/ci.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6860b9e..ed20af3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,7 +14,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest] - python-version: ["3.10", "3.11", "3.12.0"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4.1.0 - name: Set up Python ${{ matrix.python-version }} @@ -32,7 +32,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest] - python-version: ["3.10", "3.11", "3.12.0"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4.1.0 - name: Set up Python ${{ matrix.python-version }} @@ -57,7 +57,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - python-version: ["3.10", "3.11", "3.12.0"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4.1.0 - name: Set up Python ${{ matrix.python-version }} From f7512f88073d2b60a5da4b80e15fcbca986e4bc5 Mon Sep 17 00:00:00 2001 From: "N. L." Date: Sun, 3 Dec 2023 12:24:05 +0100 Subject: [PATCH 02/12] feat(stats): create the main directory for all statistical methods --- src/anomalytics/stats/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/anomalytics/stats/__init__.py diff --git a/src/anomalytics/stats/__init__.py b/src/anomalytics/stats/__init__.py new file mode 100644 index 0000000..e69de29 From 1659b0b4dd478dbd6d66093ec0d138fb8d62549d Mon Sep 17 00:00:00 2001 From: "N. L." 
Date: Sun, 3 Dec 2023 13:15:48 +0100 Subject: [PATCH 03/12] feat(time-windows): create time windows main directory This directory is to store the implementation of calculating the time windows needed for each statistical method e.g. POT with its t0, t1, t2 concept. --- src/anomalytics/time_windows/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/anomalytics/time_windows/__init__.py diff --git a/src/anomalytics/time_windows/__init__.py b/src/anomalytics/time_windows/__init__.py new file mode 100644 index 0000000..e69de29 From 72caefbadbe1868622900f0b8e5d6fd0bdadb279 Mon Sep 17 00:00:00 2001 From: "N. L." Date: Sun, 3 Dec 2023 13:17:35 +0100 Subject: [PATCH 04/12] feat(pot_windows): implement logic for computing t0, t1, and t2 for POT method --- src/anomalytics/time_windows/pot_windows.py | 32 +++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 src/anomalytics/time_windows/pot_windows.py diff --git a/src/anomalytics/time_windows/pot_windows.py b/src/anomalytics/time_windows/pot_windows.py new file mode 100644 index 0000000..df8ad55 --- /dev/null +++ b/src/anomalytics/time_windows/pot_windows.py @@ -0,0 +1,32 @@ +import typing + + +def compute_pot_windows( + total_rows: int, + analysis_type: typing.Literal["historical", "real-time"], + t0_pct: float = 0.65, + t1_pct: float = 0.25, + t2_pct: float = 0.10, +) -> typing.Tuple[int, int, int]: + if analysis_type == "real-time": + if ((t0_pct + t1_pct > 1.0 or t0_pct + t1_pct == 1.0)) and ( + (t0_pct - t1_pct != 0.0) or (t1_pct - t0_pct != 0.0) + ): + raise ValueError( + "In real-time analysis, the t2 time window will be the last row of the Time Series. Hence `t0_pct` + `t1_pct` must equal to 1.0 (100%)." 
+ ) + t2 = 1 + total_rows = total_rows - t2 + t0 = int(t0_pct * total_rows) + t1 = int(t1_pct * total_rows) + uncounted_days = t0 + t1 - total_rows + else: + t0 = int(t0_pct * total_rows) + t1 = int(t1_pct * total_rows) + t2 = int(t2_pct * total_rows) + uncounted_days = t0 + t1 + t2 - total_rows + if uncounted_days < 0: + t1 += abs(uncounted_days) + elif uncounted_days > 0: + t1 -= uncounted_days + return (t0, t1, t2) From 3412c48c26403b5e35352480a58d3501f0cad4dc Mon Sep 17 00:00:00 2001 From: "N. L." Date: Sun, 3 Dec 2023 13:59:07 +0100 Subject: [PATCH 05/12] feat(gitignore): add coverage files into the ignore list --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 4e9e80c..1678204 100644 --- a/.gitignore +++ b/.gitignore @@ -42,7 +42,8 @@ htmlcov/ .coverage.* .cache nosetests.xml -coverage.xml +coverage.* +cov.* *.cover *.py,cover .hypothesis/ From ea208347c59b09e0f59d63e81bf8e3fbb4ab9195 Mon Sep 17 00:00:00 2001 From: "N. L." 
Date: Sun, 3 Dec 2023 14:00:16 +0100 Subject: [PATCH 06/12] feat(pot_windows): add error handler for if t0 - t1 < 0.0 --- src/anomalytics/time_windows/pot_windows.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/anomalytics/time_windows/pot_windows.py b/src/anomalytics/time_windows/pot_windows.py index df8ad55..ff55a51 100644 --- a/src/anomalytics/time_windows/pot_windows.py +++ b/src/anomalytics/time_windows/pot_windows.py @@ -8,12 +8,12 @@ def compute_pot_windows( t1_pct: float = 0.25, t2_pct: float = 0.10, ) -> typing.Tuple[int, int, int]: + if t0_pct - t1_pct < 0.0: + raise ValueError("T0 time window needs to be bigger than T1 and T2, as a rule of thumb: t0 >= t1 > t2") if analysis_type == "real-time": - if ((t0_pct + t1_pct > 1.0 or t0_pct + t1_pct == 1.0)) and ( - (t0_pct - t1_pct != 0.0) or (t1_pct - t0_pct != 0.0) - ): + if t0_pct + t1_pct != 1.0: raise ValueError( - "In real-time analysis, the t2 time window will be the last row of the Time Series. Hence `t0_pct` + `t1_pct` must equal to 1.0 (100%)." + "In real-time analysis, the t2 time window will be the last row of the Time Series, hence `t0_pct` + `t1_pct` must equal to 1.0 (100%)" ) t2 = 1 total_rows = total_rows - t2 From 55196a8fad72601a4ce8aabdf9c1c12767247eb9 Mon Sep 17 00:00:00 2001 From: "N. L." Date: Sun, 3 Dec 2023 14:02:59 +0100 Subject: [PATCH 07/12] feat(time_window): create main method to call all functions for time window setup The main idea is that each statistical method will have a different time window concept. Hence the repeated set_time_window as an overloaded function. 
--- src/anomalytics/time_windows/time_window.py | 134 ++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 src/anomalytics/time_windows/time_window.py diff --git a/src/anomalytics/time_windows/time_window.py b/src/anomalytics/time_windows/time_window.py new file mode 100644 index 0000000..b9adf6c --- /dev/null +++ b/src/anomalytics/time_windows/time_window.py @@ -0,0 +1,134 @@ +import logging +import typing + +from anomalytics.time_windows.pot_windows import compute_pot_windows + +logger = logging.getLogger(__name__) + + +@typing.overload +def set_time_window( # type: ignore + total_rows: int, + method: typing.Literal["AE"], + analysis_type: typing.Literal["historical", "real-time"] = "historical", + *, + encoded: typing.List[float], +) -> typing.Tuple: + ... + + +@typing.overload +def set_time_window( # type: ignore + total_rows: int, + method: typing.Literal["BM"], + analysis_type: typing.Literal["historical", "real-time"] = "historical", + *, + block_size: float = 365.2425, +) -> typing.Tuple: + ... + + +@typing.overload +def set_time_window( # type: ignore + total_rows: int, + method: typing.Literal["DBSCAN"], + analysis_type: typing.Literal["historical", "real-time"] = "historical", + *, + total_cluster: int, +) -> typing.Tuple: + ... + + +@typing.overload +def set_time_window( # type: ignore + total_rows: int, + method: typing.Literal["ISOF"], + analysis_type: typing.Literal["historical", "real-time"] = "historical", + *, + isolated: typing.List[float], +) -> typing.Tuple: + ... + + +@typing.overload +def set_time_window( # type: ignore + total_rows: int, + method: typing.Literal["MAD"], + analysis_type: typing.Literal["historical", "real-time"] = "historical", + *, + medians: typing.List[float], +) -> typing.Tuple: + ... 
+ + +@typing.overload +def set_time_window( # type: ignore + total_rows: int, + method: typing.Literal["1CSVM"], + analysis_type: typing.Literal["historical", "real-time"] = "historical", + *, + vectors: typing.List[float], +) -> typing.Tuple: + ... + + +@typing.overload +def set_time_window( # type: ignore + total_rows: int, + method: typing.Literal["POT"], + analysis_type: typing.Literal["historical", "real-time"] = "historical", + *, + t0_pct: float = 0.65, + t1_pct: float = 0.25, + t2_pct: float = 0.10, +) -> typing.Tuple: + ... + + +@typing.overload +def set_time_window( # type: ignore + total_rows: int, + method: typing.Literal["ZS"], + analysis_type: typing.Literal["historical", "real-time"] = "historical", + *, + upper: float, + lower: float, +) -> typing.Tuple: + ... + + +def set_time_window( # type: ignore + total_rows: int, + method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "1CSVM", "POT", "ZS"], + analysis_type: typing.Literal["historical", "real-time"], + **kwargs, +) -> typing.Tuple: + if method == "AE": + raise NotImplementedError("The method 'AE' for Autoencoder specific time windows hasn't been implemented yet") + if method == "BM": + raise NotImplementedError("The method 'BM' for Block Maxima specific time windows hasn't been implemented yet") + if method == "DBSCAN": + raise NotImplementedError( + "The method 'DBSCAN' for Density-Based Spatial Clustering Application with Noise specific time windows hasn't been implemented yet" + ) + if method == "ISOF": + raise NotImplementedError( + "The method 'ISOF' for Isolation Forest specific time windows hasn't been implemented yet" + ) + if method == "MAD": + raise NotImplementedError( + "The method 'MAD' for Median Absolute Deviation specific time windows hasn't been implemented yet" + ) + if method == "1CSVM": + raise NotImplementedError( + "The method '1CSVM' for One Class Support Vector Method specific time windows hasn't been implemented yet" + ) + if method == "POT": + return 
compute_pot_windows(total_rows=total_rows, analysis_type=analysis_type, **kwargs) + if method == "ZS": + raise NotImplementedError("The method 'ZS' for Z-Score specific time windows hasn't been implemented yet") + + raise ValueError( + f"Invalid value in '{method}' for the 'method' argument. Availabe methods are: " + "'AUTO', 'BM', 'DBSCAN', 'ISOF', 'MAD', '1CSVM', 'POT', 'ZS'" + ) From 7a2bb55bf781e66eda1196b8c74e4ec546b20bf8 Mon Sep 17 00:00:00 2001 From: "N. L." Date: Sun, 3 Dec 2023 14:04:27 +0100 Subject: [PATCH 08/12] test(time_windows): create a unittest for time_window and compute_pot_windows The test should ensure that compute_pot_windows are executed from the correct set_time_window (overloaded function). --- tests/test_time_windows.py | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/test_time_windows.py diff --git a/tests/test_time_windows.py b/tests/test_time_windows.py new file mode 100644 index 0000000..ebc1820 --- /dev/null +++ b/tests/test_time_windows.py @@ -0,0 +1,42 @@ +from unittest import TestCase + +from anomalytics.time_windows.pot_windows import compute_pot_windows +from anomalytics.time_windows.time_window import set_time_window + + +class TestTimeWindow(TestCase): + def setUp(self) -> None: + return super().setUp() + + def test_compute_pot_windows_executed_correctly_via_set_time_window(self): + t0, t1, t2 = set_time_window( + total_rows=1000, method="POT", analysis_type="historical", t0_pct=0.65, t1_pct=0.25, t2_pct=0.10 + ) + expected_t0, expected_t1, expected_t2 = compute_pot_windows( + total_rows=1000, analysis_type="historical", t0_pct=0.65, t1_pct=0.25, t2_pct=0.10 + ) + self.assertEqual(first=(t0 + t1 + t2), second=(expected_t0 + expected_t1 + expected_t2)) + self.assertEqual(first=t0, second=expected_t0) + self.assertEqual(first=t1, second=expected_t1) + self.assertEqual(first=t2, second=expected_t2) + + def test_pot_windows_for_historical_analysis_via_set_time_window(self): + t0, 
t1, t2 = set_time_window( + total_rows=1000, method="POT", analysis_type="historical", t0_pct=0.65, t1_pct=0.25, t2_pct=0.10 + ) + self.assertEqual(first=(t0 + t1 + t2), second=1000) + self.assertEqual(first=t0, second=650) + self.assertEqual(first=t1, second=250) + self.assertEqual(first=t2, second=100) + + def test_pot_windows_for_realtime_analysis_via_set_time_window(self): + t0, t1, t2 = set_time_window( + total_rows=1000, method="POT", analysis_type="real-time", t0_pct=0.7, t1_pct=0.3, t2_pct=0.0 + ) + self.assertEqual(first=(t0 + t1 + t2), second=1000) + self.assertEqual(first=t0, second=699) + self.assertEqual(first=t1, second=300) + self.assertEqual(first=t2, second=1) + + def tearDown(self) -> None: + return super().tearDown() From d9af88458d22d405384819f44129c9199e0c3cb1 Mon Sep 17 00:00:00 2001 From: "N. L." Date: Sun, 3 Dec 2023 14:16:25 +0100 Subject: [PATCH 09/12] feat(plots): create plots/ initial directory --- src/anomalytics/plots/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/anomalytics/plots/__init__.py diff --git a/src/anomalytics/plots/__init__.py b/src/anomalytics/plots/__init__.py new file mode 100644 index 0000000..e69de29 From fccdcbf87e49ab80995f646948069ef2e902b7db Mon Sep 17 00:00:00 2001 From: "N. L." Date: Sun, 3 Dec 2023 14:16:52 +0100 Subject: [PATCH 10/12] feat(evals): create evals/ initial directory for statistical evaluation --- src/anomalytics/evals/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/anomalytics/evals/__init__.py diff --git a/src/anomalytics/evals/__init__.py b/src/anomalytics/evals/__init__.py new file mode 100644 index 0000000..e69de29 From 22322dcf6b1de1d6768c69136cd9511fcb743e82 Mon Sep 17 00:00:00 2001 From: "N. L." 
Date: Sun, 3 Dec 2023 14:22:14 +0100 Subject: [PATCH 11/12] feat(import-strategy): import functions in time_windows into __init__.py set_time_window is now accessible from anomalytics/__init__.py and compute_pot_windows is now accessible from anomalytics/time_windows/__init__.py --- src/anomalytics/__init__.py | 4 ++++ src/anomalytics/time_windows/__init__.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/anomalytics/__init__.py b/src/anomalytics/__init__.py index 3dc1f76..3d7bc95 100644 --- a/src/anomalytics/__init__.py +++ b/src/anomalytics/__init__.py @@ -1 +1,5 @@ __version__ = "0.1.0" + +__all__ = ["set_time_window"] + +from anomalytics.time_windows import set_time_window diff --git a/src/anomalytics/time_windows/__init__.py b/src/anomalytics/time_windows/__init__.py index e69de29..6ae035a 100644 --- a/src/anomalytics/time_windows/__init__.py +++ b/src/anomalytics/time_windows/__init__.py @@ -0,0 +1,4 @@ +__all__ = ["compute_pot_windows", "set_time_window"] + +from anomalytics.time_windows.pot_windows import compute_pot_windows +from anomalytics.time_windows.time_window import set_time_window From d4d80a2c0dd06c393e3bbbe2ed39900ef8b622ab Mon Sep 17 00:00:00 2001 From: "N. L." Date: Sun, 3 Dec 2023 14:23:02 +0100 Subject: [PATCH 12/12] test(import): ensure time_windows functions are accessible via __init__.py --- tests/test_time_windows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_time_windows.py b/tests/test_time_windows.py index ebc1820..c86c1d4 100644 --- a/tests/test_time_windows.py +++ b/tests/test_time_windows.py @@ -1,7 +1,7 @@ from unittest import TestCase -from anomalytics.time_windows.pot_windows import compute_pot_windows -from anomalytics.time_windows.time_window import set_time_window +from anomalytics import set_time_window +from anomalytics.time_windows import compute_pot_windows class TestTimeWindow(TestCase):