feat(detector): create detector classes and plotting function (#19)

* fix(ci): align workflow name for sequence trigger and badge * feat(models): create Detector object as the high level abstraction of all anomalytics functions * feat(detector): add anomaly_type param into the factory class * feat(potdetector): implement get_extremes for POTDetector * test(get_extremes): ensure get_extremes computation result is correct * feat(get_anomaly_score): implement get_anomaly_score in fit() method for POTDetector * test(potdetector_fit): ensure the fit produces the correct anomaly scores and parameters * test(potdetector_detect): ensure detect produces the correct anomaly threshold and detects the scores over it * fix(detector_set_params): remove set_params method * feat(non-zero-parameters): implement method in POTDetector to get all non-zero parameters * fix(evaluation-method): add method parameter in Detector evaluate() method * feat(evaluation-method): implement Kolmogorov-Smirnov test for POTDetector evaluation method * test(kstest-potdetector): ensure kstest calculates the statistical distance correctly * fix(potdetector_eval): bring back the return type for evaluate() into None * feat(qq-plot): implement qq plot into evaluate from POTDetector * feat(return-dataset): create return_dataset for POTDetector to return all private attributes Series or DataFrame * feat(plot): create plot for line, histogram, and gen pareto distribution * feat(potdetector_plot): implement plotting for displaying dataset distributions * fix(plot_line): add type: ignore to avoid mypy errors * fix(peaks_over_threshold): remove .values for plotting gpd in POTDetector * fix(plot_line): change data type to Series, float, or None * fix(test_detector): use assertAlmostEqual instead of assertEqual to avoid different byte reading capability * fix(__eval): convert __eval into pandas.DataFrame type
Aeternalis-Ingenium · Dec 4, 2023 · a728e7b · a728e7b
1 parent 45538f6
commit a728e7b
Show file tree

Hide file tree

Showing 20 changed files with 1,136 additions and 17 deletions.
diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml
@@ -8,7 +8,7 @@ on:
       - "fix/**/**"
       - "release/v*.*.*"
   workflow_run:
-    workflows: ["Build"]
+    workflows: ["CI Build"]
     types:
       - completed
 

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -8,7 +8,7 @@ on:
       - "fix/**/**"
       - "release/v*.*.*"
   workflow_run:
-    workflows: ["Code Quality"]
+    workflows: ["CI Code Quality"]
     types:
       - completed
 

diff --git a/README.md b/README.md
@@ -21,13 +21,13 @@
     <a href="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/build.yaml">
         <img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/build.yaml/badge.svg" alt="CI - Build">
     </a>
-    <a href="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/code-style.yaml">
-        <img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/code-style.yaml/badge.svg" alt="CI - Code Style">
+    <a href="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/code-quality.yaml">
+        <img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/code-quality.yaml/badge.svg" alt="CI - Code Quality">
     </a>
     <a href="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/test.yaml">
-        <img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/test.yaml/badge.svg" alt="CI - Test">
+        <img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/test.yaml/badge.svg" alt="CI - Automated Testing">
     </a>
-    <a href="https://opensource.org/licenses/MIT">
+    <a href="https://github.com/Aeternalis-Ingenium/anomalytics/blob/trunk/LICENSE">
         <img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT">
     </a>
     <!-- Replace the '#' in the href with your documentation link -->

diff --git a/src/anomalytics/__init__.py b/src/anomalytics/__init__.py
@@ -1,7 +1,15 @@
 __version__ = "0.1.0"
 
-__all__ = ["get_anomaly", "get_anomaly_score", "get_exceedance_peaks_over_threshold", "read_ts", "set_time_window"]
+__all__ = [
+    "get_anomaly",
+    "get_anomaly_score",
+    "get_detector",
+    "get_exceedance_peaks_over_threshold",
+    "read_ts",
+    "set_time_window",
+]
 
+from anomalytics.models import get_detector
 from anomalytics.stats import get_anomaly, get_anomaly_score, get_exceedance_peaks_over_threshold
 from anomalytics.time_series import read_ts
 from anomalytics.time_windows import set_time_window
diff --git a/src/anomalytics/evals/kolmogorv_smirnov.py b/src/anomalytics/evals/kolmogorv_smirnov.py
@@ -90,14 +90,12 @@ def ks_1sample(
         )
 
         return dict(
-            total_nonzero_exceedances=len(ts),
-            start_datetime=ts.index[0],
-            end_datetime=fit_params[-1]["datetime"],
-            stats_distance=ks_result.statistic,
-            p_value=ks_result.pvalue,
-            c=c,
-            loc=loc,
-            scale=scale,
+            total_nonzero_exceedances=[ts.shape[0]],
+            stats_distance=[ks_result.statistic],
+            p_value=[ks_result.pvalue],
+            c=[c],
+            loc=[loc],
+            scale=[scale],
         )
     if stats_method == "ZS":
         raise NotImplementedError()

diff --git a/src/anomalytics/models/__init__.py b/src/anomalytics/models/__init__.py
@@ -0,0 +1,3 @@
+__all__ = ["get_detector"]
+
+from anomalytics.models.detector import get_detector
diff --git a/src/anomalytics/models/abstract.py b/src/anomalytics/models/abstract.py
@@ -0,0 +1,65 @@
+import abc
+import typing
+
+import pandas as pd
+
+
+class Detector(metaclass=abc.ABCMeta):
+    """
+    Abstract interface shared by every anomaly detection model.
+
+    A concrete detector must override `__init__`, `fit`, `detect`, `evaluate`, and the
+    read-only `params` property; until all of them are overridden the class cannot be instantiated.
+    """
+
+    @abc.abstractmethod
+    def __init__(
+        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+    ):
+        """
+        Initialize the anomaly detection model with a specific statistical method.
+
+        ## Parameters
+        ----------
+        dataset : typing.Union[pandas.DataFrame, pandas.Series]
+            DataFrame or Series object to be analyzed.
+            Index must be date-time and values must be numeric.
+
+        anomaly_type : typing.Literal["high", "low"], default "high"
+            Defines which kind of anomaly we are expecting.
+        """
+        ...
+
+    @abc.abstractmethod
+    def fit(self) -> None:
+        """
+        Train the anomaly detection model using the provided data.
+        """
+        ...
+
+    @abc.abstractmethod
+    def detect(self) -> None:
+        """
+        Detect anomalies in the dataset.
+        """
+        ...
+
+    @abc.abstractmethod
+    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
+        """
+        Evaluate the analysis result of the anomaly detection model with a statistical method.
+
+        ## Parameters
+        ----------
+        method : typing.Literal["ks", "qq"], default "ks"
+            A parameter that decides which statistical method to use for testing the analysis result.
+            * "ks" for Kolmogorov Smirnov
+            * "qq" for QQ Plot
+        """
+        ...
+
+    @property
+    @abc.abstractmethod
+    def params(self) -> typing.Dict:
+        """
+        Retrieve the parameters of the anomaly detection model.
+
+        ## Returns
+        ----------
+        parameters : typing.Dict
+            The fitting result from the model.
+        """
+        ...
diff --git a/src/anomalytics/models/autoencoder.py b/src/anomalytics/models/autoencoder.py
@@ -0,0 +1,55 @@
+import typing
+
+import pandas as pd
+
+from anomalytics.models.abstract import Detector
+
+
+class AutoencoderDetector(Detector):
+    """
+    Anomaly detector class that implements the "Autoencoder" method.
+    ! TODO: Implement anomaly detection with autoencoder method!
+    """
+
+    __slots__ = [
+        "__anomaly_type",
+        "__dataset__",
+    ]
+
+    __anomaly_type: typing.Literal["high", "low"]
+    __dataset: typing.Union[pd.DataFrame, pd.Series]
+
+    def __init__(
+        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+    ):
+        """
+        Initialize Autoencoder model for anomaly detection.
+
+        ## Parameters
+        ----------
+        dataset : typing.Union[pandas.DataFrame, pandas.Series]
+            DataFame or Series objects to be analyzed.
+            Index must be date-time and values must be numeric.
+
+        anomaly_type : typing.Literal["high", "low"]
+            Defining which kind of anomaly are we expecting.
+        """
+
+        self.__anomaly_type = anomaly_type
+        self.__dataset = dataset
+
+    def fit(self) -> None:
+        raise NotImplementedError("Not yet implemented!")
+
+    def detect(self) -> None:
+        raise NotImplementedError("Not yet implemented!")
+
+    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
+        raise NotImplementedError("Not yet implemented!")
+
+    @property
+    def params(self) -> dict:  # type: ignore
+        raise NotImplementedError("Not yet implemented!")
+
+    def __str__(self) -> str:
+        return "AE"
diff --git a/src/anomalytics/models/block_maxima.py b/src/anomalytics/models/block_maxima.py
@@ -0,0 +1,55 @@
+import typing
+
+import pandas as pd
+
+from anomalytics.models.abstract import Detector
+
+
+class BlockMaximaDetector(Detector):
+    """
+    Anomaly detector class that implements the "Block Maxima" method.
+    ! TODO: Implement anomaly detection with block-maxima method!
+    """
+
+    __slots__ = [
+        "__anomaly_type",
+        "__dataset__",
+    ]
+
+    __anomaly_type: typing.Literal["high", "low"]
+    __dataset: typing.Union[pd.DataFrame, pd.Series]
+
+    def __init__(
+        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+    ):
+        """
+        Initialize Block-Maxima model for anomaly detection.
+
+        ## Parameters
+        ----------
+        dataset : typing.Union[pandas.DataFrame, pandas.Series]
+            DataFame or Series objects to be analyzed.
+            Index must be date-time and values must be numeric.
+
+        anomaly_type : typing.Literal["high", "low"]
+            Defining which kind of anomaly are we expecting.
+        """
+
+        self.__anomaly_type = anomaly_type
+        self.__dataset = dataset
+
+    def fit(self) -> None:
+        raise NotImplementedError("Not yet implemented!")
+
+    def detect(self) -> None:
+        raise NotImplementedError("Not yet implemented!")
+
+    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
+        raise NotImplementedError("Not yet implemented!")
+
+    @property
+    def params(self) -> dict:  # type: ignore
+        raise NotImplementedError("Not yet implemented!")
+
+    def __str__(self) -> str:
+        return "BM"
diff --git a/src/anomalytics/models/dbscan.py b/src/anomalytics/models/dbscan.py
@@ -0,0 +1,55 @@
+import typing
+
+import pandas as pd
+
+from anomalytics.models.abstract import Detector
+
+
+class DBSCANDetector(Detector):
+    """
+    Anomaly detector class that implements the "Density-Based Spatial Clustering of Applications with Noise" (D. B. S. C. A. N.) method.
+    ! TODO: Implement anomaly detection with "DBSCAN" method!
+    """
+
+    __slots__ = [
+        "__anomaly_type",
+        "__dataset__",
+    ]
+
+    __anomaly_type: typing.Literal["high", "low"]
+    __dataset: typing.Union[pd.DataFrame, pd.Series]
+
+    def __init__(
+        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
+    ):
+        """
+        Initialize DBSCAN model for anomaly detection.
+
+        ## Parameters
+        ----------
+        dataset : typing.Union[pandas.DataFrame, pandas.Series]
+            DataFame or Series objects to be analyzed.
+            Index must be date-time and values must be numeric.
+
+        anomaly_type : typing.Literal["high", "low"]
+            Defining which kind of anomaly are we expecting.
+        """
+
+        self.__anomaly_type = anomaly_type
+        self.__dataset = dataset
+
+    def fit(self) -> None:
+        raise NotImplementedError("Not yet implemented!")
+
+    def detect(self) -> None:
+        raise NotImplementedError("Not yet implemented!")
+
+    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
+        raise NotImplementedError("Not yet implemented!")
+
+    @property
+    def params(self) -> dict:  # type: ignore
+        raise NotImplementedError("Not yet implemented!")
+
+    def __str__(self) -> str:
+        return "DBSCAN"
diff --git a/src/anomalytics/models/detector.py b/src/anomalytics/models/detector.py
@@ -0,0 +1,73 @@
+from __future__ import annotations
+
+import logging
+import typing
+
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+class FactoryDetector:
+    def __init__(
+        self,
+        method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
+        dataset: typing.Union[pd.DataFrame, pd.Series],
+        anomaly_type: typing.Literal["high", "low"] = "high",
+    ):
+        self.method = method
+        self.dataset = dataset
+        self.anomaly_type = anomaly_type
+
+    def __call__(self):
+        if self.method == "AE":
+            from anomalytics.models.autoencoder import AutoencoderDetector
+
+            return AutoencoderDetector(dataset=self.dataset, anomaly_type=self.anomaly_type)
+
+        elif self.method == "BM":
+            from anomalytics.models.block_maxima import BlockMaximaDetector
+
+            return BlockMaximaDetector(dataset=self.dataset, anomaly_type=self.anomaly_type)
+
+        elif self.method == "DBSCAN":
+            from anomalytics.models.dbscan import DBSCANDetector
+
+            return DBSCANDetector(dataset=self.dataset, anomaly_type=self.anomaly_type)
+
+        elif self.method == "ISOF":
+            from anomalytics.models.isoforest import IsoForestDetector
+
+            return IsoForestDetector(dataset=self.dataset, anomaly_type=self.anomaly_type)
+
+        elif self.method == "MAD":
+            from anomalytics.models.mad import MADDetector
+
+            return MADDetector(dataset=self.dataset, anomaly_type=self.anomaly_type)
+
+        elif self.method == "1CSVM":
+            from anomalytics.models.one_class_svm import OneClassSVMDetector
+
+            return OneClassSVMDetector(dataset=self.dataset, anomaly_type=self.anomaly_type)
+
+        elif self.method == "POT":
+            from anomalytics.models.peaks_over_threshold import POTDetector
+
+            return POTDetector(dataset=self.dataset, anomaly_type=self.anomaly_type)
+
+        elif self.method == "ZS":
+            from anomalytics.models.zscore import ZScoreDetector
+
+            return ZScoreDetector(dataset=self.dataset, anomaly_type=self.anomaly_type)
+
+        raise ValueError(
+            "Invalid value! Available `method` arguments: 'AE', 'BM', 'DBSCAN', 'ISOF', 'MAD', 'POT', 'ZS', '1CSVM'"
+        )
+
+
+def get_detector(
+    method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
+    dataset: typing.Union[pd.DataFrame, pd.Series],
+    anomaly_type: typing.Literal["high", "low"] = "high",
+):
+    return FactoryDetector(method=method, dataset=dataset, anomaly_type=anomaly_type)()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		__all__ = ["get_detector"]

		from anomalytics.models.detector import get_detector