Skip to content

Commit

Permalink
feat(detector): create detector classes and plotting function (#19)
Browse files Browse the repository at this point in the history
* fix(ci): align workflow name for sequence trigger and badge

* feat(models): create Detector object as the high level abstraction of all anomalytics functions

* feat(detector): add anomaly_type param into the factory class

* feat(potdetector): implement get_extremes for POTDetector

* test(get_extremes): ensure get_extremes computation result is correct

* feat(get_anomaly_score): implement get_anomaly_score in fit() method for POTDetector

* test(potdetector_fit): ensure the fit produces the correct anomaly scores and parameters

* test(potdetector_detect): ensure detect produces the correct anomaly threshold and detects the scores over it

* fix(detector_set_params): remove set_params method

* feat(non-zero-parameters): implement method in POTDetector to get all non-zero parameters

* fix(evaluation-method): add method parameter in Detector evaluation() method

* feat(evaluation-method): implement Kolmogorov-Smirnov test for POTDetector evaluation method

* test(kstest-potdetector): ensure kstest calculates the statistical distance correctly

* fix(potdetector_eval): bring back the return type for evaluate() to None

* feat(qq-plot): implement qq plot into evaluate from POTDetector

* feat(return-dataset): create return_dataset for POTDetector to return all private attributes Series or DataFrame

* feat(plot): create plot for line, histogram, and gen pareto distribution

* feat(potdetector_plot): implement plotting for displaying dataset distributions

* fix(plot_line): add type: ignore to avoid mypy

* fix(peaks_over_threshold): remove .values for plotting gpd in POTDetector

* fix(plot_line): change data type to Series, float, or None

* fix(test_detector): use assertAlmostEqual instead of assertEqual to avoid floating-point precision differences across platforms

* fix(__eval): convert __eval into pandas.DataFrame type
  • Loading branch information
Aeternalis-Ingenium authored Dec 4, 2023
1 parent 45538f6 commit a728e7b
Show file tree
Hide file tree
Showing 20 changed files with 1,136 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code-quality.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
- "fix/**/**"
- "release/v*.*.*"
workflow_run:
workflows: ["Build"]
workflows: ["CI Build"]
types:
- completed

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
- "fix/**/**"
- "release/v*.*.*"
workflow_run:
workflows: ["Code Quality"]
workflows: ["CI Code Quality"]
types:
- completed

Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
<a href="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/build.yaml">
<img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/build.yaml/badge.svg" alt="CI - Build">
</a>
<a href="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/code-style.yaml">
<img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/code-style.yaml/badge.svg" alt="CI - Code Style">
<a href="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/code-quality.yaml">
<img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/code-quality.yaml/badge.svg" alt="CI - Code Quality">
</a>
<a href="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/test.yaml">
<img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/test.yaml/badge.svg" alt="CI - Test">
<img src="https://github.com/Aeternalis-Ingenium/anomalytics/actions/workflows/test.yaml/badge.svg" alt="CI - Automated Testing">
</a>
<a href="https://opensource.org/licenses/MIT">
<a href="https://github.com/Aeternalis-Ingenium/anomalytics/blob/trunk/LICENSE">
<img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT">
</a>
<!-- Replace the '#' in the href with your documentation link -->
Expand Down
10 changes: 9 additions & 1 deletion src/anomalytics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
__version__ = "0.1.0"

__all__ = ["get_anomaly", "get_anomaly_score", "get_exceedance_peaks_over_threshold", "read_ts", "set_time_window"]
__all__ = [
"get_anomaly",
"get_anomaly_score",
"get_detector",
"get_exceedance_peaks_over_threshold",
"read_ts",
"set_time_window",
]

from anomalytics.models import get_detector
from anomalytics.stats import get_anomaly, get_anomaly_score, get_exceedance_peaks_over_threshold
from anomalytics.time_series import read_ts
from anomalytics.time_windows import set_time_window
14 changes: 6 additions & 8 deletions src/anomalytics/evals/kolmogorv_smirnov.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,12 @@ def ks_1sample(
)

return dict(
total_nonzero_exceedances=len(ts),
start_datetime=ts.index[0],
end_datetime=fit_params[-1]["datetime"],
stats_distance=ks_result.statistic,
p_value=ks_result.pvalue,
c=c,
loc=loc,
scale=scale,
total_nonzero_exceedances=[ts.shape[0]],
stats_distance=[ks_result.statistic],
p_value=[ks_result.pvalue],
c=[c],
loc=[loc],
scale=[scale],
)
if stats_method == "ZS":
raise NotImplementedError()
Expand Down
3 changes: 3 additions & 0 deletions src/anomalytics/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
__all__ = ["get_detector"]

from anomalytics.models.detector import get_detector
65 changes: 65 additions & 0 deletions src/anomalytics/models/abstract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import abc
import typing

import pandas as pd


class Detector(metaclass=abc.ABCMeta):
    """
    Abstract interface that every anomaly detection model implements.

    Concrete detectors must provide the constructor, `fit`, `detect`,
    `evaluate` and the read-only `params` property declared below.
    """

    @abc.abstractmethod
    def __init__(
        self,
        dataset: typing.Union[pd.DataFrame, pd.Series],
        anomaly_type: typing.Literal["high", "low"] = "high",
    ):
        """
        Initialize the anomaly detection model with a specific statistical method.

        ## Parameters
        ----------
        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            DataFrame or Series object to be analyzed.
            Index must be date-time and values must be numeric.

        anomaly_type : typing.Literal["high", "low"], default "high"
            The kind of anomaly that is expected.
        """

    @abc.abstractmethod
    def fit(self) -> None:
        """
        Train the anomaly detection model using the provided data.
        """

    @abc.abstractmethod
    def detect(self) -> None:
        """
        Detect anomalies in the dataset.
        """

    @abc.abstractmethod
    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
        """
        Evaluate the result of the anomaly detection model with a statistical method.

        ## Parameters
        -------------
        method : typing.Literal["ks", "qq"], default "ks"
            Decides which statistical method is used for testing the analysis result.
                * "ks" for Kolmogorov Smirnov
                * "qq" for QQ Plot
        """

    @property
    @abc.abstractmethod
    def params(self) -> typing.Dict:
        """
        Retrieve the parameters of the anomaly detection model.

        ## Returns
        ----------
        parameters : typing.Dict
            The fitting result from the model.
        """
55 changes: 55 additions & 0 deletions src/anomalytics/models/autoencoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import typing

import pandas as pd

from anomalytics.models.abstract import Detector


class AutoencoderDetector(Detector):
    """
    Anomaly detector class that implements the "Autoencoder" method.

    ! TODO: Implement anomaly detection with autoencoder method!
    """

    # Double-underscore names in `__slots__` are name-mangled exactly like the
    # `self.__x` assignments in `__init__`, but only when they do NOT end with
    # two underscores. Each entry must therefore be spelled like the attribute.
    __slots__ = [
        "__anomaly_type",
        "__dataset",
    ]

    __anomaly_type: typing.Literal["high", "low"]
    __dataset: typing.Union[pd.DataFrame, pd.Series]

    def __init__(
        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
    ):
        """
        Initialize Autoencoder model for anomaly detection.

        ## Parameters
        ----------
        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            DataFrame or Series object to be analyzed.
            Index must be date-time and values must be numeric.

        anomaly_type : typing.Literal["high", "low"], default "high"
            The kind of anomaly that is expected.
        """

        self.__anomaly_type = anomaly_type
        self.__dataset = dataset

    def fit(self) -> None:
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    def detect(self) -> None:
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    @property
    def params(self) -> dict:  # type: ignore
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    def __str__(self) -> str:
        """Return the short method code of this detector ("AE")."""
        return "AE"
55 changes: 55 additions & 0 deletions src/anomalytics/models/block_maxima.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import typing

import pandas as pd

from anomalytics.models.abstract import Detector


class BlockMaximaDetector(Detector):
    """
    Anomaly detector class that implements the "Block Maxima" method.

    ! TODO: Implement anomaly detection with block-maxima method!
    """

    # Double-underscore names in `__slots__` are name-mangled exactly like the
    # `self.__x` assignments in `__init__`, but only when they do NOT end with
    # two underscores. Each entry must therefore be spelled like the attribute.
    __slots__ = [
        "__anomaly_type",
        "__dataset",
    ]

    __anomaly_type: typing.Literal["high", "low"]
    __dataset: typing.Union[pd.DataFrame, pd.Series]

    def __init__(
        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
    ):
        """
        Initialize Block-Maxima model for anomaly detection.

        ## Parameters
        ----------
        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            DataFrame or Series object to be analyzed.
            Index must be date-time and values must be numeric.

        anomaly_type : typing.Literal["high", "low"], default "high"
            The kind of anomaly that is expected.
        """

        self.__anomaly_type = anomaly_type
        self.__dataset = dataset

    def fit(self) -> None:
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    def detect(self) -> None:
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    @property
    def params(self) -> dict:  # type: ignore
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    def __str__(self) -> str:
        """Return the short method code of this detector ("BM")."""
        return "BM"
55 changes: 55 additions & 0 deletions src/anomalytics/models/dbscan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import typing

import pandas as pd

from anomalytics.models.abstract import Detector


class DBSCANDetector(Detector):
    """
    Anomaly detector class that implements the "Density-Based Spatial Clustering of Applications with Noise" (DBSCAN) method.

    ! TODO: Implement anomaly detection with "DBSCAN" method!
    """

    # Double-underscore names in `__slots__` are name-mangled exactly like the
    # `self.__x` assignments in `__init__`, but only when they do NOT end with
    # two underscores. Each entry must therefore be spelled like the attribute.
    __slots__ = [
        "__anomaly_type",
        "__dataset",
    ]

    __anomaly_type: typing.Literal["high", "low"]
    __dataset: typing.Union[pd.DataFrame, pd.Series]

    def __init__(
        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
    ):
        """
        Initialize DBSCAN model for anomaly detection.

        ## Parameters
        ----------
        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            DataFrame or Series object to be analyzed.
            Index must be date-time and values must be numeric.

        anomaly_type : typing.Literal["high", "low"], default "high"
            The kind of anomaly that is expected.
        """

        self.__anomaly_type = anomaly_type
        self.__dataset = dataset

    def fit(self) -> None:
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    def detect(self) -> None:
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    @property
    def params(self) -> dict:  # type: ignore
        """Not available yet; always raises `NotImplementedError`."""
        raise NotImplementedError("Not yet implemented!")

    def __str__(self) -> str:
        """Return the short method code of this detector ("DBSCAN")."""
        return "DBSCAN"
73 changes: 73 additions & 0 deletions src/anomalytics/models/detector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from __future__ import annotations

import logging
import typing

import pandas as pd

logger = logging.getLogger(__name__)


class FactoryDetector:
    """
    Callable factory that builds the detector matching a method code.

    The constructor only stores its arguments; the detector class is imported
    and instantiated when the factory object is called.
    """

    def __init__(
        self,
        method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
        dataset: typing.Union[pd.DataFrame, pd.Series],
        anomaly_type: typing.Literal["high", "low"] = "high",
    ):
        """
        Store the configuration used to build the detector.

        ## Parameters
        ----------
        method : typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"]
            Code of the detection method to instantiate.

        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            Data handed over to the detector.

        anomaly_type : typing.Literal["high", "low"], default "high"
            Kind of anomaly handed over to the detector.
        """
        self.method, self.dataset, self.anomaly_type = method, dataset, anomaly_type

    def __call__(self):
        """
        Import the detector class for `self.method` and return a new instance.

        ## Raises
        ----------
        ValueError
            If `self.method` is not one of the supported method codes.
        """
        chosen = self.method

        # Each branch imports lazily so that only the module of the requested
        # detector is loaded.
        if chosen == "AE":
            from anomalytics.models.autoencoder import AutoencoderDetector as detector_class
        elif chosen == "BM":
            from anomalytics.models.block_maxima import BlockMaximaDetector as detector_class
        elif chosen == "DBSCAN":
            from anomalytics.models.dbscan import DBSCANDetector as detector_class
        elif chosen == "ISOF":
            from anomalytics.models.isoforest import IsoForestDetector as detector_class
        elif chosen == "MAD":
            from anomalytics.models.mad import MADDetector as detector_class
        elif chosen == "1CSVM":
            from anomalytics.models.one_class_svm import OneClassSVMDetector as detector_class
        elif chosen == "POT":
            from anomalytics.models.peaks_over_threshold import POTDetector as detector_class
        elif chosen == "ZS":
            from anomalytics.models.zscore import ZScoreDetector as detector_class
        else:
            raise ValueError(
                "Invalid value! Available `method` arguments: 'AE', 'BM', 'DBSCAN', 'ISOF', 'MAD', 'POT', 'ZS', '1CSVM'"
            )

        return detector_class(dataset=self.dataset, anomaly_type=self.anomaly_type)


def get_detector(
    method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
    dataset: typing.Union[pd.DataFrame, pd.Series],
    anomaly_type: typing.Literal["high", "low"] = "high",
):
    """
    Build and return the detector that corresponds to `method`.

    ## Parameters
    ----------
    method : typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"]
        Code of the detection method to instantiate.

    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        Data handed over to the detector.

    anomaly_type : typing.Literal["high", "low"], default "high"
        Kind of anomaly handed over to the detector.

    ## Returns
    ----------
    detector
        The object produced by calling a `FactoryDetector` configured with the given arguments.
    """
    factory = FactoryDetector(method=method, dataset=dataset, anomaly_type=anomaly_type)
    return factory()
Loading

0 comments on commit a728e7b

Please sign in to comment.