-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(detector): create detector classes and plotting function (#19)
* fix(ci): align workflow name for sequence trigger and badge * feat(models): create Detector object as the high level abstraction of all anomalytics functions * feat(detector): add anomaly_type param into the factory class * feat(potdetector): implement get_extremes for POTDetector * test(get_extremes): ensure get_extremes computation result is correct * feat(get_anomaly_score): implement get_anomaly_score in fit() method for POTDetector * test(potdetector_fit): ensure the fit produces the correct anomaly scores and parameters * test(potdetector_detect): ensure detect produces the correct anomaly threshold and detects the scores over it * fix(detector_set_params): remove set_params method * feat(non-zero-parameters): implement method in POTDetector to get all non-zero parameters * fix(evaluation-method): add method parameter in Detector evaluation() method * feat(evaluation-method): implement Kolmogorov-Smirnov test for POTDetector evaluation method * test(kstest-potdetector): ensure kstest calculates the statistical distance correctly * fix(potdetector_eval): bring back the return type for evaluate() into None * feat(qq-plot): implement qq plot into evaluate from POTDetector * feat(return-dataset): create return_dataset for POTDetector to return all private attributes Series or DataFrame * feat(plot): create plot for line, histogram, and gen pareto distribution * feat(potdetector_plot): implement plotting for displaying datasets distributions * fix(plot_line): add type: ignore to avoid mypy * fix(peaks_over_threshold): remove .values for plotting gpd in POTDetector * fix(plot_line): change data type to Series, float, or None * fix(test_detector): use assertAlmostEqual instead of assertEqual to avoid different byte reading capability * fix(__eval): convert __eval into pandas.DataFrame type
- Loading branch information
1 parent
45538f6
commit a728e7b
Showing
20 changed files
with
1,136 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,15 @@ | ||
__version__ = "0.1.0" | ||
|
||
__all__ = ["get_anomaly", "get_anomaly_score", "get_exceedance_peaks_over_threshold", "read_ts", "set_time_window"] | ||
__all__ = [ | ||
"get_anomaly", | ||
"get_anomaly_score", | ||
"get_detector", | ||
"get_exceedance_peaks_over_threshold", | ||
"read_ts", | ||
"set_time_window", | ||
] | ||
|
||
from anomalytics.models import get_detector | ||
from anomalytics.stats import get_anomaly, get_anomaly_score, get_exceedance_peaks_over_threshold | ||
from anomalytics.time_series import read_ts | ||
from anomalytics.time_windows import set_time_window |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
__all__ = ["get_detector"] | ||
|
||
from anomalytics.models.detector import get_detector |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import abc | ||
import typing | ||
|
||
import pandas as pd | ||
|
||
|
||
class Detector(metaclass=abc.ABCMeta):
    """
    Abstract interface shared by every anomaly detector implementation.

    Concrete detectors must provide `__init__`, `fit`, `detect`, `evaluate`
    and the `params` property.
    """

    @abc.abstractmethod
    def __init__(
        self,
        dataset: typing.Union[pd.DataFrame, pd.Series],
        anomaly_type: typing.Literal["high", "low"] = "high",
    ):
        """
        Initialize the anomaly detection model with a specific statistical method.

        ## Parameters
        ----------
        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            DataFrame or Series object to be analyzed.
            Index must be date-time and values must be numeric.

        anomaly_type : typing.Literal["high", "low"], default "high"
            Which kind of anomaly is expected.
        """

    @abc.abstractmethod
    def fit(self) -> None:
        """
        Train the anomaly detection model using the provided data.
        """

    @abc.abstractmethod
    def detect(self) -> None:
        """
        Detect anomalies in the dataset.
        """

    @abc.abstractmethod
    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
        """
        Evaluate the result of the anomaly detection model with a statistical method.

        ## Parameters
        -------------
        method : typing.Literal["ks", "qq"], default "ks"
            Decides which statistical method is used for testing the analysis result.
                * "ks" for Kolmogorov Smirnov
                * "qq" for QQ Plot
        """

    @property
    @abc.abstractmethod
    def params(self) -> typing.Dict:
        """
        Retrieve the parameters of the anomaly detection model.

        ## Returns
        ----------
        parameters : typing.Dict
            The fitting result from the model.
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import typing | ||
|
||
import pandas as pd | ||
|
||
from anomalytics.models.abstract import Detector | ||
|
||
|
||
class AutoencoderDetector(Detector):
    """
    Anomaly detector class that implements the "Autoencoder" method.

    ! TODO: Implement anomaly detection with autoencoder method!
    """

    # Slot names must match the attributes assigned in `__init__`. The original
    # listed "__dataset__", which never matched the `__dataset` attribute.
    __slots__ = [
        "__anomaly_type",
        "__dataset",
    ]

    __anomaly_type: typing.Literal["high", "low"]
    __dataset: typing.Union[pd.DataFrame, pd.Series]

    def __init__(
        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
    ):
        """
        Initialize Autoencoder model for anomaly detection.

        ## Parameters
        ----------
        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            DataFrame or Series object to be analyzed.
            Index must be date-time and values must be numeric.

        anomaly_type : typing.Literal["high", "low"], default "high"
            Which kind of anomaly is expected.
        """

        self.__anomaly_type = anomaly_type
        self.__dataset = dataset

    def fit(self) -> None:
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    def detect(self) -> None:
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    @property
    def params(self) -> dict:  # type: ignore
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    def __str__(self) -> str:
        # Short code for this detector; the same string selects it in the factory.
        return "AE"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import typing | ||
|
||
import pandas as pd | ||
|
||
from anomalytics.models.abstract import Detector | ||
|
||
|
||
class BlockMaximaDetector(Detector):
    """
    Anomaly detector class that implements the "Block Maxima" method.

    ! TODO: Implement anomaly detection with block-maxima method!
    """

    # Slot names must match the attributes assigned in `__init__`. The original
    # listed "__dataset__", which never matched the `__dataset` attribute.
    __slots__ = [
        "__anomaly_type",
        "__dataset",
    ]

    __anomaly_type: typing.Literal["high", "low"]
    __dataset: typing.Union[pd.DataFrame, pd.Series]

    def __init__(
        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
    ):
        """
        Initialize Block-Maxima model for anomaly detection.

        ## Parameters
        ----------
        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            DataFrame or Series object to be analyzed.
            Index must be date-time and values must be numeric.

        anomaly_type : typing.Literal["high", "low"], default "high"
            Which kind of anomaly is expected.
        """

        self.__anomaly_type = anomaly_type
        self.__dataset = dataset

    def fit(self) -> None:
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    def detect(self) -> None:
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    @property
    def params(self) -> dict:  # type: ignore
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    def __str__(self) -> str:
        # Short code for this detector; the same string selects it in the factory.
        return "BM"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import typing | ||
|
||
import pandas as pd | ||
|
||
from anomalytics.models.abstract import Detector | ||
|
||
|
||
class DBSCANDetector(Detector):
    """
    Anomaly detector class that implements the "Density-Based Spatial Clustering of Applications with Noise" (D. B. S. C. A. N.) method.

    ! TODO: Implement anomaly detection with "DBSCAN" method!
    """

    # Slot names must match the attributes assigned in `__init__`. The original
    # listed "__dataset__", which never matched the `__dataset` attribute.
    __slots__ = [
        "__anomaly_type",
        "__dataset",
    ]

    __anomaly_type: typing.Literal["high", "low"]
    __dataset: typing.Union[pd.DataFrame, pd.Series]

    def __init__(
        self, dataset: typing.Union[pd.DataFrame, pd.Series], anomaly_type: typing.Literal["high", "low"] = "high"
    ):
        """
        Initialize DBSCAN model for anomaly detection.

        ## Parameters
        ----------
        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            DataFrame or Series object to be analyzed.
            Index must be date-time and values must be numeric.

        anomaly_type : typing.Literal["high", "low"], default "high"
            Which kind of anomaly is expected.
        """

        self.__anomaly_type = anomaly_type
        self.__dataset = dataset

    def fit(self) -> None:
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    def detect(self) -> None:
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    def evaluate(self, method: typing.Literal["ks", "qq"] = "ks") -> None:
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    @property
    def params(self) -> dict:  # type: ignore
        """
        Not available yet; always raises `NotImplementedError`.
        """
        raise NotImplementedError("Not yet implemented!")

    def __str__(self) -> str:
        # Short code for this detector; the same string selects it in the factory.
        return "DBSCAN"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from __future__ import annotations | ||
|
||
import logging | ||
import typing | ||
|
||
import pandas as pd | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class FactoryDetector:
    """
    Callable factory that builds the detector matching a short method code.

    The detector modules are imported lazily, only for the selected method.
    """

    def __init__(
        self,
        method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
        dataset: typing.Union[pd.DataFrame, pd.Series],
        anomaly_type: typing.Literal["high", "low"] = "high",
    ):
        """
        Store the selection that `__call__` uses to build the detector.

        ## Parameters
        ----------
        method : typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"]
            Short code of the detection method.

        dataset : typing.Union[pandas.DataFrame, pandas.Series]
            Data handed over to the detector.

        anomaly_type : typing.Literal["high", "low"], default "high"
            Kind of anomaly handed over to the detector.
        """
        self.method = method
        self.dataset = dataset
        self.anomaly_type = anomaly_type

    def __call__(self):
        """
        Build and return the detector selected by `self.method`.

        ## Raises
        ----------
        ValueError
            When `self.method` is not one of the supported codes.
        """
        selected = self.method
        detector_kwargs = {"dataset": self.dataset, "anomaly_type": self.anomaly_type}

        if selected == "AE":
            from anomalytics.models.autoencoder import AutoencoderDetector

            return AutoencoderDetector(**detector_kwargs)

        if selected == "BM":
            from anomalytics.models.block_maxima import BlockMaximaDetector

            return BlockMaximaDetector(**detector_kwargs)

        if selected == "DBSCAN":
            from anomalytics.models.dbscan import DBSCANDetector

            return DBSCANDetector(**detector_kwargs)

        if selected == "ISOF":
            from anomalytics.models.isoforest import IsoForestDetector

            return IsoForestDetector(**detector_kwargs)

        if selected == "MAD":
            from anomalytics.models.mad import MADDetector

            return MADDetector(**detector_kwargs)

        if selected == "1CSVM":
            from anomalytics.models.one_class_svm import OneClassSVMDetector

            return OneClassSVMDetector(**detector_kwargs)

        if selected == "POT":
            from anomalytics.models.peaks_over_threshold import POTDetector

            return POTDetector(**detector_kwargs)

        if selected == "ZS":
            from anomalytics.models.zscore import ZScoreDetector

            return ZScoreDetector(**detector_kwargs)

        # None of the known codes matched.
        raise ValueError(
            "Invalid value! Available `method` arguments: 'AE', 'BM', 'DBSCAN', 'ISOF', 'MAD', 'POT', 'ZS', '1CSVM'"
        )
|
||
|
||
def get_detector(
    method: typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"],
    dataset: typing.Union[pd.DataFrame, pd.Series],
    anomaly_type: typing.Literal["high", "low"] = "high",
):
    """
    Create the detector for `method` by delegating to `FactoryDetector`.

    ## Parameters
    ----------
    method : typing.Literal["AE", "BM", "DBSCAN", "ISOF", "MAD", "POT", "ZS", "1CSVM"]
        Short code of the detection method.

    dataset : typing.Union[pandas.DataFrame, pandas.Series]
        Data handed over to the detector.

    anomaly_type : typing.Literal["high", "low"], default "high"
        Kind of anomaly handed over to the detector.

    ## Returns
    ----------
    detector
        Whatever the `FactoryDetector` instance returns when called.
    """
    factory = FactoryDetector(method=method, dataset=dataset, anomaly_type=anomaly_type)
    return factory()
Oops, something went wrong.