From fdebce5bbc84197e0f53261cee27abcdb48977aa Mon Sep 17 00:00:00 2001 From: Hubert Date: Mon, 4 Nov 2024 09:17:20 -0500 Subject: [PATCH] updating anomaly check bounds to not have defaults and require inputs for the bound value and isThresholdInclusive, also adding anomaly detection with extended results README, and adding anomaly detection test with 2 anomaly checks on the same suite --- .../anomalydetection/BaseChangeStrategy.scala | 3 +- .../BatchNormalStrategy.scala | 3 +- .../ExtendedDetectionResult.scala | 48 ++++----- .../OnlineNormalStrategy.scala | 4 +- .../SimpleThresholdStrategy.scala | 4 +- .../seasonal/HoltWinters.scala | 14 ++- .../scala/com/amazon/deequ/checks/Check.scala | 24 +++-- .../AnomalyExtendedResultsConstraint.scala | 8 +- ...yDetectionWithExtendedResultsExample.scala | 7 +- .../examples/anomaly_detection_example.md | 2 + ...detection_with_extended_results_example.md | 75 +++++++++++++ .../amazon/deequ/VerificationSuiteTest.scala | 86 +++++++++++++-- .../AbsoluteChangeStrategyTest.scala | 94 ++++++++-------- .../AnomalyDetectorTest.scala | 54 ++++++---- .../BatchNormalStrategyTest.scala | 56 +++++----- .../OnlineNormalStrategyTest.scala | 91 ++++++++++------ .../RateOfChangeStrategyTest.scala | 5 +- .../RelativeRateOfChangeStrategyTest.scala | 88 ++++++++------- .../SimpleThresholdStrategyTest.scala | 22 ++-- .../com/amazon/deequ/checks/CheckTest.scala | 102 +++++++++--------- ...AnomalyExtendedResultsConstraintTest.scala | 85 +++++++++------ .../deequ/constraints/ConstraintsTest.scala | 46 ++++---- 22 files changed, 584 insertions(+), 337 deletions(-) create mode 100644 src/main/scala/com/amazon/deequ/examples/anomaly_detection_with_extended_results_example.md diff --git a/src/main/scala/com/amazon/deequ/anomalydetection/BaseChangeStrategy.scala b/src/main/scala/com/amazon/deequ/anomalydetection/BaseChangeStrategy.scala index 0ac353223..2d0cf3948 100644 --- a/src/main/scala/com/amazon/deequ/anomalydetection/BaseChangeStrategy.scala +++ b/src/main/scala/com/amazon/deequ/anomalydetection/BaseChangeStrategy.scala @@ -131,7 +131,8 @@ trait BaseChangeStrategy (None, false) } (outputSequenceIndex, AnomalyDetectionDataPoint(value, change, - Threshold(lowerBound = Bound(lowerBound), upperBound = Bound(upperBound)), isAnomaly, 1.0, detail)) + BoundedRange(lowerBound = Bound(lowerBound, inclusive = true), + upperBound = Bound(upperBound, inclusive = true)), isAnomaly, 1.0, detail)) } } } diff --git a/src/main/scala/com/amazon/deequ/anomalydetection/BatchNormalStrategy.scala b/src/main/scala/com/amazon/deequ/anomalydetection/BatchNormalStrategy.scala index 7d4bb6304..41a7bad43 100644 --- a/src/main/scala/com/amazon/deequ/anomalydetection/BatchNormalStrategy.scala +++ b/src/main/scala/com/amazon/deequ/anomalydetection/BatchNormalStrategy.scala @@ -113,7 +113,8 @@ case class BatchNormalStrategy( (None, false) } (index, AnomalyDetectionDataPoint(value, value, - Threshold(lowerBound = Bound(lowerBound), upperBound = Bound(upperBound)), isAnomaly, 1.0, detail)) + BoundedRange(lowerBound = Bound(lowerBound, inclusive = true), + upperBound = Bound(upperBound, inclusive = true)), isAnomaly, 1.0, detail)) } } diff --git a/src/main/scala/com/amazon/deequ/anomalydetection/ExtendedDetectionResult.scala b/src/main/scala/com/amazon/deequ/anomalydetection/ExtendedDetectionResult.scala index 7e024b2cf..c966d738a 100644 --- a/src/main/scala/com/amazon/deequ/anomalydetection/ExtendedDetectionResult.scala +++ b/src/main/scala/com/amazon/deequ/anomalydetection/ExtendedDetectionResult.scala @@ -26,26 +26,26 @@ package com.amazon.deequ.anomalydetection * Anomaly Detection Data Point class * This class is different from the Anomaly Class in that this class * wraps around all data points, not just anomalies, and provides extended results including - * if the data point is an anomaly, and the thresholds used in the anomaly calculation. + * if the data point is an anomaly, and the range with bounds used in the anomaly calculation. * * @param dataMetricValue The metric value that is the data point. * @param anomalyMetricValue The metric value that is being used in the anomaly calculation. * This usually aligns with dataMetricValue but not always, * like in a rate of change strategy where the rate of change is the anomaly metric * which may not equal the actual data point value. - * @param anomalyThreshold The thresholds used in the anomaly check, the anomalyMetricValue is - * compared to this threshold. + * @param anomalyCheckRange The range of bounds used in the anomaly check, the anomalyMetricValue is + * compared to this range. * @param isAnomaly If the data point is an anomaly. * @param confidence Confidence of anomaly detection. * @param detail Detailed error message. */ class AnomalyDetectionDataPoint( - val dataMetricValue: Double, - val anomalyMetricValue: Double, - val anomalyThreshold: Threshold, - val isAnomaly: Boolean, - val confidence: Double, - val detail: Option[String]) + val dataMetricValue: Double, + val anomalyMetricValue: Double, + val anomalyCheckRange: BoundedRange, + val isAnomaly: Boolean, + val confidence: Double, + val detail: Option[String]) { def canEqual(that: Any): Boolean = { @@ -64,7 +64,7 @@ class AnomalyDetectionDataPoint( case anomaly: AnomalyDetectionDataPoint => anomaly.dataMetricValue == dataMetricValue && anomaly.anomalyMetricValue == anomalyMetricValue && - anomaly.anomalyThreshold == anomalyThreshold && + anomaly.anomalyCheckRange == anomalyCheckRange && anomaly.isAnomaly == isAnomaly && anomaly.confidence == confidence case _ => false @@ -76,7 +76,7 @@ class AnomalyDetectionDataPoint( var result = 1 result = prime * result + dataMetricValue.hashCode() result = prime * result + anomalyMetricValue.hashCode() - result = prime * result + anomalyThreshold.hashCode() + result = prime * result + anomalyCheckRange.hashCode() result = prime * result + isAnomaly.hashCode() result = prime * result + confidence.hashCode() result @@ -86,21 +86,21 @@ class AnomalyDetectionDataPoint( object AnomalyDetectionDataPoint { def apply(dataMetricValue: Double, anomalyMetricValue: Double, - anomalyThreshold: Threshold = Threshold(), isAnomaly: Boolean = false, + anomalyCheckRange: BoundedRange, isAnomaly: Boolean, confidence: Double, detail: Option[String] = None ): AnomalyDetectionDataPoint = { - new AnomalyDetectionDataPoint(dataMetricValue, anomalyMetricValue, anomalyThreshold, isAnomaly, confidence, detail) + new AnomalyDetectionDataPoint(dataMetricValue, anomalyMetricValue, anomalyCheckRange, isAnomaly, confidence, detail) } } /** - * Threshold class - * Defines threshold for the anomaly detection, defaults to inclusive bounds of Double.Min and Double.Max. + * BoundedRange class + * Defines range for the anomaly detection. * @param upperBound The upper bound or threshold. * @param lowerBound The lower bound or threshold. */ -case class Threshold(lowerBound: Bound = Bound(Double.MinValue), upperBound: Bound = Bound(Double.MaxValue)) +case class BoundedRange(lowerBound: Bound, upperBound: Bound) /** * Bound Class @@ -108,7 +108,7 @@ case class Threshold(lowerBound: Bound = Bound(Double.MinValue), upperBound: Bou * @param value The value of the bound as a Double. * @param inclusive Boolean indicating if the Bound is inclusive or not. */ -case class Bound(value: Double, inclusive: Boolean = true) +case class Bound(value: Double, inclusive: Boolean) @@ -123,23 +123,21 @@ case class ExtendedDetectionResult(anomalyDetectionDataPointSequence: /** * AnomalyDetectionExtendedResult Class - * This class contains anomaly detection extended results through a Sequence of AnomalyDetectionDataPoints. + * This class contains anomaly detection extended results through an AnomalyDetectionDataPoint. * This is currently an optional field in the ConstraintResult class that is exposed to users. * * Currently, anomaly detection only runs on "newest" data point (referring to the dataframe being - * run on by the verification suite) and not multiple data points, so the returned sequence will contain + * run on by the verification suite) and not multiple data points, so this will contain that * one AnomalyDetectionDataPoint. - * In the future, if we allow the anomaly check to detect multiple points, the returned sequence - * may be more than one AnomalyDetectionDataPoints. - * @param anomalyDetectionDataPoints Sequence of AnomalyDetectionDataPoints. + * @param anomalyDetectionDataPoint AnomalyDetectionDataPoint of newest data point generated from check. */ -case class AnomalyDetectionExtendedResult(anomalyDetectionDataPoints: Seq[AnomalyDetectionDataPoint]) +case class AnomalyDetectionExtendedResult(anomalyDetectionDataPoint: AnomalyDetectionDataPoint) /** * AnomalyDetectionAssertionResult Class * This class is returned by the assertion function Check.isNewestPointNonAnomalousWithExtendedResults. - * @param hasNoAnomaly Boolean indicating if there was no anomaly detected. + * @param hasAnomaly Boolean indicating if there was an anomaly detected. * @param anomalyDetectionExtendedResult AnomalyDetectionExtendedResults class. */ -case class AnomalyDetectionAssertionResult(hasNoAnomaly: Boolean, +case class AnomalyDetectionAssertionResult(hasAnomaly: Boolean, anomalyDetectionExtendedResult: AnomalyDetectionExtendedResult) diff --git a/src/main/scala/com/amazon/deequ/anomalydetection/OnlineNormalStrategy.scala b/src/main/scala/com/amazon/deequ/anomalydetection/OnlineNormalStrategy.scala index 3955eae16..aa9c91276 100644 --- a/src/main/scala/com/amazon/deequ/anomalydetection/OnlineNormalStrategy.scala +++ b/src/main/scala/com/amazon/deequ/anomalydetection/OnlineNormalStrategy.scala @@ -173,8 +173,8 @@ case class OnlineNormalStrategy( val value = dataSeries(index) (index, AnomalyDetectionDataPoint(value, value, - Threshold(lowerBound = Bound(lowerBound), upperBound = Bound(upperBound)), - calcRes.isAnomaly, 1.0, detail)) + BoundedRange(lowerBound = Bound(lowerBound, inclusive = true), + upperBound = Bound(upperBound, inclusive = true)), calcRes.isAnomaly, 1.0, detail)) } } } diff --git a/src/main/scala/com/amazon/deequ/anomalydetection/SimpleThresholdStrategy.scala b/src/main/scala/com/amazon/deequ/anomalydetection/SimpleThresholdStrategy.scala index 03d30c7c7..5e5fe72e8 100644 --- a/src/main/scala/com/amazon/deequ/anomalydetection/SimpleThresholdStrategy.scala +++ b/src/main/scala/com/amazon/deequ/anomalydetection/SimpleThresholdStrategy.scala @@ -78,8 +78,8 @@ case class SimpleThresholdStrategy( } (index, AnomalyDetectionDataPoint(value, value, - Threshold(lowerBound = Bound(lowerBound), upperBound = Bound(upperBound)), - isAnomaly, 1.0, detail)) + BoundedRange(lowerBound = Bound(lowerBound, inclusive = true), + upperBound = Bound(upperBound, inclusive = true)), isAnomaly, 1.0, detail)) } } } diff --git a/src/main/scala/com/amazon/deequ/anomalydetection/seasonal/HoltWinters.scala b/src/main/scala/com/amazon/deequ/anomalydetection/seasonal/HoltWinters.scala index 3d837235a..082911b1c 100644 --- a/src/main/scala/com/amazon/deequ/anomalydetection/seasonal/HoltWinters.scala +++ b/src/main/scala/com/amazon/deequ/anomalydetection/seasonal/HoltWinters.scala @@ -17,8 +17,15 @@ package com.amazon.deequ.anomalydetection.seasonal import breeze.linalg.DenseVector -import breeze.optimize.{ApproximateGradientFunction, DiffFunction, LBFGSB} -import com.amazon.deequ.anomalydetection.{Anomaly, AnomalyDetectionDataPoint, AnomalyDetectionStrategy, AnomalyDetectionStrategyWithExtendedResults, Threshold, Bound} +import breeze.optimize.ApproximateGradientFunction +import breeze.optimize.DiffFunction +import breeze.optimize.LBFGSB +import com.amazon.deequ.anomalydetection.Anomaly +import com.amazon.deequ.anomalydetection.AnomalyDetectionDataPoint +import com.amazon.deequ.anomalydetection.AnomalyDetectionStrategy +import com.amazon.deequ.anomalydetection.AnomalyDetectionStrategyWithExtendedResults +import com.amazon.deequ.anomalydetection.BoundedRange +import com.amazon.deequ.anomalydetection.Bound import collection.mutable.ListBuffer @@ -202,7 +209,8 @@ class HoltWinters(seriesPeriodicity: Int) detectionIndex + startIndex -> AnomalyDetectionDataPoint( dataMetricValue = inputValue, anomalyMetricValue = anomalyMetricValue, - anomalyThreshold = Threshold(upperBound = Bound(upperBound)), + anomalyCheckRange = BoundedRange(lowerBound = Bound(Double.MinValue, inclusive = true), + upperBound = Bound(upperBound, inclusive = true)), isAnomaly = isAnomaly, confidence = 1.0, detail = detail diff --git a/src/main/scala/com/amazon/deequ/checks/Check.scala b/src/main/scala/com/amazon/deequ/checks/Check.scala index 8d4ffa1fb..446c2022d 100644 --- a/src/main/scala/com/amazon/deequ/checks/Check.scala +++ b/src/main/scala/com/amazon/deequ/checks/Check.scala @@ -25,7 +25,15 @@ import com.amazon.deequ.analyzers.Histogram import com.amazon.deequ.analyzers.KLLParameters import com.amazon.deequ.analyzers.Patterns import com.amazon.deequ.analyzers.State -import com.amazon.deequ.anomalydetection.{AnomalyDetectionAssertionResult, AnomalyDetectionExtendedResult, ExtendedDetectionResult, AnomalyDetectionStrategy, AnomalyDetectionStrategyWithExtendedResults, AnomalyDetector, AnomalyDetectorWithExtendedResults, DataPoint, HistoryUtils} +import com.amazon.deequ.anomalydetection.AnomalyDetectionAssertionResult +import com.amazon.deequ.anomalydetection.AnomalyDetectionExtendedResult +import com.amazon.deequ.anomalydetection.ExtendedDetectionResult +import com.amazon.deequ.anomalydetection.AnomalyDetectionStrategy +import com.amazon.deequ.anomalydetection.AnomalyDetectionStrategyWithExtendedResults +import com.amazon.deequ.anomalydetection.AnomalyDetector +import com.amazon.deequ.anomalydetection.AnomalyDetectorWithExtendedResults +import com.amazon.deequ.anomalydetection.DataPoint +import com.amazon.deequ.anomalydetection.HistoryUtils import com.amazon.deequ.checks.ColumnCondition.isAnyNotNull import com.amazon.deequ.checks.ColumnCondition.isEachNotNull import com.amazon.deequ.constraints.Constraint._ @@ -1487,21 +1495,21 @@ object Check { */ private[deequ] def getNewestPointAnomalyResults(extendedDetectionResult: ExtendedDetectionResult): AnomalyDetectionAssertionResult = { - val (hasNoAnomaly, anomalyDetectionExtendedResults): (Boolean, AnomalyDetectionExtendedResult) = { + val (hasAnomaly, anomalyDetectionExtendedResults): (Boolean, AnomalyDetectionExtendedResult) = { - // Based on upstream code, this anomaly detection data point sequence should never be empty + // Based on upstream code, this anomaly detection data point sequence should never be empty. require(extendedDetectionResult.anomalyDetectionDataPointSequence != Nil, "anomalyDetectionDataPoints from AnomalyDetectionExtendedResult cannot be empty") - // get the last anomaly detection data point of sequence (there should only be one element for now) - // and check the isAnomaly boolean, also return the last anomaly detection data point - // wrapped in the anomaly detection extended result class + // Get the last anomaly detection data point of sequence (there should only be one element for now). + // Check the isAnomaly boolean, also return the last anomaly detection data point + // wrapped in the anomaly detection extended result class. extendedDetectionResult.anomalyDetectionDataPointSequence match { case _ :+ lastAnomalyDataPointPair => - (!lastAnomalyDataPointPair._2.isAnomaly, AnomalyDetectionExtendedResult(Seq(lastAnomalyDataPointPair._2))) + (lastAnomalyDataPointPair._2.isAnomaly, AnomalyDetectionExtendedResult(lastAnomalyDataPointPair._2)) } } AnomalyDetectionAssertionResult( - hasNoAnomaly = hasNoAnomaly, anomalyDetectionExtendedResult = anomalyDetectionExtendedResults) + hasAnomaly = hasAnomaly, anomalyDetectionExtendedResult = anomalyDetectionExtendedResults) } } diff --git a/src/main/scala/com/amazon/deequ/constraints/AnomalyExtendedResultsConstraint.scala b/src/main/scala/com/amazon/deequ/constraints/AnomalyExtendedResultsConstraint.scala index c55736ddd..3305aaa4f 100644 --- a/src/main/scala/com/amazon/deequ/constraints/AnomalyExtendedResultsConstraint.scala +++ b/src/main/scala/com/amazon/deequ/constraints/AnomalyExtendedResultsConstraint.scala @@ -16,12 +16,14 @@ package com.amazon.deequ.constraints -import com.amazon.deequ.analyzers.{Analyzer, State} +import com.amazon.deequ.analyzers.Analyzer +import com.amazon.deequ.analyzers.State import com.amazon.deequ.anomalydetection.AnomalyDetectionAssertionResult import com.amazon.deequ.metrics.Metric import org.apache.spark.sql.DataFrame -import scala.util.{Failure, Success} +import scala.util.Success +import scala.util.Success /** * Case class for anomaly with extended results constraints that provides unified way to access @@ -76,7 +78,7 @@ private[deequ] case class AnomalyExtendedResultsConstraint[S <: State[S], M, V]( val assertOn = runPickerOnMetric(metricValue) val anomalyAssertionResult = runAssertion(assertOn) - if (anomalyAssertionResult.hasNoAnomaly) { + if (!anomalyAssertionResult.hasAnomaly) { ConstraintResult(this, ConstraintStatus.Success, metric = Some(metric), anomalyDetectionExtendedResultOption = Some(anomalyAssertionResult.anomalyDetectionExtendedResult)) } else { diff --git a/src/main/scala/com/amazon/deequ/examples/AnomalyDetectionWithExtendedResultsExample.scala b/src/main/scala/com/amazon/deequ/examples/AnomalyDetectionWithExtendedResultsExample.scala index dd73b006b..6666b9171 100644 --- a/src/main/scala/com/amazon/deequ/examples/AnomalyDetectionWithExtendedResultsExample.scala +++ b/src/main/scala/com/amazon/deequ/examples/AnomalyDetectionWithExtendedResultsExample.scala @@ -20,7 +20,8 @@ import com.amazon.deequ.VerificationSuite import com.amazon.deequ.analyzers.Size import com.amazon.deequ.anomalydetection.RelativeRateOfChangeStrategy import com.amazon.deequ.checks.CheckStatus._ -import com.amazon.deequ.examples.ExampleUtils.{itemsAsDataframe, withSpark} +import com.amazon.deequ.examples.ExampleUtils.itemsAsDataframe +import com.amazon.deequ.examples.ExampleUtils.withSpark import com.amazon.deequ.repository.ResultKey import com.amazon.deequ.repository.memory.InMemoryMetricsRepository @@ -82,9 +83,9 @@ private[examples] object AnomalyDetectionWithExtendedResultsExample extends App if (verificationResult.status != Success) { println("Anomaly detected in the Size() metric!") val anomalyDetectionDataPoint = verificationResult.checkResults.head._2.constraintResults. - head.anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoints.head + head.anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoint println(s"Rate of change of ${anomalyDetectionDataPoint.anomalyMetricValue} was not in " + - s"${anomalyDetectionDataPoint.anomalyThreshold}") + s"${anomalyDetectionDataPoint.anomalyCheckRange}") /* Lets have a look at the actual metrics. */ metricsRepository diff --git a/src/main/scala/com/amazon/deequ/examples/anomaly_detection_example.md b/src/main/scala/com/amazon/deequ/examples/anomaly_detection_example.md index 9acf7d83d..d72f5e951 100644 --- a/src/main/scala/com/amazon/deequ/examples/anomaly_detection_example.md +++ b/src/main/scala/com/amazon/deequ/examples/anomaly_detection_example.md @@ -1,5 +1,7 @@ # Anomaly detection +*After reading this page, check out [anomaly checks with extended results](https://github.com/awslabs/deequ/blob/master/src/main/scala/com/amazon/deequ/examples/anomaly_detection_with_extended_results_example.md) for how to access more details about the anomaly check such as the upper and lower bounds used in the check. This requires using a different method that has the same signature.* + Very often, it is hard to exactly define what constraints we want to evaluate on our data. However, we often have a better understanding of how much change we expect in certain metrics of our data. Therefore, **deequ** supports anomaly detection for data quality metrics. The idea is that we regularly store the metrics of our data in a [MetricsRepository](https://github.com/awslabs/deequ/blob/master/src/main/scala/com/amazon/deequ/examples/metrics_repository_example.md). Once we do that, we can run anomaly checks that compare the current value of the metric to its values in the past and allow us to detect anomalous changes. In this simple example, we assume that we compute the size of a dataset every day and we want to ensure that it does not change drastically: the number of rows on a given day should not be more than double of what we have seen on the day before. diff --git a/src/main/scala/com/amazon/deequ/examples/anomaly_detection_with_extended_results_example.md b/src/main/scala/com/amazon/deequ/examples/anomaly_detection_with_extended_results_example.md new file mode 100644 index 000000000..a061f0e75 --- /dev/null +++ b/src/main/scala/com/amazon/deequ/examples/anomaly_detection_with_extended_results_example.md @@ -0,0 +1,75 @@ +# Anomaly detection with extended results + +Using the `addAnomalyCheckWithExtendedResults` method instead of the original `addAnomalyCheck`method, you can get more +detailed results about the anomaly detection result from the newly created metric. You can get details such as: + +- dataMetricValue: The metric value that is the data point. +- anomalyMetricValue: The value of the metric that is being checked, which isn't always equal to the dataMetricValue. +- anomalyCheckRange: The range of bounds used in the anomaly check, the anomalyMetricValue is compared to this range. +- isAnomaly: If the anomalyMetricValue is outside the anomalyCheckRange, this is true. +- confidence: The confidence of the anomaly detection. +- detail: An optional detail message. + +These are contained within the AnomalyDetectionDataPoint class. +```scala +class AnomalyDetectionDataPoint( +val dataMetricValue: Double, +val anomalyMetricValue: Double, +val anomalyCheckRange: BoundedRange, +val isAnomaly: Boolean, +val confidence: Double, +val detail: Option[String]) + +case class BoundedRange(lowerBound: Bound, upperBound: Bound) + +case class Bound(value: Double, inclusive: Boolean) +``` + +In terms of accessing the result, the AnomalyDetectionDataPoint is wrapped in an AnomalyDetectionExtendedResult class +that is an optional field in the ConstraintResult class. The ConstraintResult class is a class that contains the +results of a constraint check. + +```scala +case class ConstraintResult( + constraint: Constraint, + status: ConstraintStatus.Value, + message: Option[String] = None, + metric: Option[Metric[_]] = None, + anomalyDetectionExtendedResultOption: Option[AnomalyDetectionExtendedResult] = None) + +case class AnomalyDetectionExtendedResult(anomalyDetectionDataPoint: AnomalyDetectionDataPoint) +``` + + +In order to get extended results you need to run your verification suite with +the `addAnomalyCheckWithExtendedResults` method, which has the same method signature as the original `addAnomalyCheck` +method. + +```scala +val result = VerificationSuite() + .onData(yesterdaysDataset) + .useRepository(metricsRepository) + .saveOrAppendResult(yesterdaysKey) + .addAnomalyCheckWithExtendedResults( + RelativeRateOfChangeStrategy(maxRateIncrease = Some(2.0)), + Size()) + .run() + +val anomalyDetectionExtendedResult: AnomalyDetectionExtendedResult = result.checkResults.head._2.constraintResults.head + .anomalyDetectionExtendedResultOption.getOrElse("placeholder to do something else") + +val anomalyDetectionDataPoint: AnomalyDetectionDataPoint = anomalyDetectionExtendedResult.anomalyDetectionDataPoint +``` + +You can access the values of the anomaly detection extended results like the anomalyMetricValue and anomalyCheckRange. +```scala +println(s"Anomaly check range: ${anomalyDetectionDataPoint.anomalyCheckRange}") +println(s"Anomaly metric value: ${anomalyDetectionDataPoint.anomalyMetricValue}") +``` + +``` +Anomaly check range: BoundedRange(Bound(-2.0,true),Bound(2.0,true)) +Anomaly metric value: 4.5 +``` + +An [executable version of this example with extended results](https://github.com/awslabs/deequ/blob/master/src/main/scala/com/amazon/deequ/examples/AnomalyDetectionWithExtendedResultsExample.scala) is available as part of our code base. diff --git a/src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala b/src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala index f410ea821..54d9040a4 100644 --- a/src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala +++ b/src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala @@ -18,7 +18,10 @@ package com.amazon.deequ import com.amazon.deequ.analyzers._ import com.amazon.deequ.analyzers.runners.AnalyzerContext -import com.amazon.deequ.anomalydetection.{AbsoluteChangeStrategy, AnomalyDetectionDataPoint, AnomalyDetectionExtendedResult, Bound, Threshold} +import com.amazon.deequ.anomalydetection.AbsoluteChangeStrategy +import com.amazon.deequ.anomalydetection.AnomalyDetectionDataPoint +import com.amazon.deequ.anomalydetection.Bound +import com.amazon.deequ.anomalydetection.BoundedRange import com.amazon.deequ.checks.Check import com.amazon.deequ.checks.CheckLevel import com.amazon.deequ.checks.CheckStatus @@ -1132,23 +1135,26 @@ class VerificationSuiteTest extends WordSpec with Matchers with SparkContextSpec val checkResultsOne = verificationResultOne.checkResults.head._2.status val actualResultsOneAnomalyDetectionDataPoint = verificationResultOne.checkResults.head._2.constraintResults.head - .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoints.head + .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoint val expectedResultsOneAnomalyDetectionDataPoint = - AnomalyDetectionDataPoint(11.0, 7.0, Threshold(Bound(-2.0), Bound(2.0)), isAnomaly = true, 1.0) + AnomalyDetectionDataPoint(11.0, 7.0, BoundedRange(Bound(-2.0, inclusive = true), + Bound(2.0, inclusive = true)), isAnomaly = true, 1.0) val checkResultsTwo = verificationResultTwo.checkResults.head._2.status val actualResultsTwoAnomalyDetectionDataPoint = verificationResultTwo.checkResults.head._2.constraintResults.head - .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoints.head + .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoint val expectedResultsTwoAnomalyDetectionDataPoint = - AnomalyDetectionDataPoint(11.0, 0.0, Threshold(Bound(-7.0), Bound(7.0)), isAnomaly = false, 1.0) + AnomalyDetectionDataPoint(11.0, 0.0, BoundedRange(Bound(-7.0, inclusive = true), + Bound(7.0, inclusive = true)), isAnomaly = false, 1.0) val checkResultsThree = verificationResultThree.checkResults.head._2.status val actualResultsThreeAnomalyDetectionDataPoint = verificationResultThree.checkResults.head._2.constraintResults.head - .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoints.head + .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoint val expectedResultsThreeAnomalyDetectionDataPoint = - AnomalyDetectionDataPoint(11.0, 0.0, Threshold(Bound(-7.0), Bound(7.0)), isAnomaly = false, 1.0) + AnomalyDetectionDataPoint(11.0, 0.0, BoundedRange(Bound(-7.0, inclusive = true), + Bound(7.0, inclusive = true)), isAnomaly = false, 1.0) assert(checkResultsOne == CheckStatus.Warning) assert(checkResultsTwo == CheckStatus.Success) @@ -1198,16 +1204,74 @@ class VerificationSuiteTest extends WordSpec with Matchers with SparkContextSpec val checkResultsOne = verificationResultOne.checkResults.values.toSeq(1).status val actualResultsOneAnomalyDetectionDataPoint = verificationResultOne.checkResults.values.toSeq(1).constraintResults.head - .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoints.head + .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoint val expectedResultsOneAnomalyDetectionDataPoint = - AnomalyDetectionDataPoint(11.0, 7.0, Threshold(Bound(-2.0), Bound(2.0)), isAnomaly = true, 1.0) + AnomalyDetectionDataPoint(11.0, 7.0, BoundedRange(Bound(-2.0, inclusive = true), + Bound(2.0, inclusive = true)), isAnomaly = true, 1.0) val checkResultsTwo = verificationResultTwo.checkResults.head._2.status val actualResultsTwoAnomalyDetectionDataPoint = verificationResultTwo.checkResults.head._2.constraintResults.head - .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoints.head + .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoint val expectedResultsTwoAnomalyDetectionDataPoint = - AnomalyDetectionDataPoint(11.0, 0.0, Threshold(Bound(-7.0), Bound(7.0)), isAnomaly = false, 1.0) + AnomalyDetectionDataPoint(11.0, 0.0, BoundedRange(Bound(-7.0, inclusive = true), + Bound(7.0, inclusive = true)), isAnomaly = false, 1.0) + + assert(checkResultsOne == CheckStatus.Warning) + assert(checkResultsTwo == CheckStatus.Success) + + assert(actualResultsOneAnomalyDetectionDataPoint == expectedResultsOneAnomalyDetectionDataPoint) + assert(actualResultsTwoAnomalyDetectionDataPoint == expectedResultsTwoAnomalyDetectionDataPoint) + } + } + + + "addAnomalyCheckWithExtendedResults with two anomaly checks on the same suite should work and " + + "output extended results" in + withSparkSession { sparkSession => + evaluateWithRepositoryWithHistory { repository => + + val df = getDfWithNRows(sparkSession, 11) + val saveResultsWithKey = ResultKey(5, Map.empty) + + val analyzers = Completeness("item") :: Nil + + val verificationResultOne = VerificationSuite() + .onData(df) + .addCheck(Check(CheckLevel.Error, "group-1").hasSize(_ == 11)) + .useRepository(repository) + .addRequiredAnalyzers(analyzers) + .saveOrAppendResult(saveResultsWithKey) + .addAnomalyCheckWithExtendedResults( + AbsoluteChangeStrategy(Some(-2.0), Some(2.0)), + Size(), + Some(AnomalyCheckConfig(CheckLevel.Warning, "Anomaly check to fail")) + ) + .addAnomalyCheckWithExtendedResults( + AbsoluteChangeStrategy(Some(-7.0), Some(7.0)), + Size(), + Some(AnomalyCheckConfig(CheckLevel.Error, "Anomaly check to succeed", + Map.empty, Some(0), Some(11))) + ) + .run() + + + val checkResultsOne = verificationResultOne.checkResults.values.toSeq(1).status + val actualResultsOneAnomalyDetectionDataPoint = + verificationResultOne.checkResults.values.toSeq(1).constraintResults.head + .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoint + val expectedResultsOneAnomalyDetectionDataPoint = + AnomalyDetectionDataPoint(11.0, 7.0, BoundedRange(Bound(-2.0, inclusive = true), + Bound(2.0, inclusive = true)), isAnomaly = true, 1.0) + + val checkResultsTwo = verificationResultOne.checkResults.values.toSeq(2).status + val actualResultsTwoAnomalyDetectionDataPoint = + verificationResultOne.checkResults.values.toSeq(2).constraintResults.head + .anomalyDetectionExtendedResultOption.get.anomalyDetectionDataPoint + val expectedResultsTwoAnomalyDetectionDataPoint = + AnomalyDetectionDataPoint(11.0, 7.0, BoundedRange(Bound(-7.0, inclusive = true), + Bound(7.0, inclusive = true)), isAnomaly = false, 1.0) + assert(checkResultsOne == CheckStatus.Warning) assert(checkResultsTwo == CheckStatus.Success) diff --git a/src/test/scala/com/amazon/deequ/anomalydetection/AbsoluteChangeStrategyTest.scala b/src/test/scala/com/amazon/deequ/anomalydetection/AbsoluteChangeStrategyTest.scala index f970f9812..1c435b546 100644 --- a/src/test/scala/com/amazon/deequ/anomalydetection/AbsoluteChangeStrategyTest.scala +++ b/src/test/scala/com/amazon/deequ/anomalydetection/AbsoluteChangeStrategyTest.scala @@ -17,7 +17,8 @@ package com.amazon.deequ.anomalydetection import breeze.linalg.DenseVector -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.Matchers +import org.scalatest.WordSpec class AbsoluteChangeStrategyTest extends WordSpec with Matchers { @@ -158,35 +159,35 @@ class AbsoluteChangeStrategyTest extends WordSpec with Matchers { "detect all anomalies if no interval specified" in { val anomalyResult = strategy.detectWithExtendedResults(data).filter({case (_, anom) => anom.isAnomaly}) - val expectedAnomalyThreshold = Threshold(Bound(-2.0), Bound(2.0)) + val expectedAnomalyCheckRange = BoundedRange(Bound(-2.0, inclusive = true), Bound(2.0, inclusive = true)) val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (20, AnomalyDetectionDataPoint(20, 19, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (21, AnomalyDetectionDataPoint(-21, -41, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (22, AnomalyDetectionDataPoint(22, 43, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (23, AnomalyDetectionDataPoint(-23, -45, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (24, AnomalyDetectionDataPoint(24, 47, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (25, AnomalyDetectionDataPoint(-25, -49, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (26, AnomalyDetectionDataPoint(26, 51, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (27, AnomalyDetectionDataPoint(-27, -53, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (28, AnomalyDetectionDataPoint(28, 55, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (29, AnomalyDetectionDataPoint(-29, -57, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (30, AnomalyDetectionDataPoint(30, 59, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (31, AnomalyDetectionDataPoint(1, -29, expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (20, AnomalyDetectionDataPoint(20, 19, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (21, AnomalyDetectionDataPoint(-21, -41, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (22, AnomalyDetectionDataPoint(22, 43, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (23, AnomalyDetectionDataPoint(-23, -45, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (24, AnomalyDetectionDataPoint(24, 47, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (25, AnomalyDetectionDataPoint(-25, -49, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (26, AnomalyDetectionDataPoint(26, 51, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (27, AnomalyDetectionDataPoint(-27, -53, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (28, AnomalyDetectionDataPoint(28, 55, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (29, AnomalyDetectionDataPoint(-29, -57, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (30, AnomalyDetectionDataPoint(30, 59, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (31, AnomalyDetectionDataPoint(1, -29, expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } "only detect anomalies in interval" in { val anomalyResult = strategy.detectWithExtendedResults(data, (25, 50)).filter({case (_, anom) => anom.isAnomaly}) - val expectedAnomalyThreshold = Threshold(Bound(-2.0), Bound(2.0)) + val expectedAnomalyCheckRange = BoundedRange(Bound(-2.0, inclusive = true), Bound(2.0, inclusive = true)) val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (25, AnomalyDetectionDataPoint(-25, -49, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (26, AnomalyDetectionDataPoint(26, 51, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (27, AnomalyDetectionDataPoint(-27, -53, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (28, AnomalyDetectionDataPoint(28, 55, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (29, AnomalyDetectionDataPoint(-29, -57, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (30, AnomalyDetectionDataPoint(30, 59, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (31, AnomalyDetectionDataPoint(1, -29, expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (25, AnomalyDetectionDataPoint(-25, -49, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (26, AnomalyDetectionDataPoint(26, 51, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (27, AnomalyDetectionDataPoint(-27, -53, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (28, AnomalyDetectionDataPoint(28, 55, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (29, AnomalyDetectionDataPoint(-29, -57, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (30, AnomalyDetectionDataPoint(30, 59, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (31, AnomalyDetectionDataPoint(1, -29, expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -194,15 +195,16 @@ class AbsoluteChangeStrategyTest extends WordSpec with Matchers { "ignore min rate if none is given" in { val strategy = AbsoluteChangeStrategy(None, Some(1.0)) val anomalyResult = strategy.detectWithExtendedResults(data).filter({case (_, anom) => anom.isAnomaly}) - val expectedAnomalyThreshold = Threshold(upperBound = Bound(1.0)) + val expectedAnomalyCheckRange = BoundedRange(lowerBound = Bound(Double.MinValue, inclusive = true), + upperBound = Bound(1.0, inclusive = true)) // Anomalies with positive values only val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (20, AnomalyDetectionDataPoint(20, 19, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (22, AnomalyDetectionDataPoint(22, 43, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (24, AnomalyDetectionDataPoint(24, 47, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (26, AnomalyDetectionDataPoint(26, 51, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (28, AnomalyDetectionDataPoint(28, 55, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (30, AnomalyDetectionDataPoint(30, 59, expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (20, AnomalyDetectionDataPoint(20, 19, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (22, AnomalyDetectionDataPoint(22, 43, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (24, AnomalyDetectionDataPoint(24, 47, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (26, AnomalyDetectionDataPoint(26, 51, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (28, AnomalyDetectionDataPoint(28, 55, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (30, AnomalyDetectionDataPoint(30, 59, expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) @@ -211,16 +213,17 @@ class AbsoluteChangeStrategyTest extends WordSpec with Matchers { "ignore max rate if none is given" in { val strategy = AbsoluteChangeStrategy(Some(-1.0), None) val anomalyResult = strategy.detectWithExtendedResults(data).filter({case (_, anom) => anom.isAnomaly}) - val expectedAnomalyThreshold = Threshold(lowerBound = Bound(-1.0)) + val expectedAnomalyCheckRange = BoundedRange(lowerBound = Bound(-1.0, inclusive = true), + upperBound = Bound(Double.MaxValue, inclusive = true)) // Anomalies with negative values only val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (21, AnomalyDetectionDataPoint(-21, -41, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (23, AnomalyDetectionDataPoint(-23, -45, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (25, AnomalyDetectionDataPoint(-25, -49, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (27, AnomalyDetectionDataPoint(-27, -53, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (29, AnomalyDetectionDataPoint(-29, -57, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (31, AnomalyDetectionDataPoint(1, -29, expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (21, AnomalyDetectionDataPoint(-21, -41, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (23, AnomalyDetectionDataPoint(-23, -45, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (25, AnomalyDetectionDataPoint(-25, -49, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (27, AnomalyDetectionDataPoint(-27, -53, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (29, AnomalyDetectionDataPoint(-29, -57, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (31, AnomalyDetectionDataPoint(1, -29, expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -239,8 +242,10 @@ class AbsoluteChangeStrategyTest extends WordSpec with Matchers { val result = strategy.detectWithExtendedResults(data).filter({ case (_, anom) => anom.isAnomaly }) val expectedResult = Seq( - (4, AnomalyDetectionDataPoint(18.0, 9.0, Threshold(upperBound = Bound(8.0)), isAnomaly = true, 1.0)), - (5, AnomalyDetectionDataPoint(72.0, 42.0, Threshold(upperBound = Bound(8.0)), isAnomaly = true, 1.0)) + (4, AnomalyDetectionDataPoint(18.0, 9.0, BoundedRange(lowerBound = Bound(Double.MinValue, inclusive = true), + upperBound = Bound(8.0, inclusive = true)), isAnomaly = true, 1.0)), + (5, AnomalyDetectionDataPoint(72.0, 42.0, BoundedRange(lowerBound = Bound(Double.MinValue, inclusive = true), + upperBound = Bound(8.0, inclusive = true)), isAnomaly = true, 1.0)) ) assert(result == expectedResult) } @@ -251,7 +256,8 @@ class AbsoluteChangeStrategyTest extends WordSpec with Matchers { val result = strategy.detectWithExtendedResults(data, (5, 6)).filter({case (_, anom) => anom.isAnomaly}) val expectedResult = Seq( - (5, AnomalyDetectionDataPoint(72.0, 42.0, Threshold(upperBound = Bound(8.0)), isAnomaly = true, 1.0)) + (5, AnomalyDetectionDataPoint(72.0, 42.0, BoundedRange(lowerBound = Bound(Double.MinValue, inclusive = true), + upperBound = Bound(8.0, inclusive = true)), isAnomaly = true, 1.0)) ) assert(result == expectedResult) } @@ -261,8 +267,10 @@ class AbsoluteChangeStrategyTest extends WordSpec with Matchers { val result = strategy.detectWithExtendedResults(data).filter({case (_, anom) => anom.isAnomaly}) val expectedResult = Seq( - (2, AnomalyDetectionDataPoint(4.0, 5.0, Threshold(Bound(-2.0), Bound(2.0)), isAnomaly = true, 1.0)), - (3, AnomalyDetectionDataPoint(-7.0, -11.0, Threshold(Bound(-2.0), Bound(2.0)), isAnomaly = true, 1.0)) + (2, AnomalyDetectionDataPoint(4.0, 5.0, BoundedRange(Bound(-2.0, inclusive = true), + Bound(2.0, inclusive = true)), isAnomaly = true, 1.0)), + (3, AnomalyDetectionDataPoint(-7.0, -11.0, BoundedRange(Bound(-2.0, inclusive = true), + Bound(2.0, inclusive = true)), isAnomaly = true, 1.0)) ) assert(result == expectedResult) } @@ -292,8 +300,8 @@ class AbsoluteChangeStrategyTest extends WordSpec with Matchers { result.foreach { case (_, anom) => val value = anom.anomalyMetricValue - val upperBound = anom.anomalyThreshold.upperBound.value - val lowerBound = anom.anomalyThreshold.lowerBound.value + val upperBound = anom.anomalyCheckRange.upperBound.value + val lowerBound = anom.anomalyCheckRange.lowerBound.value assert(value < lowerBound || value > upperBound) } diff --git a/src/test/scala/com/amazon/deequ/anomalydetection/AnomalyDetectorTest.scala b/src/test/scala/com/amazon/deequ/anomalydetection/AnomalyDetectorTest.scala index 6068d111b..cdb87b763 100644 --- a/src/test/scala/com/amazon/deequ/anomalydetection/AnomalyDetectorTest.scala +++ b/src/test/scala/com/amazon/deequ/anomalydetection/AnomalyDetectorTest.scala @@ -17,7 +17,9 @@ package com.amazon.deequ.anomalydetection import org.scalamock.scalatest.MockFactory -import org.scalatest.{Matchers, PrivateMethodTester, WordSpec} +import org.scalatest.Matchers +import org.scalatest.PrivateMethodTester +import org.scalatest.WordSpec class AnomalyDetectorTest extends WordSpec with Matchers with MockFactory with PrivateMethodTester { @@ -111,6 +113,10 @@ class AnomalyDetectorTest extends WordSpec with Matchers with MockFactory with P val fakeAnomalyDetector = stub[AnomalyDetectionStrategyWithExtendedResults] + // This is used as a default bounded range value for anomaly detection + val defaultBoundedRange = BoundedRange(lowerBound = Bound(0.0, inclusive = true), + upperBound = Bound(1.0, inclusive = true)) + val aD = AnomalyDetectorWithExtendedResults(fakeAnomalyDetector) val data = Seq((0L, -1.0), (1L, 2.0), (2L, 3.0), (3L, 0.5)).map { case (t, v) => DataPoint[Double](t, Option(v)) @@ -121,20 +127,24 @@ class AnomalyDetectorTest extends WordSpec with Matchers with MockFactory with P DataPoint[Double](2L, None), DataPoint[Double](3L, Option(1.0))) (fakeAnomalyDetector.detectWithExtendedResults _ when(Vector(1.0, 2.0, 1.0), (0, 3))) - .returns(Seq((1, AnomalyDetectionDataPoint(2.0, 2.0, Threshold(), confidence = 1.0)))) + .returns(Seq((1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, + isAnomaly = true)))) val anomalyResult = aD.detectAnomaliesInHistoryWithExtendedResults(data, (0L, 4L)) - assert(anomalyResult == ExtendedDetectionResult(Seq((1L, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0))))) + assert(anomalyResult == ExtendedDetectionResult(Seq((1L, AnomalyDetectionDataPoint(2.0, 2.0, + defaultBoundedRange, confidence = 1.0, isAnomaly = true))))) } "only detect values in range" in { (fakeAnomalyDetector.detectWithExtendedResults _ when(Vector(-1.0, 2.0, 3.0, 0.5), (2, 4))) - .returns(Seq((2, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)))) + .returns(Seq((2, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, + isAnomaly = true)))) val anomalyResult = aD.detectAnomaliesInHistoryWithExtendedResults(data, (2L, 4L)) - assert(anomalyResult == ExtendedDetectionResult(Seq((2L, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0))))) + assert(anomalyResult == ExtendedDetectionResult(Seq((2L, AnomalyDetectionDataPoint(3.0, 3.0, + defaultBoundedRange, confidence = 1.0, isAnomaly = true))))) } "throw an error when intervals are not ordered" in { @@ -153,16 +163,17 @@ class AnomalyDetectorTest extends WordSpec with Matchers with MockFactory with P (fakeAnomalyDetector.detectWithExtendedResults _ when(data.map(_.metricValue.get).toVector, (0, 2))) .returns ( Seq( - (0, AnomalyDetectionDataPoint(5.0, 5.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(5.0, 5.0, confidence = 1.0)) + (0, AnomalyDetectionDataPoint(5.0, 5.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)), + (1, AnomalyDetectionDataPoint(5.0, 5.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)) ) ) val anomalyResult = aD.detectAnomaliesInHistoryWithExtendedResults(data, (200L, 401L)) assert(anomalyResult == ExtendedDetectionResult(Seq( - (200L, AnomalyDetectionDataPoint(5.0, 5.0, confidence = 1.0)), - (400L, AnomalyDetectionDataPoint(5.0, 5.0, confidence = 1.0))))) + (200L, AnomalyDetectionDataPoint(5.0, 5.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)), + (400L, AnomalyDetectionDataPoint(5.0, 5.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true))) + )) } "treat unordered values with time gaps correctly" in { @@ -174,18 +185,18 @@ class AnomalyDetectorTest extends WordSpec with Matchers with MockFactory with P (fakeAnomalyDetector.detectWithExtendedResults _ when(Vector(0.5, -1.0, 3.0, 2.0), (0, 4))) .returns( Seq( - (1, AnomalyDetectionDataPoint(-1.0, -1.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (3, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)) + (1, AnomalyDetectionDataPoint(-1.0, -1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)), + (2, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)), + (3, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)) ) ) val anomalyResult = aD.detectAnomaliesInHistoryWithExtendedResults(data) assert(anomalyResult == ExtendedDetectionResult( - Seq((10L, AnomalyDetectionDataPoint(-1.0, -1.0, confidence = 1.0)), - (11L, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (25L, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0))))) + Seq((10L, AnomalyDetectionDataPoint(-1.0, -1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)), + (11L, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)), + (25L, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true))))) } "treat unordered values without time gaps correctly" in { @@ -194,16 +205,17 @@ class AnomalyDetectorTest extends WordSpec with Matchers with MockFactory with P } (fakeAnomalyDetector.detectWithExtendedResults _ when(Vector(0.5, -1.0, 3.0, 2.0), (0, 4))) - .returns(Seq((1, AnomalyDetectionDataPoint(-1.0, -1.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (3, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)))) + .returns(Seq((1, AnomalyDetectionDataPoint(-1.0, -1.0, defaultBoundedRange, confidence = 1.0, + isAnomaly = true)), + (2, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)), + (3, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)))) val anomalyResult = aD.detectAnomaliesInHistoryWithExtendedResults(data) assert(anomalyResult == ExtendedDetectionResult(Seq( - (1L, AnomalyDetectionDataPoint(-1.0, -1.0, confidence = 1.0)), - (2L, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (3L, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0))))) + (1L, AnomalyDetectionDataPoint(-1.0, -1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)), + (2L, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)), + (3L, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true))))) } } diff --git a/src/test/scala/com/amazon/deequ/anomalydetection/BatchNormalStrategyTest.scala b/src/test/scala/com/amazon/deequ/anomalydetection/BatchNormalStrategyTest.scala index 0575ad3f7..1634053eb 100644 --- a/src/test/scala/com/amazon/deequ/anomalydetection/BatchNormalStrategyTest.scala +++ b/src/test/scala/com/amazon/deequ/anomalydetection/BatchNormalStrategyTest.scala @@ -16,7 +16,8 @@ package com.amazon.deequ.anomalydetection -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.Matchers +import org.scalatest.WordSpec import scala.util.Random @@ -117,14 +118,15 @@ class BatchNormalStrategyTest extends WordSpec with Matchers { val anomalyResult = strategy.detectWithExtendedResults(data, (25, 50)).filter({ case (_, anom) => anom.isAnomaly }) - val expectedAnomalyThreshold = Threshold(Bound(-9.280850004177061), Bound(10.639954755150061)) + val expectedAnomalyCheckRange = BoundedRange(Bound(-9.280850004177061, inclusive = true), + Bound(10.639954755150061, inclusive = true)) val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (25, AnomalyDetectionDataPoint(data(25), data(25), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (26, AnomalyDetectionDataPoint(data(26), data(26), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (27, AnomalyDetectionDataPoint(data(27), data(27), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (28, AnomalyDetectionDataPoint(data(28), data(28), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (29, AnomalyDetectionDataPoint(data(29), data(29), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (30, AnomalyDetectionDataPoint(data(30), data(30), expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (25, AnomalyDetectionDataPoint(data(25), data(25), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (26, AnomalyDetectionDataPoint(data(26), data(26), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (27, AnomalyDetectionDataPoint(data(27), data(27), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (28, AnomalyDetectionDataPoint(data(28), data(28), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (29, AnomalyDetectionDataPoint(data(29), data(29), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (30, AnomalyDetectionDataPoint(data(30), data(30), expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -134,15 +136,16 @@ class BatchNormalStrategyTest extends WordSpec with Matchers { val anomalyResult = strategy.detectWithExtendedResults(data, (20, 31)).filter({ case (_, anom) => anom.isAnomaly }) - val expectedAnomalyThreshold = Threshold(Bound(Double.NegativeInfinity), Bound(0.7781496015857838)) + val expectedAnomalyCheckRange = BoundedRange(Bound(Double.NegativeInfinity, inclusive = true), + Bound(0.7781496015857838, inclusive = true)) // Anomalies with positive values only val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (20, AnomalyDetectionDataPoint(data(20), data(20), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (22, AnomalyDetectionDataPoint(data(22), data(22), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (24, AnomalyDetectionDataPoint(data(24), data(24), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (26, AnomalyDetectionDataPoint(data(26), data(26), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (28, AnomalyDetectionDataPoint(data(28), data(28), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (30, AnomalyDetectionDataPoint(data(30), data(30), expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (20, AnomalyDetectionDataPoint(data(20), data(20), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (22, AnomalyDetectionDataPoint(data(22), data(22), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (24, AnomalyDetectionDataPoint(data(24), data(24), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (26, AnomalyDetectionDataPoint(data(26), data(26), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (28, AnomalyDetectionDataPoint(data(28), data(28), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (30, AnomalyDetectionDataPoint(data(30), data(30), expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -151,15 +154,16 @@ class BatchNormalStrategyTest extends WordSpec with Matchers { val strategy = BatchNormalStrategy(Some(1.0), None) val anomalyResult = strategy.detectWithExtendedResults(data, (10, 30)).filter({ case (_, anom) => anom.isAnomaly }) - val expectedAnomalyThreshold = Threshold(Bound(-5.063730045618394), Bound(Double.PositiveInfinity)) + val expectedAnomalyCheckRange = BoundedRange(Bound(-5.063730045618394, inclusive = true), + Bound(Double.PositiveInfinity, inclusive = true)) // Anomalies with negative values only val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (21, AnomalyDetectionDataPoint(data(21), data(21), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (23, AnomalyDetectionDataPoint(data(23), data(23), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (25, AnomalyDetectionDataPoint(data(25), data(25), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (27, AnomalyDetectionDataPoint(data(27), data(27), expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (29, AnomalyDetectionDataPoint(data(29), data(29), expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (21, AnomalyDetectionDataPoint(data(21), data(21), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (23, AnomalyDetectionDataPoint(data(23), data(23), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (25, AnomalyDetectionDataPoint(data(25), data(25), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (27, AnomalyDetectionDataPoint(data(27), data(27), expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (29, AnomalyDetectionDataPoint(data(29), data(29), expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -171,8 +175,10 @@ class BatchNormalStrategyTest extends WordSpec with Matchers { strategy.detectWithExtendedResults(data, (3, 5)).filter({ case (_, anom) => anom.isAnomaly }) val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (3, AnomalyDetectionDataPoint(1000, 1000, Threshold(Bound(1.0), Bound(1.0)), isAnomaly = true, 1.0)), - (4, AnomalyDetectionDataPoint(500, 500, Threshold(Bound(1.0), Bound(1.0)), isAnomaly = true, 1.0)) + (3, AnomalyDetectionDataPoint(1000, 1000, BoundedRange(Bound(1.0, inclusive = true), + Bound(1.0, inclusive = true)), isAnomaly = true, 1.0)), + (4, AnomalyDetectionDataPoint(500, 500, BoundedRange(Bound(1.0, inclusive = true), + Bound(1.0, inclusive = true)), isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -209,8 +215,8 @@ class BatchNormalStrategyTest extends WordSpec with Matchers { result.foreach { case (_, anom) => val value = anom.anomalyMetricValue - val upperBound = anom.anomalyThreshold.upperBound.value - val lowerBound = anom.anomalyThreshold.lowerBound.value + val upperBound = anom.anomalyCheckRange.upperBound.value + val lowerBound = anom.anomalyCheckRange.lowerBound.value assert(value < lowerBound || value > upperBound) } diff --git a/src/test/scala/com/amazon/deequ/anomalydetection/OnlineNormalStrategyTest.scala b/src/test/scala/com/amazon/deequ/anomalydetection/OnlineNormalStrategyTest.scala index d9fdd4ebc..28f8ebdf7 100644 --- a/src/test/scala/com/amazon/deequ/anomalydetection/OnlineNormalStrategyTest.scala +++ b/src/test/scala/com/amazon/deequ/anomalydetection/OnlineNormalStrategyTest.scala @@ -16,7 +16,8 @@ package com.amazon.deequ.anomalydetection -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.Matchers +import org.scalatest.WordSpec import breeze.stats.meanAndVariance import scala.util.Random @@ -169,27 +170,38 @@ class OnlineNormalStrategyTest extends WordSpec with Matchers { val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( (20, AnomalyDetectionDataPoint(data(20), data(20), - Threshold(Bound(-14.868489924421404), Bound(14.255383455388895)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-14.868489924421404, inclusive = true), Bound(14.255383455388895, inclusive = true)), + isAnomaly = true, 1.0)), (21, AnomalyDetectionDataPoint(data(21), data(21), - Threshold(Bound(-13.6338479733374), Bound(13.02074150430489)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-13.6338479733374, inclusive = true), Bound(13.02074150430489, inclusive = true)), + isAnomaly = true, 1.0)), (22, AnomalyDetectionDataPoint(data(22), data(22), - Threshold(Bound(-16.71733585267535), Bound(16.104229383642842)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-16.71733585267535, inclusive = true), Bound(16.104229383642842, inclusive = true)), + isAnomaly = true, 1.0)), (23, AnomalyDetectionDataPoint(data(23), data(23), - Threshold(Bound(-17.346915620547467), Bound(16.733809151514958)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-17.346915620547467, inclusive = true), Bound(16.733809151514958, inclusive = true)), + isAnomaly = true, 1.0)), (24, AnomalyDetectionDataPoint(data(24), data(24), - Threshold(Bound(-17.496117397890874), Bound(16.883010928858365)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-17.496117397890874, inclusive = true), Bound(16.883010928858365, inclusive = true)), + isAnomaly = true, 1.0)), (25, AnomalyDetectionDataPoint(data(25), data(25), - Threshold(Bound(-17.90391150851199), Bound(17.29080503947948)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-17.90391150851199, inclusive = true), Bound(17.29080503947948, inclusive = true)), + isAnomaly = true, 1.0)), (26, AnomalyDetectionDataPoint(data(26), data(26), - Threshold(Bound(-17.028892797350824), Bound(16.415786328318315)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-17.028892797350824, inclusive = true), Bound(16.415786328318315, inclusive = true)), + isAnomaly = true, 1.0)), (27, AnomalyDetectionDataPoint(data(27), data(27), - Threshold(Bound(-17.720100310354653), Bound(17.106993841322144)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-17.720100310354653, inclusive = true), Bound(17.106993841322144, inclusive = true)), + isAnomaly = true, 1.0)), (28, AnomalyDetectionDataPoint(data(28), data(28), - Threshold(Bound(-18.23663168508628), Bound(17.62352521605377)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-18.23663168508628, inclusive = true), Bound(17.62352521605377, inclusive = true)), + isAnomaly = true, 1.0)), (29, AnomalyDetectionDataPoint(data(29), data(29), - Threshold(Bound(-19.32641622778204), Bound(18.71330975874953)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-19.32641622778204, inclusive = true), Bound(18.71330975874953, inclusive = true)), + isAnomaly = true, 1.0)), (30, AnomalyDetectionDataPoint(data(30), data(30), - Threshold(Bound(-18.96540323993527), Bound(18.35229677090276)), isAnomaly = true, 1.0)) + BoundedRange(Bound(-18.96540323993527, inclusive = true), Bound(18.35229677090276, inclusive = true)), + isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -199,17 +211,23 @@ class OnlineNormalStrategyTest extends WordSpec with Matchers { val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( (25, AnomalyDetectionDataPoint(data(25), data(25), - Threshold(Bound(-15.630116599125694), Bound(16.989221350098695)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-15.630116599125694, inclusive = true), Bound(16.989221350098695, inclusive = true)), + isAnomaly = true, 1.0)), (26, AnomalyDetectionDataPoint(data(26), data(26), - Threshold(Bound(-14.963376676338362), Bound(16.322481427311363)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-14.963376676338362, inclusive = true), Bound(16.322481427311363, inclusive = true)), + isAnomaly = true, 1.0)), (27, AnomalyDetectionDataPoint(data(27), data(27), - Threshold(Bound(-15.131834814393196), Bound(16.490939565366197)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-15.131834814393196, inclusive = true), Bound(16.490939565366197, inclusive = true)), + isAnomaly = true, 1.0)), (28, AnomalyDetectionDataPoint(data(28), data(28), - Threshold(Bound(-14.76810451038132), Bound(16.12720926135432)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-14.76810451038132, inclusive = true), Bound(16.12720926135432, inclusive = true)), + isAnomaly = true, 1.0)), (29, AnomalyDetectionDataPoint(data(29), data(29), - Threshold(Bound(-15.078145049879462), Bound(16.437249800852463)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-15.078145049879462, inclusive = true), Bound(16.437249800852463, inclusive = true)), + isAnomaly = true, 1.0)), (30, AnomalyDetectionDataPoint(data(30), data(30), - Threshold(Bound(-14.540171084298914), Bound(15.899275835271913)), isAnomaly = true, 1.0)) + BoundedRange(Bound(-14.540171084298914, inclusive = true), Bound(15.899275835271913, inclusive = true)), + isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -222,17 +240,23 @@ class OnlineNormalStrategyTest extends WordSpec with Matchers { // Anomalies with positive values only val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( (20, AnomalyDetectionDataPoint(data(20), data(20), - Threshold(Bound(Double.NegativeInfinity), Bound(5.934276775443095)), isAnomaly = true, 1.0)), + BoundedRange(Bound(Double.NegativeInfinity, inclusive = true), Bound(5.934276775443095, inclusive = true)), + isAnomaly = true, 1.0)), (22, AnomalyDetectionDataPoint(data(22), data(22), - Threshold(Bound(Double.NegativeInfinity), Bound(7.979098353666404)), isAnomaly = true, 1.0)), + BoundedRange(Bound(Double.NegativeInfinity, inclusive = true), Bound(7.979098353666404, inclusive = true)), + isAnomaly = true, 1.0)), (24, AnomalyDetectionDataPoint(data(24), data(24), - Threshold(Bound(Double.NegativeInfinity), Bound(9.582136909647211)), isAnomaly = true, 1.0)), + BoundedRange(Bound(Double.NegativeInfinity, inclusive = true), Bound(9.582136909647211, inclusive = true)), + isAnomaly = true, 1.0)), (26, AnomalyDetectionDataPoint(data(26), data(26), - Threshold(Bound(Double.NegativeInfinity), Bound(10.320400087389258)), isAnomaly = true, 1.0)), + BoundedRange(Bound(Double.NegativeInfinity, inclusive = true), Bound(10.320400087389258, inclusive = true)), + isAnomaly = true, 1.0)), (28, AnomalyDetectionDataPoint(data(28), data(28), - Threshold(Bound(Double.NegativeInfinity), Bound(11.113502213504855)), isAnomaly = true, 1.0)), + BoundedRange(Bound(Double.NegativeInfinity, inclusive = true), Bound(11.113502213504855, inclusive = true)), + isAnomaly = true, 1.0)), (30, AnomalyDetectionDataPoint(data(30), data(30), - Threshold(Bound(Double.NegativeInfinity), Bound(11.776810456746686)), isAnomaly = true, 1.0)) + BoundedRange(Bound(Double.NegativeInfinity, inclusive = true), Bound(11.776810456746686, inclusive = true)), + isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -245,15 +269,20 @@ class OnlineNormalStrategyTest extends WordSpec with Matchers { // Anomalies with negative values only val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( (21, AnomalyDetectionDataPoint(data(21), data(21), - Threshold(Bound(-7.855820681098751), Bound(Double.PositiveInfinity)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-7.855820681098751, inclusive = true), Bound(Double.PositiveInfinity, inclusive = true)), + isAnomaly = true, 1.0)), (23, AnomalyDetectionDataPoint(data(23), data(23), - Threshold(Bound(-10.14631437278386), Bound(Double.PositiveInfinity)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-10.14631437278386, inclusive = true), Bound(Double.PositiveInfinity, inclusive = true)), + isAnomaly = true, 1.0)), (25, AnomalyDetectionDataPoint(data(25), data(25), - Threshold(Bound(-11.038751996286909), Bound(Double.PositiveInfinity)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-11.038751996286909, inclusive = true), Bound(Double.PositiveInfinity, inclusive = true)), + isAnomaly = true, 1.0)), (27, AnomalyDetectionDataPoint(data(27), data(27), - Threshold(Bound(-11.359107787232386), Bound(Double.PositiveInfinity)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-11.359107787232386, inclusive = true), Bound(Double.PositiveInfinity, inclusive = true)), + isAnomaly = true, 1.0)), (29, AnomalyDetectionDataPoint(data(29), data(29), - Threshold(Bound(-12.097995027317015), Bound(Double.PositiveInfinity)), isAnomaly = true, 1.0)) + BoundedRange(Bound(-12.097995027317015, inclusive = true), Bound(Double.PositiveInfinity, inclusive = true)), + isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -290,8 +319,8 @@ class OnlineNormalStrategyTest extends WordSpec with Matchers { result.foreach { case (_, anom) => val value = anom.anomalyMetricValue - val upperBound = anom.anomalyThreshold.upperBound.value - val lowerBound = anom.anomalyThreshold.lowerBound.value + val upperBound = anom.anomalyCheckRange.upperBound.value + val lowerBound = anom.anomalyCheckRange.lowerBound.value assert(value < lowerBound || value > upperBound) } diff --git a/src/test/scala/com/amazon/deequ/anomalydetection/RateOfChangeStrategyTest.scala b/src/test/scala/com/amazon/deequ/anomalydetection/RateOfChangeStrategyTest.scala index d0e6ccba9..7c87b85ee 100644 --- a/src/test/scala/com/amazon/deequ/anomalydetection/RateOfChangeStrategyTest.scala +++ b/src/test/scala/com/amazon/deequ/anomalydetection/RateOfChangeStrategyTest.scala @@ -16,7 +16,8 @@ package com.amazon.deequ.anomalydetection -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.Matchers +import org.scalatest.WordSpec /** * The tested class RateOfChangeStrategy is deprecated. @@ -44,7 +45,7 @@ class RateOfChangeStrategyTest extends WordSpec with Matchers { "detect all anomalies if no interval specified" in { val anomalyResult = strategy.detectWithExtendedResults(data).filter({case (_, anom) => anom.isAnomaly}) - val expectedAnomalyThreshold = Threshold(Bound(-2.0), Bound(2.0)) + val expectedAnomalyThreshold = BoundedRange(Bound(-2.0, inclusive = true), Bound(2.0, inclusive = true)) val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( (20, AnomalyDetectionDataPoint(20, 19, expectedAnomalyThreshold, isAnomaly = true, 1.0)), (21, AnomalyDetectionDataPoint(-21, -41, expectedAnomalyThreshold, isAnomaly = true, 1.0)), diff --git a/src/test/scala/com/amazon/deequ/anomalydetection/RelativeRateOfChangeStrategyTest.scala b/src/test/scala/com/amazon/deequ/anomalydetection/RelativeRateOfChangeStrategyTest.scala index c6da5ae2b..bd09d0e97 100644 --- a/src/test/scala/com/amazon/deequ/anomalydetection/RelativeRateOfChangeStrategyTest.scala +++ b/src/test/scala/com/amazon/deequ/anomalydetection/RelativeRateOfChangeStrategyTest.scala @@ -17,7 +17,8 @@ package com.amazon.deequ.anomalydetection import breeze.linalg.DenseVector -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.Matchers +import org.scalatest.WordSpec class RelativeRateOfChangeStrategyTest extends WordSpec with Matchers { @@ -151,20 +152,20 @@ class RelativeRateOfChangeStrategyTest extends WordSpec with Matchers { "detect all anomalies if no interval specified" in { val anomalyResult = strategy.detectWithExtendedResults(data).filter({case (_, anom) => anom.isAnomaly}) - val expectedAnomalyThreshold = Threshold(Bound(0.5), Bound(2.0)) + val expectedAnomalyCheckRange = BoundedRange(Bound(0.5, inclusive = true), Bound(2.0, inclusive = true)) val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (20, AnomalyDetectionDataPoint(20, 20, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (21, AnomalyDetectionDataPoint(1, 0.05, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (22, AnomalyDetectionDataPoint(22, 22, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (23, AnomalyDetectionDataPoint(1, 0.045454545454545456, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (24, AnomalyDetectionDataPoint(24, 24, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (25, AnomalyDetectionDataPoint(1, 0.041666666666666664, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (26, AnomalyDetectionDataPoint(26, 26, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (27, AnomalyDetectionDataPoint(1, 0.038461538461538464, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (28, AnomalyDetectionDataPoint(28, 28, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (29, AnomalyDetectionDataPoint(1, 0.03571428571428571, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (30, AnomalyDetectionDataPoint(30, 30, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (31, AnomalyDetectionDataPoint(1, 0.03333333333333333, expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (20, AnomalyDetectionDataPoint(20, 20, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (21, AnomalyDetectionDataPoint(1, 0.05, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (22, AnomalyDetectionDataPoint(22, 22, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (23, AnomalyDetectionDataPoint(1, 0.045454545454545456, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (24, AnomalyDetectionDataPoint(24, 24, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (25, AnomalyDetectionDataPoint(1, 0.041666666666666664, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (26, AnomalyDetectionDataPoint(26, 26, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (27, AnomalyDetectionDataPoint(1, 0.038461538461538464, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (28, AnomalyDetectionDataPoint(28, 28, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (29, AnomalyDetectionDataPoint(1, 0.03571428571428571, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (30, AnomalyDetectionDataPoint(30, 30, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (31, AnomalyDetectionDataPoint(1, 0.03333333333333333, expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -172,15 +173,15 @@ class RelativeRateOfChangeStrategyTest extends WordSpec with Matchers { "only detect anomalies in interval" in { val anomalyResult = strategy.detectWithExtendedResults(data, (25, 50)).filter({case (_, anom) => anom.isAnomaly}) - val expectedAnomalyThreshold = Threshold(Bound(0.5), Bound(2.0)) + val expectedAnomalyCheckRange = BoundedRange(Bound(0.5, inclusive = true), Bound(2.0, inclusive = true)) val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (25, AnomalyDetectionDataPoint(1, 0.041666666666666664, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (26, AnomalyDetectionDataPoint(26, 26, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (27, AnomalyDetectionDataPoint(1, 0.038461538461538464, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (28, AnomalyDetectionDataPoint(28, 28, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (29, AnomalyDetectionDataPoint(1, 0.03571428571428571, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (30, AnomalyDetectionDataPoint(30, 30, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (31, AnomalyDetectionDataPoint(1, 0.03333333333333333, expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (25, AnomalyDetectionDataPoint(1, 0.041666666666666664, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (26, AnomalyDetectionDataPoint(26, 26, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (27, AnomalyDetectionDataPoint(1, 0.038461538461538464, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (28, AnomalyDetectionDataPoint(28, 28, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (29, AnomalyDetectionDataPoint(1, 0.03571428571428571, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (30, AnomalyDetectionDataPoint(30, 30, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (31, AnomalyDetectionDataPoint(1, 0.03333333333333333, expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -190,14 +191,15 @@ class RelativeRateOfChangeStrategyTest extends WordSpec with Matchers { val anomalyResult = strategy.detectWithExtendedResults(data).filter({case (_, anom) => anom.isAnomaly}) // Anomalies with positive values only - val expectedAnomalyThreshold = Threshold(Bound(-1.7976931348623157E308), Bound(1.0)) + val expectedAnomalyCheckRange = BoundedRange(Bound(-1.7976931348623157E308, inclusive = true), + Bound(1.0, inclusive = true)) val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (20, AnomalyDetectionDataPoint(20, 20, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (22, AnomalyDetectionDataPoint(22, 22, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (24, AnomalyDetectionDataPoint(24, 24, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (26, AnomalyDetectionDataPoint(26, 26, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (28, AnomalyDetectionDataPoint(28, 28, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (30, AnomalyDetectionDataPoint(30, 30, expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (20, AnomalyDetectionDataPoint(20, 20, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (22, AnomalyDetectionDataPoint(22, 22, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (24, AnomalyDetectionDataPoint(24, 24, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (26, AnomalyDetectionDataPoint(26, 26, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (28, AnomalyDetectionDataPoint(28, 28, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (30, AnomalyDetectionDataPoint(30, 30, expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -207,14 +209,15 @@ class RelativeRateOfChangeStrategyTest extends WordSpec with Matchers { val anomalyResult = strategy.detectWithExtendedResults(data).filter({case (_, anom) => anom.isAnomaly}) // Anomalies with negative values only - val expectedAnomalyThreshold = Threshold(Bound(0.5), Bound(1.7976931348623157E308)) + val expectedAnomalyCheckRange = BoundedRange(Bound(0.5, inclusive = true), + Bound(1.7976931348623157E308, inclusive = true)) val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( - (21, AnomalyDetectionDataPoint(1, 0.05, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (23, AnomalyDetectionDataPoint(1, 0.045454545454545456, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (25, AnomalyDetectionDataPoint(1, 0.041666666666666664, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (27, AnomalyDetectionDataPoint(1, 0.038461538461538464, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (29, AnomalyDetectionDataPoint(1, 0.03571428571428571, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (31, AnomalyDetectionDataPoint(1, 0.03333333333333333, expectedAnomalyThreshold, isAnomaly = true, 1.0)) + (21, AnomalyDetectionDataPoint(1, 0.05, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (23, AnomalyDetectionDataPoint(1, 0.045454545454545456, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (25, AnomalyDetectionDataPoint(1, 0.041666666666666664, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (27, AnomalyDetectionDataPoint(1, 0.038461538461538464, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (29, AnomalyDetectionDataPoint(1, 0.03571428571428571, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (31, AnomalyDetectionDataPoint(1, 0.03333333333333333, expectedAnomalyCheckRange, isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -234,9 +237,11 @@ class RelativeRateOfChangeStrategyTest extends WordSpec with Matchers { val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( (2, AnomalyDetectionDataPoint(3, Double.PositiveInfinity, - Threshold(Bound(-1.7976931348623157E308), Bound(8.0)), isAnomaly = true, 1.0)), + BoundedRange(Bound(-1.7976931348623157E308, inclusive = true), Bound(8.0, inclusive = true)), + isAnomaly = true, 1.0)), (5, AnomalyDetectionDataPoint(72, 12, - Threshold(Bound(-1.7976931348623157E308), Bound(8.0)), isAnomaly = true, 1.0)) + BoundedRange(Bound(-1.7976931348623157E308, inclusive = true), Bound(8.0, inclusive = true)), + isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -248,7 +253,8 @@ class RelativeRateOfChangeStrategyTest extends WordSpec with Matchers { val expectedResult: Seq[(Int, AnomalyDetectionDataPoint)] = Seq( (5, AnomalyDetectionDataPoint(72, 12, - Threshold(Bound(-1.7976931348623157E308), Bound(8.0)), isAnomaly = true, 1.0)) + BoundedRange(Bound(-1.7976931348623157E308, inclusive = true), Bound(8.0, inclusive = true)), + isAnomaly = true, 1.0)) ) assert(anomalyResult == expectedResult) } @@ -277,8 +283,8 @@ class RelativeRateOfChangeStrategyTest extends WordSpec with Matchers { result.foreach { case (_, anom) => val value = anom.anomalyMetricValue - val upperBound = anom.anomalyThreshold.upperBound.value - val lowerBound = anom.anomalyThreshold.lowerBound.value + val upperBound = anom.anomalyCheckRange.upperBound.value + val lowerBound = anom.anomalyCheckRange.lowerBound.value assert(value < lowerBound || value > upperBound) } diff --git a/src/test/scala/com/amazon/deequ/anomalydetection/SimpleThresholdStrategyTest.scala b/src/test/scala/com/amazon/deequ/anomalydetection/SimpleThresholdStrategyTest.scala index f8396c677..28d49d4c2 100644 --- a/src/test/scala/com/amazon/deequ/anomalydetection/SimpleThresholdStrategyTest.scala +++ b/src/test/scala/com/amazon/deequ/anomalydetection/SimpleThresholdStrategyTest.scala @@ -16,7 +16,8 @@ package com.amazon.deequ.anomalydetection -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.Matchers +import org.scalatest.WordSpec class SimpleThresholdStrategyTest extends WordSpec with Matchers { @@ -73,10 +74,11 @@ class SimpleThresholdStrategyTest extends WordSpec with Matchers { "Simple Threshold Strategy with Extended Results" should { val (strategy, data) = setupDefaultStrategyAndData() - val expectedAnomalyThreshold = Threshold(upperBound = Bound(1.0)) + val expectedAnomalyCheckRange = BoundedRange(lowerBound = Bound(Double.MinValue, inclusive = true), + upperBound = Bound(1.0, inclusive = true)) val expectedResult = Seq( - (1, AnomalyDetectionDataPoint(2.0, 2.0, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (2, AnomalyDetectionDataPoint(3.0, 3.0, expectedAnomalyThreshold, isAnomaly = true, 1.0))) + (1, AnomalyDetectionDataPoint(2.0, 2.0, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (2, AnomalyDetectionDataPoint(3.0, 3.0, expectedAnomalyCheckRange, isAnomaly = true, 1.0))) "detect values above threshold" in { val anomalyResult = @@ -102,11 +104,11 @@ class SimpleThresholdStrategyTest extends WordSpec with Matchers { "work with upper and lower threshold" in { val tS = SimpleThresholdStrategy(lowerBound = -0.5, upperBound = 1.0) val anomalyResult = tS.detectWithExtendedResults(data).filter({ case (_, anom) => anom.isAnomaly }) - val expectedAnomalyThreshold = Threshold(Bound(-0.5), Bound(1.0)) + val expectedAnomalyCheckRange = BoundedRange(Bound(-0.5, inclusive = true), Bound(1.0, inclusive = true)) val expectedResult = Seq( - (0, AnomalyDetectionDataPoint(-1.0, -1.0, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, expectedAnomalyThreshold, isAnomaly = true, 1.0)), - (2, AnomalyDetectionDataPoint(3.0, 3.0, expectedAnomalyThreshold, isAnomaly = true, 1.0))) + (0, AnomalyDetectionDataPoint(-1.0, -1.0, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, expectedAnomalyCheckRange, isAnomaly = true, 1.0)), + (2, AnomalyDetectionDataPoint(3.0, 3.0, expectedAnomalyCheckRange, isAnomaly = true, 1.0))) assert(anomalyResult == expectedResult) } @@ -128,8 +130,8 @@ class SimpleThresholdStrategyTest extends WordSpec with Matchers { result.foreach { case (_, anom) => val value = anom.anomalyMetricValue - val upperBound = anom.anomalyThreshold.upperBound.value - val lowerBound = anom.anomalyThreshold.lowerBound.value + val upperBound = anom.anomalyCheckRange.upperBound.value + val lowerBound = anom.anomalyCheckRange.lowerBound.value assert(value < lowerBound || value > upperBound) } diff --git a/src/test/scala/com/amazon/deequ/checks/CheckTest.scala b/src/test/scala/com/amazon/deequ/checks/CheckTest.scala index e768d0f37..31c5209d7 100644 --- a/src/test/scala/com/amazon/deequ/checks/CheckTest.scala +++ b/src/test/scala/com/amazon/deequ/checks/CheckTest.scala @@ -25,6 +25,8 @@ import com.amazon.deequ.anomalydetection.AnomalyDetectionAssertionResult import com.amazon.deequ.anomalydetection.AnomalyDetectionDataPoint import com.amazon.deequ.anomalydetection.AnomalyDetectionStrategy import com.amazon.deequ.anomalydetection.AnomalyDetectionStrategyWithExtendedResults +import com.amazon.deequ.anomalydetection.Bound +import com.amazon.deequ.anomalydetection.BoundedRange import com.amazon.deequ.anomalydetection.ExtendedDetectionResult import com.amazon.deequ.checks.Check.getNewestPointAnomalyResults import com.amazon.deequ.constraints.ConstrainableDataTypes @@ -53,6 +55,10 @@ class CheckTest extends AnyWordSpec with Matchers with SparkContextSpec with Fix import CheckTest._ + // This is used as a default bounded range value for anomaly detection tests. + private[this] val defaultBoundedRange = BoundedRange(lowerBound = Bound(0.0, inclusive = true), + upperBound = Bound(1.0, inclusive = true)) + "Check" should { "return the correct check status for completeness" in withSparkSession { sparkSession => @@ -1177,38 +1183,38 @@ class CheckTest extends AnyWordSpec with Matchers with SparkContextSpec with Fix (fakeAnomalyDetector.detectWithExtendedResults _) .expects(Vector(1.0, 2.0, 3.0, 4.0, 11.0), (4, 5)) .returns(Seq( - (0, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (3, AnomalyDetectionDataPoint(4.0, 4.0, confidence = 1.0)), - (4, AnomalyDetectionDataPoint(11.0, 11.0, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (3, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (4, AnomalyDetectionDataPoint(11.0, 11.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)))) .once() (fakeAnomalyDetector.detectWithExtendedResults _).expects(Vector(1.0, 2.0, 3.0, 4.0, 4.0), (4, 5)) .returns(Seq( - (0, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (3, AnomalyDetectionDataPoint(4.0, 4.0, confidence = 1.0)), - (4, AnomalyDetectionDataPoint(4.0, 4.0, isAnomaly = true, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (3, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (4, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)))) .once() // Distinctness results (fakeAnomalyDetector.detectWithExtendedResults _) .expects(Vector(1.0, 2.0, 3.0, 4.0, 1), (4, 5)) .returns(Seq( - (0, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (3, AnomalyDetectionDataPoint(4.0, 4.0, confidence = 1.0)), - (4, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (3, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (4, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)))) .once() (fakeAnomalyDetector.detectWithExtendedResults _) .expects(Vector(1.0, 2.0, 3.0, 4.0, 1), (4, 5)) .returns(Seq( - (0, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (3, AnomalyDetectionDataPoint(4.0, 4.0, confidence = 1.0)), - (4, AnomalyDetectionDataPoint(1.0, 1.0, isAnomaly = true, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (3, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (4, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)))) .once() } @@ -1249,15 +1255,15 @@ class CheckTest extends AnyWordSpec with Matchers with SparkContextSpec with Fix (fakeAnomalyDetector.detectWithExtendedResults _) .expects(Vector(1.0, 2.0, 11.0), (2, 3)) .returns(Seq( - (0, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(11.0, 11.0, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(11.0, 11.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)))) .once() (fakeAnomalyDetector.detectWithExtendedResults _).expects(Vector(1.0, 2.0, 4.0), (2, 3)) .returns(Seq( - (0, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(4.0, 4.0, isAnomaly = true, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)))) .once() } @@ -1289,15 +1295,15 @@ class CheckTest extends AnyWordSpec with Matchers with SparkContextSpec with Fix (fakeAnomalyDetector.detectWithExtendedResults _) .expects(Vector(3.0, 4.0, 11.0), (2, 3)) .returns(Seq( - (0, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(4.0, 4.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(11.0, 11.0, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(11.0, 11.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)))) .once() (fakeAnomalyDetector.detectWithExtendedResults _).expects(Vector(3.0, 4.0, 4.0), (2, 3)) .returns(Seq( - (0, AnomalyDetectionDataPoint(3.0, 3.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(4.0, 4.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(4.0, 4.0, isAnomaly = true, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(3.0, 3.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)))) .once() } @@ -1329,15 +1335,15 @@ class CheckTest extends AnyWordSpec with Matchers with SparkContextSpec with Fix (fakeAnomalyDetector.detectWithExtendedResults _) .expects(Vector(1.0, 2.0, 11.0), (2, 3)) .returns(Seq( - (0, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(11.0, 11.0, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(11.0, 11.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)))) .once() (fakeAnomalyDetector.detectWithExtendedResults _).expects(Vector(1.0, 2.0, 4.0), (2, 3)) .returns(Seq( - (0, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(4.0, 4.0, isAnomaly = true, confidence = 1.0)))) + (0, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(4.0, 4.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)))) .once() } @@ -1364,26 +1370,26 @@ class CheckTest extends AnyWordSpec with Matchers with SparkContextSpec with Fix "with multiple data points" in { val anomalySequence: Seq[(Long, AnomalyDetectionDataPoint)] = Seq( - (0, AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)), - (1, AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)), - (2, AnomalyDetectionDataPoint(11.0, 11.0, isAnomaly = true, confidence = 1.0))) + (0, AnomalyDetectionDataPoint(1.0, 1.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (1, AnomalyDetectionDataPoint(2.0, 2.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)), + (2, AnomalyDetectionDataPoint(11.0, 11.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true))) val result: AnomalyDetectionAssertionResult = getNewestPointAnomalyResults(ExtendedDetectionResult(anomalySequence)) - assert(!result.hasNoAnomaly) - assert(result.anomalyDetectionExtendedResult.anomalyDetectionDataPoints.head == - AnomalyDetectionDataPoint(11.0, 11.0, isAnomaly = true, confidence = 1.0)) + assert(result.hasAnomaly) + assert(result.anomalyDetectionExtendedResult.anomalyDetectionDataPoint == + AnomalyDetectionDataPoint(11.0, 11.0, defaultBoundedRange, confidence = 1.0, isAnomaly = true)) } "getNewestPointAnomalyResults returns correct assertion result from anomaly detection data point sequence " + "with one data point" in { val anomalySequence: Seq[(Long, AnomalyDetectionDataPoint)] = Seq( - (0, AnomalyDetectionDataPoint(11.0, 11.0, confidence = 1.0))) + (0, AnomalyDetectionDataPoint(11.0, 11.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false))) val result: AnomalyDetectionAssertionResult = getNewestPointAnomalyResults(ExtendedDetectionResult(anomalySequence)) - assert(result.hasNoAnomaly) - assert(result.anomalyDetectionExtendedResult.anomalyDetectionDataPoints.head == - AnomalyDetectionDataPoint(11.0, 11.0, confidence = 1.0)) + assert(!result.hasAnomaly) + assert(result.anomalyDetectionExtendedResult.anomalyDetectionDataPoint == + AnomalyDetectionDataPoint(11.0, 11.0, defaultBoundedRange, confidence = 1.0, isAnomaly = false)) } "assert getNewestPointAnomalyResults throws exception from empty anomaly detection sequence" in { diff --git a/src/test/scala/com/amazon/deequ/constraints/AnomalyExtendedResultsConstraintTest.scala b/src/test/scala/com/amazon/deequ/constraints/AnomalyExtendedResultsConstraintTest.scala index 213123a74..606b1966b 100644 --- a/src/test/scala/com/amazon/deequ/constraints/AnomalyExtendedResultsConstraintTest.scala +++ b/src/test/scala/com/amazon/deequ/constraints/AnomalyExtendedResultsConstraintTest.scala @@ -19,7 +19,11 @@ package com.amazon.deequ.constraints import com.amazon.deequ.SparkContextSpec import com.amazon.deequ.analyzers._ import com.amazon.deequ.analyzers.runners.MetricCalculationException -import com.amazon.deequ.anomalydetection.{AnomalyDetectionAssertionResult, AnomalyDetectionDataPoint, AnomalyDetectionExtendedResult} +import com.amazon.deequ.anomalydetection.AnomalyDetectionAssertionResult +import com.amazon.deequ.anomalydetection.AnomalyDetectionDataPoint +import com.amazon.deequ.anomalydetection.AnomalyDetectionExtendedResult +import com.amazon.deequ.anomalydetection.Bound +import com.amazon.deequ.anomalydetection.BoundedRange import com.amazon.deequ.constraints.ConstraintUtils.calculate import com.amazon.deequ.metrics.{DoubleMetric, Entity, Metric} import com.amazon.deequ.utils.FixtureSupport @@ -54,7 +58,8 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S override def calculate( data: DataFrame, stateLoader: Option[StateLoader], - statePersister: Option[StatePersister]) + statePersister: Option[StatePersister], + filterCondition: Option[String]) : DoubleMetric = { val value: Try[Double] = Try { require(data.columns.contains(column), s"Missing column $column") @@ -63,11 +68,11 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S DoubleMetric(Entity.Column, "sample", column, value) } - override def computeStateFrom(data: DataFrame): Option[NumMatches] = { + override def computeStateFrom(data: DataFrame, filterCondition: Option[String] = None) + : Option[NumMatches] = { throw new NotImplementedError() } - override def computeMetricFrom(state: Option[NumMatches]): DoubleMetric = { throw new NotImplementedError() } @@ -75,15 +80,20 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S "Anomaly extended results constraint" should { + val defaultBoundedRange = BoundedRange(lowerBound = Bound(0.0, inclusive = true), + upperBound = Bound(1.0, inclusive = true)) + "assert correctly on values if analysis is successful" in withSparkSession { sparkSession => val df = getDfMissing(sparkSession) // Analysis result should equal to 1.0 for an existing column - val anomalyAssertionFunctionA = (metric: Double) => { - AnomalyDetectionAssertionResult(metric == 1.0, - AnomalyDetectionExtendedResult(Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)))) + val anomalyAssertionFunctionA = (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = false, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = false)) + ) } val resultA = calculate( @@ -94,10 +104,10 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S assert(resultA.message.isEmpty) assert(resultA.metric.isDefined) - val anomalyAssertionFunctionB = (metric: Double) => { - AnomalyDetectionAssertionResult(metric != 1.0, - AnomalyDetectionExtendedResult( - Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, isAnomaly = true)))) + val anomalyAssertionFunctionB = (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = true, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = true))) } // Analysis result should equal to 1.0 for an existing column @@ -126,9 +136,11 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S val df = getDfMissing(sparkSession) - val anomalyAssertionFunctionA = (metric: Double) => { - AnomalyDetectionAssertionResult(metric == 2.0, - AnomalyDetectionExtendedResult(Seq(AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)))) + val anomalyAssertionFunctionA = (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = false, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = false)) + ) } // Analysis result should equal to 100.0 for an existing column @@ -136,10 +148,10 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S SampleAnalyzer("att1"), anomalyAssertionFunctionA, Some(valueDoubler)), df).status == ConstraintStatus.Success) - val anomalyAssertionFunctionB = (metric: Double) => { - AnomalyDetectionAssertionResult(metric != 2.0, - AnomalyDetectionExtendedResult( - Seq(AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0, isAnomaly = true)))) + val anomalyAssertionFunctionB = (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = true, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = true))) } assert(calculate(AnomalyExtendedResultsConstraint[NumMatches, Double, Double]( @@ -164,14 +176,15 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S SampleAnalyzer("someMissingColumn") -> SampleAnalyzer("someMissingColumn").calculate(df) ) - val anomalyAssertionFunctionA = (metric: Double) => { - AnomalyDetectionAssertionResult(metric == 1.0, - AnomalyDetectionExtendedResult(Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)))) + val anomalyAssertionFunctionA = (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = false, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = false))) } - val anomalyAssertionFunctionB = (metric: Double) => { - AnomalyDetectionAssertionResult(metric != 1.0, - AnomalyDetectionExtendedResult( - Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, isAnomaly = true)))) + val anomalyAssertionFunctionB = (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = true, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = true))) } // Analysis result should equal to 1.0 for an existing column @@ -202,9 +215,10 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S val validResults = Map[Analyzer[_, Metric[_]], Metric[_]]( SampleAnalyzer("att1") -> SampleAnalyzer("att1").calculate(df)) - val anomalyAssertionFunction = (metric: Double) => { - AnomalyDetectionAssertionResult(metric == 2.0, - AnomalyDetectionExtendedResult(Seq(AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0)))) + val anomalyAssertionFunction = (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = false, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(2.0, 2.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = false))) } assert(AnomalyExtendedResultsConstraint[NumMatches, Double, Double]( @@ -224,9 +238,10 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S val validResults = Map[Analyzer[_, Metric[_]], Metric[_]]( SampleAnalyzer("att1") -> SampleAnalyzer("att1").calculate(df)) - val anomalyAssertionFunction = (metric: Double) => { - AnomalyDetectionAssertionResult(metric == 1.0, - AnomalyDetectionExtendedResult(Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)))) + val anomalyAssertionFunction = (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = false, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = false))) } val constraint = AnomalyExtendedResultsConstraint[NumMatches, Double, Double]( SampleAnalyzer("att1"), anomalyAssertionFunction, Some(problematicValuePicker)) @@ -260,10 +275,10 @@ class AnomalyExtendedResultsConstraintTest extends WordSpec with Matchers with S val df = getDfMissing(sparkSession) - val anomalyAssertionFunction = (metric: Double) => { - AnomalyDetectionAssertionResult(metric == 0.9, - AnomalyDetectionExtendedResult( - Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, isAnomaly = true)))) + val anomalyAssertionFunction = (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = true, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = true))) } val failingConstraint = AnomalyExtendedResultsConstraint[NumMatches, Double, Double]( diff --git a/src/test/scala/com/amazon/deequ/constraints/ConstraintsTest.scala b/src/test/scala/com/amazon/deequ/constraints/ConstraintsTest.scala index ac426ef55..cd8d91d91 100644 --- a/src/test/scala/com/amazon/deequ/constraints/ConstraintsTest.scala +++ b/src/test/scala/com/amazon/deequ/constraints/ConstraintsTest.scala @@ -18,14 +18,21 @@ package com.amazon.deequ package constraints import com.amazon.deequ.utils.FixtureSupport -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.Matchers +import org.scalatest.WordSpec import ConstraintUtils.calculate -import com.amazon.deequ.analyzers.{Completeness, NumMatchesAndCount} +import com.amazon.deequ.analyzers.Completeness +import com.amazon.deequ.analyzers.NumMatchesAndCount import org.apache.spark.sql.Row -import org.apache.spark.sql.types.{DoubleType, StringType} +import org.apache.spark.sql.types.DoubleType +import org.apache.spark.sql.types.StringType import Constraint._ import com.amazon.deequ.SparkContextSpec -import com.amazon.deequ.anomalydetection.{AnomalyDetectionAssertionResult, AnomalyDetectionDataPoint, AnomalyDetectionExtendedResult} +import com.amazon.deequ.anomalydetection.AnomalyDetectionAssertionResult +import com.amazon.deequ.anomalydetection.AnomalyDetectionDataPoint +import com.amazon.deequ.anomalydetection.AnomalyDetectionExtendedResult +import com.amazon.deequ.anomalydetection.Bound +import com.amazon.deequ.anomalydetection.BoundedRange class ConstraintsTest extends WordSpec with Matchers with SparkContextSpec with FixtureSupport { @@ -179,31 +186,26 @@ class ConstraintsTest extends WordSpec with Matchers with SparkContextSpec with "Anomaly constraint with Extended Results" should { "assert on anomaly analyzer values" in withSparkSession { sparkSession => val df = getDfMissing(sparkSession) + val defaultBoundedRange = BoundedRange(lowerBound = Bound(0.0, inclusive = true), + upperBound = Bound(1.0, inclusive = true)) + assert(calculate(Constraint.anomalyConstraintWithExtendedResults[NumMatchesAndCount]( - Completeness("att1"), (metric: Double) => { - AnomalyDetectionAssertionResult(metric > 0.4, - AnomalyDetectionExtendedResult(Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)))) + Completeness("att1"), (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = false, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = false))) } ), df) .status == ConstraintStatus.Success) - assert(calculate(Constraint.anomalyConstraintWithExtendedResults[NumMatchesAndCount]( - Completeness("att1"), (metric: Double) => { - AnomalyDetectionAssertionResult(metric < 0.4, - AnomalyDetectionExtendedResult(Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)))) - }), df) - .status == ConstraintStatus.Failure) assert(calculate(Constraint.anomalyConstraintWithExtendedResults[NumMatchesAndCount]( - Completeness("att2"), (metric: Double) => { - AnomalyDetectionAssertionResult(metric > 0.7, - AnomalyDetectionExtendedResult(Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)))) - }), df) - .status == ConstraintStatus.Success) - assert(calculate(Constraint.anomalyConstraintWithExtendedResults[NumMatchesAndCount]( - Completeness("att2"), (metric: Double) => { - AnomalyDetectionAssertionResult(metric < 0.7, - AnomalyDetectionExtendedResult(Seq(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0)))) + Completeness("att1"), (_: Double) => { + AnomalyDetectionAssertionResult(hasAnomaly = true, + AnomalyDetectionExtendedResult(AnomalyDetectionDataPoint(1.0, 1.0, confidence = 1.0, + anomalyCheckRange = defaultBoundedRange, isAnomaly = true) + )) }), df) .status == ConstraintStatus.Failure) + } } }