Skip to content

Commit

Permalink
Style fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
eycho-am committed Feb 13, 2024
1 parent 274a446 commit a111fc5
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -353,4 +353,4 @@ case class AnomalyCheckConfig(
description: String,
withTagValues: Map[String, String] = Map.empty,
afterDate: Option[Long] = None,
beforeDate: Option[Long] = None)
beforeDate: Option[Long] = None)
3 changes: 2 additions & 1 deletion src/main/scala/com/amazon/deequ/analyzers/Uniqueness.scala
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ case class Uniqueness(columns: Seq[String], where: Option[String] = None)
val conditionColumn = where.map { expression => expr(expression) }
val fullColumnUniqueness = conditionColumn.map {
condition => {
when(not(condition), expr(rowLevelFilterTreatment.toString)).when((fullColumn.getOrElse(null)).equalTo(1), true).otherwise(false)
when(not(condition), expr(rowLevelFilterTreatment.toString))
.when(fullColumn.getOrElse(null).equalTo(1), true).otherwise(false)
}
}.getOrElse(when((fullColumn.getOrElse(null)).equalTo(1), true).otherwise(false))
super.fromAggregationResult(result, offset, Option(fullColumnUniqueness))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
/**
* Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not
* use this file except in compliance with the License. A copy of the License
* is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/

package com.amazon.deequ.utilities
import com.amazon.deequ.utilities.FilteredRow.FilteredRow

Expand Down Expand Up @@ -29,4 +45,4 @@ class RowLevelFilterTreatmentImpl(initialFilterTreatment: FilteredRow) extends R
/**
 * Treatment options for rows excluded by a row-level filter:
 * either mark them as null (`NULL`) or as passing (`TRUE`).
 */
object FilteredRow extends Enumeration {
  // Alias so callers can write `FilteredRow.FilteredRow` as a type.
  type FilteredRow = Value

  // Explicitly named members; ids are assigned in declaration order (0, 1),
  // matching the original `val NULL, TRUE = Value` declaration.
  val NULL: Value = Value("NULL")
  val TRUE: Value = Value("TRUE")
}
26 changes: 18 additions & 8 deletions src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,17 @@ class VerificationSuiteTest extends WordSpec with Matchers with SparkContextSpec
assert(Seq(true, true, true, false, false, false).sameElements(rowLevel8))
}

"generate a result that contains row-level results with filter with true for filtered rows" in withSparkSession { session =>
"generate a result that contains row-level results with true for filtered rows" in withSparkSession { session =>
val data = getDfCompleteAndInCompleteColumns(session)

val completeness = new Check(CheckLevel.Error, "rule1").hasCompleteness("att2", _ > 0.7, None).where("att1 = \"a\"")
val uniqueness = new Check(CheckLevel.Error, "rule2").hasUniqueness("att1", _ > 0.5, None)
val uniquenessWhere = new Check(CheckLevel.Error, "rule3").isUnique("att1").where("item < 3")
val completeness = new Check(CheckLevel.Error, "rule1")
.hasCompleteness("att2", _ > 0.7, None)
.where("att1 = \"a\"")
val uniqueness = new Check(CheckLevel.Error, "rule2")
.hasUniqueness("att1", _ > 0.5, None)
val uniquenessWhere = new Check(CheckLevel.Error, "rule3")
.isUnique("att1")
.where("item < 3")
val expectedColumn1 = completeness.description
val expectedColumn2 = uniqueness.description
val expectedColumn3 = uniquenessWhere.description
Expand Down Expand Up @@ -341,12 +346,17 @@ class VerificationSuiteTest extends WordSpec with Matchers with SparkContextSpec

}

"generate a result that contains row-level results with filter with null for filtered rows" in withSparkSession { session =>
"generate a result that contains row-level results with null for filtered rows" in withSparkSession { session =>
val data = getDfCompleteAndInCompleteColumns(session)

val completeness = new Check(CheckLevel.Error, "rule1").hasCompleteness("att2", _ > 0.7, None).where("att1 = \"a\"")
val uniqueness = new Check(CheckLevel.Error, "rule2").hasUniqueness("att1", _ > 0.5, None)
val uniquenessWhere = new Check(CheckLevel.Error, "rule3").isUnique("att1").where("item < 3")
val completeness = new Check(CheckLevel.Error, "rule1")
.hasCompleteness("att2", _ > 0.7, None)
.where("att1 = \"a\"")
val uniqueness = new Check(CheckLevel.Error, "rule2")
.hasUniqueness("att1", _ > 0.5, None)
val uniquenessWhere = new Check(CheckLevel.Error, "rule3")
.isUnique("att1")
.where("item < 3")
val expectedColumn1 = completeness.description
val expectedColumn2 = uniqueness.description
val expectedColumn3 = uniquenessWhere.description
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class CompletenessTest extends AnyWordSpec with Matchers with SparkContextSpec w

val data = getDfCompleteAndInCompleteColumns(session)

// Explicitly setting RowLevelFilterTreatment for test purposes, but this should be set at the VerificationRunBuilder
// Explicitly setting RowLevelFilterTreatment for test purposes, this should be set at the VerificationRunBuilder
val completenessAtt2 = Completeness("att2", Option("att1 = \"a\"")).withRowLevelFilterTreatment(FilteredRow.NULL)
val state = completenessAtt2.computeStateFrom(data)
val metric: DoubleMetric with FullColumn = completenessAtt2.computeMetricFrom(state)
Expand All @@ -60,7 +60,7 @@ class CompletenessTest extends AnyWordSpec with Matchers with SparkContextSpec w

val data = getDfCompleteAndInCompleteColumns(session)

// Explicitly setting RowLevelFilterTreatment for test purposes, but this should be set at the VerificationRunBuilder
// Explicitly setting RowLevelFilterTreatment for test purposes, this should be set at the VerificationRunBuilder
val completenessAtt2 = Completeness("att2", Option("att1 = \"a\"")).withRowLevelFilterTreatment(FilteredRow.TRUE)
val state = completenessAtt2.computeStateFrom(data)
val metric: DoubleMetric with FullColumn = completenessAtt2.computeMetricFrom(state)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class UniquenessTest extends AnyWordSpec with Matchers with SparkContextSpec wit

val data = getDfWithUniqueColumns(session)

// Explicitly setting RowLevelFilterTreatment for test purposes, but this should be set at the VerificationRunBuilder
// Explicitly setting RowLevelFilterTreatment for test purposes, this should be set at the VerificationRunBuilder
val addressLength = Uniqueness(Seq("onlyUniqueWithOtherNonUnique"), Option("unique < 4"))
.withRowLevelFilterTreatment(FilteredRow.TRUE)
val state: Option[FrequenciesAndNumRows] = addressLength.computeStateFrom(data, Option("unique < 4"))
Expand All @@ -172,7 +172,7 @@ class UniquenessTest extends AnyWordSpec with Matchers with SparkContextSpec wit

val data = getDfWithUniqueColumns(session)

// Explicitly setting RowLevelFilterTreatment for test purposes, but this should be set at the VerificationRunBuilder
// Explicitly setting RowLevelFilterTreatment for test purposes, this should be set at the VerificationRunBuilder
val addressLength = Uniqueness(Seq("halfUniqueCombinedWithNonUnique", "nonUnique"), Option("unique > 2"))
.withRowLevelFilterTreatment(FilteredRow.TRUE)
val state: Option[FrequenciesAndNumRows] = addressLength.computeStateFrom(data, Option("unique > 2"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ class AnalysisRunnerTests extends AnyWordSpec
assert(numCombinedJobs == analyzers.length * 2)
// assert(separateResults == runnerResults.toString)
// Used to be tested with the above line, but adding filters changed the order of the results.
assert(separateResults.asInstanceOf[Set[DoubleMetric]].size == runnerResults.asInstanceOf[Set[DoubleMetric]].size)
assert(separateResults.asInstanceOf[Set[DoubleMetric]].size ==
runnerResults.asInstanceOf[Set[DoubleMetric]].size)
separateResults.asInstanceOf[Set[DoubleMetric]].foreach( result => {
assert(runnerResults.toString.contains(result.toString))
}
Expand Down
2 changes: 1 addition & 1 deletion src/test/scala/com/amazon/deequ/checks/CheckTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ class CheckTest extends AnyWordSpec with Matchers with SparkContextSpec with Fix
assertEvaluatesTo(numericRangeCheck9, numericRangeResults, CheckStatus.Success)
}

"correctly evaluate range constraints when values have single quote(') in string" in withSparkSession { sparkSession =>
"correctly evaluate range constraints when values have single quote in string" in withSparkSession { sparkSession =>
val rangeCheck = Check(CheckLevel.Error, "a")
.isContainedIn("att2", Array("can't", "help", "but", "wouldn't"))

Expand Down

0 comments on commit a111fc5

Please sign in to comment.