From a111fc531c69b03f57358f5f5e35e01594cebba2 Mon Sep 17 00:00:00 2001 From: Edward Cho Date: Tue, 13 Feb 2024 10:51:42 -0500 Subject: [PATCH] Style fixes --- .../amazon/deequ/VerificationRunBuilder.scala | 2 +- .../amazon/deequ/analyzers/Uniqueness.scala | 3 ++- .../utilities/RowLevelFilterTreatement.scala | 18 ++++++++++++- .../amazon/deequ/VerificationSuiteTest.scala | 26 +++++++++++++------ .../deequ/analyzers/CompletenessTest.scala | 4 +-- .../deequ/analyzers/UniquenessTest.scala | 4 +-- .../runners/AnalysisRunnerTests.scala | 3 ++- .../com/amazon/deequ/checks/CheckTest.scala | 2 +- 8 files changed, 45 insertions(+), 17 deletions(-) diff --git a/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala b/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala index a4724dcf..55800080 100644 --- a/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala +++ b/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala @@ -353,4 +353,4 @@ case class AnomalyCheckConfig( description: String, withTagValues: Map[String, String] = Map.empty, afterDate: Option[Long] = None, - beforeDate: Option[Long] = None) \ No newline at end of file + beforeDate: Option[Long] = None) diff --git a/src/main/scala/com/amazon/deequ/analyzers/Uniqueness.scala b/src/main/scala/com/amazon/deequ/analyzers/Uniqueness.scala index f87069d2..f62476da 100644 --- a/src/main/scala/com/amazon/deequ/analyzers/Uniqueness.scala +++ b/src/main/scala/com/amazon/deequ/analyzers/Uniqueness.scala @@ -42,7 +42,8 @@ case class Uniqueness(columns: Seq[String], where: Option[String] = None) val conditionColumn = where.map { expression => expr(expression) } val fullColumnUniqueness = conditionColumn.map { condition => { - when(not(condition), expr(rowLevelFilterTreatment.toString)).when((fullColumn.getOrElse(null)).equalTo(1), true).otherwise(false) + when(not(condition), expr(rowLevelFilterTreatment.toString)) + .when(fullColumn.getOrElse(null).equalTo(1), true).otherwise(false) } }.getOrElse(when((fullColumn.getOrElse(null)).equalTo(1), true).otherwise(false)) super.fromAggregationResult(result, offset, Option(fullColumnUniqueness)) diff --git a/src/main/scala/com/amazon/deequ/utilities/RowLevelFilterTreatement.scala b/src/main/scala/com/amazon/deequ/utilities/RowLevelFilterTreatement.scala index 8a2459bf..45ce0ce9 100644 --- a/src/main/scala/com/amazon/deequ/utilities/RowLevelFilterTreatement.scala +++ b/src/main/scala/com/amazon/deequ/utilities/RowLevelFilterTreatement.scala @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not + * use this file except in compliance with the License. A copy of the License + * is located at + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + * + */ + package com.amazon.deequ.utilities import com.amazon.deequ.utilities.FilteredRow.FilteredRow @@ -29,4 +45,4 @@ class RowLevelFilterTreatmentImpl(initialFilterTreatment: FilteredRow) extends R object FilteredRow extends Enumeration { type FilteredRow = Value val NULL, TRUE = Value -} \ No newline at end of file +} diff --git a/src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala b/src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala index 1975945b..d1b5aeae 100644 --- a/src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala +++ b/src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala @@ -304,12 +304,17 @@ class VerificationSuiteTest extends WordSpec with Matchers with SparkContextSpec assert(Seq(true, true, true, false, false, false).sameElements(rowLevel8)) } - "generate a result that contains row-level results with filter with true for filtered rows" in withSparkSession { session => + "generate a result that contains row-level results with true for filtered rows" in withSparkSession { session => val data = getDfCompleteAndInCompleteColumns(session) - val completeness = new Check(CheckLevel.Error, "rule1").hasCompleteness("att2", _ > 0.7, None).where("att1 = \"a\"") - val uniqueness = new Check(CheckLevel.Error, "rule2").hasUniqueness("att1", _ > 0.5, None) - val uniquenessWhere = new Check(CheckLevel.Error, "rule3").isUnique("att1").where("item < 3") + val completeness = new Check(CheckLevel.Error, "rule1") + .hasCompleteness("att2", _ > 0.7, None) + .where("att1 = \"a\"") + val uniqueness = new Check(CheckLevel.Error, "rule2") + .hasUniqueness("att1", _ > 0.5, None) + val uniquenessWhere = new Check(CheckLevel.Error, "rule3") + .isUnique("att1") + .where("item < 3") val expectedColumn1 = completeness.description val expectedColumn2 = uniqueness.description val expectedColumn3 = uniquenessWhere.description @@ -341,12 +346,17 @@ class VerificationSuiteTest extends WordSpec with Matchers with SparkContextSpec } - "generate a result that contains row-level results with filter with null for filtered rows" in withSparkSession { session => + "generate a result that contains row-level results with null for filtered rows" in withSparkSession { session => val data = getDfCompleteAndInCompleteColumns(session) - val completeness = new Check(CheckLevel.Error, "rule1").hasCompleteness("att2", _ > 0.7, None).where("att1 = \"a\"") - val uniqueness = new Check(CheckLevel.Error, "rule2").hasUniqueness("att1", _ > 0.5, None) - val uniquenessWhere = new Check(CheckLevel.Error, "rule3").isUnique("att1").where("item < 3") + val completeness = new Check(CheckLevel.Error, "rule1") + .hasCompleteness("att2", _ > 0.7, None) + .where("att1 = \"a\"") + val uniqueness = new Check(CheckLevel.Error, "rule2") + .hasUniqueness("att1", _ > 0.5, None) + val uniquenessWhere = new Check(CheckLevel.Error, "rule3") + .isUnique("att1") + .where("item < 3") val expectedColumn1 = completeness.description val expectedColumn2 = uniqueness.description val expectedColumn3 = uniquenessWhere.description diff --git a/src/test/scala/com/amazon/deequ/analyzers/CompletenessTest.scala b/src/test/scala/com/amazon/deequ/analyzers/CompletenessTest.scala index f7084ccb..cb2778a1 100644 --- a/src/test/scala/com/amazon/deequ/analyzers/CompletenessTest.scala +++ b/src/test/scala/com/amazon/deequ/analyzers/CompletenessTest.scala @@ -45,7 +45,7 @@ class CompletenessTest extends AnyWordSpec with Matchers with SparkContextSpec w val data = getDfCompleteAndInCompleteColumns(session) - // Explicitly setting RowLevelFilterTreatment for test purposes, but this should be set at the VerificationRunBuilder + // Explicitly setting RowLevelFilterTreatment for test purposes, this should be set at the VerificationRunBuilder val completenessAtt2 = Completeness("att2", Option("att1 = \"a\"")).withRowLevelFilterTreatment(FilteredRow.NULL) val state = completenessAtt2.computeStateFrom(data) val metric: DoubleMetric with FullColumn = completenessAtt2.computeMetricFrom(state) @@ -60,7 +60,7 @@ class CompletenessTest extends AnyWordSpec with Matchers with SparkContextSpec w val data = getDfCompleteAndInCompleteColumns(session) - // Explicitly setting RowLevelFilterTreatment for test purposes, but this should be set at the VerificationRunBuilder + // Explicitly setting RowLevelFilterTreatment for test purposes, this should be set at the VerificationRunBuilder val completenessAtt2 = Completeness("att2", Option("att1 = \"a\"")).withRowLevelFilterTreatment(FilteredRow.TRUE) val state = completenessAtt2.computeStateFrom(data) val metric: DoubleMetric with FullColumn = completenessAtt2.computeMetricFrom(state) diff --git a/src/test/scala/com/amazon/deequ/analyzers/UniquenessTest.scala b/src/test/scala/com/amazon/deequ/analyzers/UniquenessTest.scala index bd4a39af..7be9b4b3 100644 --- a/src/test/scala/com/amazon/deequ/analyzers/UniquenessTest.scala +++ b/src/test/scala/com/amazon/deequ/analyzers/UniquenessTest.scala @@ -155,7 +155,7 @@ class UniquenessTest extends AnyWordSpec with Matchers with SparkContextSpec wit val data = getDfWithUniqueColumns(session) - // Explicitly setting RowLevelFilterTreatment for test purposes, but this should be set at the VerificationRunBuilder + // Explicitly setting RowLevelFilterTreatment for test purposes, this should be set at the VerificationRunBuilder val addressLength = Uniqueness(Seq("onlyUniqueWithOtherNonUnique"), Option("unique < 4")) .withRowLevelFilterTreatment(FilteredRow.TRUE) val state: Option[FrequenciesAndNumRows] = addressLength.computeStateFrom(data, Option("unique < 4")) @@ -172,7 +172,7 @@ class UniquenessTest extends AnyWordSpec with Matchers with SparkContextSpec wit val data = getDfWithUniqueColumns(session) - // Explicitly setting RowLevelFilterTreatment for test purposes, but this should be set at the VerificationRunBuilder + // Explicitly setting RowLevelFilterTreatment for test purposes, this should be set at the VerificationRunBuilder val addressLength = Uniqueness(Seq("halfUniqueCombinedWithNonUnique", "nonUnique"), Option("unique > 2")) .withRowLevelFilterTreatment(FilteredRow.TRUE) val state: Option[FrequenciesAndNumRows] = addressLength.computeStateFrom(data, Option("unique > 2")) diff --git a/src/test/scala/com/amazon/deequ/analyzers/runners/AnalysisRunnerTests.scala b/src/test/scala/com/amazon/deequ/analyzers/runners/AnalysisRunnerTests.scala index 66bfd969..31b7365a 100644 --- a/src/test/scala/com/amazon/deequ/analyzers/runners/AnalysisRunnerTests.scala +++ b/src/test/scala/com/amazon/deequ/analyzers/runners/AnalysisRunnerTests.scala @@ -202,7 +202,8 @@ class AnalysisRunnerTests extends AnyWordSpec assert(numCombinedJobs == analyzers.length * 2) // assert(separateResults == runnerResults.toString) // Used to be tested with the above line, but adding filters changed the order of the results. - assert(separateResults.asInstanceOf[Set[DoubleMetric]].size == runnerResults.asInstanceOf[Set[DoubleMetric]].size) + assert(separateResults.asInstanceOf[Set[DoubleMetric]].size == + runnerResults.asInstanceOf[Set[DoubleMetric]].size) separateResults.asInstanceOf[Set[DoubleMetric]].foreach( result => { assert(runnerResults.toString.contains(result.toString)) } diff --git a/src/test/scala/com/amazon/deequ/checks/CheckTest.scala b/src/test/scala/com/amazon/deequ/checks/CheckTest.scala index f865ee5c..52acc951 100644 --- a/src/test/scala/com/amazon/deequ/checks/CheckTest.scala +++ b/src/test/scala/com/amazon/deequ/checks/CheckTest.scala @@ -526,7 +526,7 @@ class CheckTest extends AnyWordSpec with Matchers with SparkContextSpec with Fix assertEvaluatesTo(numericRangeCheck9, numericRangeResults, CheckStatus.Success) } - "correctly evaluate range constraints when values have single quote(') in string" in withSparkSession { sparkSession => + "correctly evaluate range constraints when values have single quote in string" in withSparkSession { sparkSession => val rangeCheck = Check(CheckLevel.Error, "a") .isContainedIn("att2", Array("can't", "help", "but", "wouldn't"))