Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make metadata test work on Spark 3.0/3.1/3.2
Browse files: browse the repository at this point in the history
zeotuan committed Oct 12, 2024
1 parent 1791243 commit 9aae738
Showing 2 changed files with 22 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@ import org.apache.spark.sql.types.{DoubleType, IntegerType, MetadataBuilder, Str
import SparkSessionExt._
import com.github.mrpowers.spark.fast.tests.SchemaComparer.DatasetSchemaMismatch
import com.github.mrpowers.spark.fast.tests.StringExt.StringOps
import org.apache.spark.sql.functions.col
import org.scalatest.freespec.AnyFreeSpec

class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with SparkSessionTestWrapper {
@@ -320,7 +321,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small int").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build()))

val expectedDF = spark
.createDF(
@@ -330,7 +331,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small number").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build()))

assertLargeDataFrameEquality(sourceDF, expectedDF)
}
@@ -344,7 +345,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small int").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build()))

val expectedDF = spark
.createDF(
@@ -354,7 +355,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small number").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build()))

intercept[DatasetSchemaMismatch] {
assertLargeDataFrameEquality(sourceDF, expectedDF, ignoreMetadata = false)
@@ -517,7 +518,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small int").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build()))

val expectedDF = spark
.createDF(
@@ -527,7 +528,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small number").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build()))

assertApproximateDataFrameEquality(sourceDF, expectedDF, precision = 0.0000001)
}
@@ -541,7 +542,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small int").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build()))

val expectedDF = spark
.createDF(
@@ -551,7 +552,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small number").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build()))

intercept[DatasetSchemaMismatch] {
assertApproximateDataFrameEquality(sourceDF, expectedDF, precision = 0.0000001, ignoreMetadata = false)
@@ -714,7 +715,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small int").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build()))

val expectedDF = spark
.createDF(
@@ -724,7 +725,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small number").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build()))

assertApproximateSmallDataFrameEquality(sourceDF, expectedDF, precision = 0.0000001)
}
@@ -738,7 +739,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small int").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build()))

val expectedDF = spark
.createDF(
@@ -748,7 +749,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
),
List(("number", IntegerType, true))
)
.withMetadata("number", new MetadataBuilder().putString("description", "small number").build())
.withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build()))

intercept[DatasetSchemaMismatch] {
assertApproximateSmallDataFrameEquality(sourceDF, expectedDF, precision = 0.0000001, ignoreMetadata = false)
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@ import org.apache.spark.sql.types._
import SparkSessionExt._
import com.github.mrpowers.spark.fast.tests.SchemaComparer.DatasetSchemaMismatch
import com.github.mrpowers.spark.fast.tests.StringExt.StringOps
import org.apache.spark.sql.functions.col
import org.scalatest.freespec.AnyFreeSpec

object Person {
@@ -415,7 +416,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
Person("li", 49),
Person("alice", 5)
).toDS
.withMetadata("name", new MetadataBuilder().putString("description", "name of the person").build())
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build()))
.as[Person]

val ds2 = Seq(
@@ -424,7 +425,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
Person("li", 49),
Person("alice", 5)
).toDS
.withMetadata("name", new MetadataBuilder().putString("description", "name of the individual").build())
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build()))
.as[Person]

assertLargeDatasetEquality(ds2, ds1)
@@ -437,7 +438,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
Person("li", 49),
Person("alice", 5)
).toDS
.withMetadata("name", new MetadataBuilder().putString("description", "name of the person").build())
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build()))
.as[Person]

val ds2 = Seq(
@@ -446,7 +447,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
Person("li", 49),
Person("alice", 5)
).toDS
.withMetadata("name", new MetadataBuilder().putString("description", "name of the individual").build())
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build()))
.as[Person]

intercept[DatasetSchemaMismatch] {
@@ -623,7 +624,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
Person("li", 49),
Person("alice", 5)
).toDS
.withMetadata("name", new MetadataBuilder().putString("description", "first name of a person").build())
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build()))
.as[Person]

val ds2 = Seq(
@@ -632,7 +633,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
Person("li", 49),
Person("alice", 5)
).toDS
.withMetadata("name", new MetadataBuilder().putString("description", "first name of an individual").build())
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build()))
.as[Person]

assertSmallDatasetEquality(ds2, ds1)
@@ -645,7 +646,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
Person("li", 49),
Person("alice", 5)
).toDS
.withMetadata("name", new MetadataBuilder().putString("description", "name of the person").build())
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build()))
.as[Person]

val ds2 = Seq(
@@ -654,7 +655,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
Person("li", 49),
Person("alice", 5)
).toDS
.withMetadata("name", new MetadataBuilder().putString("description", "name of the individual").build())
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build()))
.as[Person]

intercept[DatasetSchemaMismatch] {

0 comments on commit 9aae738

Please sign in to comment.