From a0a4fdd50d4724b5df352e2f9a3aef764e2e1e4e Mon Sep 17 00:00:00 2001
From: Tuan Pham <Tuan.Pham@wisetechglobal.com>
Date: Sun, 20 Oct 2024 21:25:12 +1100
Subject: [PATCH] Add test for metadata diff

---
 .../fast/tests/DatasetComparerTest.scala      | 69 ++++++++-----------
 1 file changed, 29 insertions(+), 40 deletions(-)

diff --git a/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala
index ab46374..8684492 100644
--- a/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala
+++ b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala
@@ -593,28 +593,6 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
       assertLargeDatasetEquality(ds2, ds1)
     }
 
-    "can performed Dataset comparisons and ignore metadata" in {
-      val ds1 = Seq(
-        Person("juan", 5),
-        Person("bob", 1),
-        Person("li", 49),
-        Person("alice", 5)
-      ).toDS
-        .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build()))
-        .as[Person]
-
-      val ds2 = Seq(
-        Person("juan", 5),
-        Person("bob", 1),
-        Person("li", 49),
-        Person("alice", 5)
-      ).toDS
-        .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build()))
-        .as[Person]
-
-      assertLargeDatasetEquality(ds2, ds1)
-    }
-
     "can performed Dataset comparisons and compare metadata" in {
       val ds1 = Seq(
         Person("juan", 5),
@@ -853,26 +831,37 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
       e.assertColorDiff(Seq("float", "DoubleType", "MISSING"), Seq("word", "StringType", "StructField(long,LongType,true,{})"))
     }
 
-    "can performed Dataset comparisons and ignore metadata" in {
-      val ds1 = Seq(
-        Person("juan", 5),
-        Person("bob", 1),
-        Person("li", 49),
-        Person("alice", 5)
-      ).toDS
-        .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build()))
-        .as[Person]
+    "correctly mark schema with unequal metadata" in {
+      // A metadata-only difference is expected to raise DatasetSchemaMismatch when the
+      // comparison is run with ignoreMetadata = false.
+      val sourceDF = spark.createDF(
+        List(
+          (1, 2.0),
+          (5, 3.0)
+        ),
+        List(
+          ("number", IntegerType, true),
+          ("float", DoubleType, true)
+        )
+      )
 
-      val ds2 = Seq(
-        Person("juan", 5),
-        Person("bob", 1),
-        Person("li", 49),
-        Person("alice", 5)
-      ).toDS
-        .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build()))
-        .as[Person]
+      // Derive the expected frame from sourceDF so that the "float" column metadata is the
+      // only difference between the two schemas.
+      val floatMetadata =
+        new MetadataBuilder().putString("description", "a float").build()
+      val expectedDF =
+        sourceDF.withColumn("float", col("float").as("float", floatMetadata))
 
-      assertSmallDatasetEquality(ds2, ds1)
+      val e = intercept[DatasetSchemaMismatch] {
+        assertSmallDatasetEquality(sourceDF, expectedDF, ignoreMetadata = false)
+      }
+
+      // Presumably the fragments highlighted on each side of the mismatch message: empty
+      // metadata for sourceDF, the description for expectedDF (confirm in assertColorDiff).
+      e.assertColorDiff(
+        Seq("{}"),
+        Seq("{\"description\":\"a float\"}")
+      )
     }
 
     "can performed Dataset comparisons and ignore metadata" in {