apache · himadripal · Feb 6, 2025 · Feb 11, 2025 · Feb 14, 2025 · Feb 16, 2025
diff --git a/docs/source/user-guide/compatibility.md b/docs/source/user-guide/compatibility.md
@@ -73,76 +73,77 @@ Spark.
 The following cast operations are generally compatible with Spark except for the differences noted here.
 
 | From Type | To Type | Notes |
-|-|-|-|
-| boolean | byte |  |
-| boolean | short |  |
+|-|---------|-|
+| boolean | byte    |  |
+| boolean | short   |  |
 | boolean | integer |  |
-| boolean | long |  |
-| boolean | float |  |
-| boolean | double |  |
-| boolean | string |  |
+| boolean | long    |  |
+| boolean | float   |  |
+| boolean | double  |  |
+| boolean | string  |  |
 | byte | boolean |  |
-| byte | short |  |
+| byte | short   |  |
 | byte | integer |  |
-| byte | long |  |
-| byte | float |  |
-| byte | double |  |
+| byte | long    |  |
+| byte | float   |  |
+| byte | double  |  |
 | byte | decimal |  |
-| byte | string |  |
+| byte | string  |  |
 | short | boolean |  |
-| short | byte |  |
+| short | byte    |  |
 | short | integer |  |
-| short | long |  |
-| short | float |  |
-| short | double |  |
+| short | long    |  |
+| short | float   |  |
+| short | double  |  |
 | short | decimal |  |
-| short | string |  |
+| short | string  |  |
 | integer | boolean |  |
-| integer | byte |  |
-| integer | short |  |
-| integer | long |  |
-| integer | float |  |
-| integer | double |  |
-| integer | string |  |
+| integer | byte    |  |
+| integer | short   |  |
+| integer | long    |  |
+| integer | float   |  |
+| integer | double  |  |
+| integer | string  |  |
 | long | boolean |  |
-| long | byte |  |
-| long | short |  |
+| long | byte    |  |
+| long | short   |  |
 | long | integer |  |
-| long | float |  |
-| long | double |  |
-| long | string |  |
+| long | float   |  |
+| long | double  |  |
+| long | string  |  |
 | float | boolean |  |
-| float | byte |  |
-| float | short |  |
+| float | byte    |  |
+| float | short   |  |
 | float | integer |  |
-| float | long |  |
-| float | double |  |
-| float | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
+| float | long    |  |
+| float | double  |  |
+| float | string  | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
 | double | boolean |  |
-| double | byte |  |
-| double | short |  |
+| double | byte    |  |
+| double | short   |  |
 | double | integer |  |
-| double | long |  |
-| double | float |  |
-| double | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
-| decimal | byte |  |
-| decimal | short |  |
+| double | long    |  |
+| double | float   |  |
+| double | string  | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
+| decimal | byte    |  |
+| decimal | short   |  |
 | decimal | integer |  |
-| decimal | long |  |
-| decimal | float |  |
-| decimal | double |  |
-| decimal | string | There can be formatting differences in some case due to Spark using scientific notation where Comet does not |
+| decimal | long    |  |
+| decimal | float   |  |
+| decimal | double  |  |
+| decimal | string  | There can be formatting differences in some cases due to Spark using scientific notation where Comet does not |
+| decimal | decimal |  |
 | string | boolean |  |
-| string | byte |  |
-| string | short |  |
+| string | byte    |  |
+| string | short   |  |
 | string | integer |  |
-| string | long |  |
-| string | binary |  |
-| string | date | Only supports years between 262143 BC and 262142 AD |
-| date | string |  |
-| timestamp | long |  |
-| timestamp | string |  |
-| timestamp | date |  |
+| string | long    |  |
+| string | binary  |  |
+| string | date    | Only supports years between 262143 BC and 262142 AD |
+| date | string  |  |
+| timestamp | long    |  |
+| timestamp | string  |  |
+| timestamp | date    |  |
 
 ### Incompatible Casts
 

diff --git a/native/spark-expr/src/conversion_funcs/cast.rs b/native/spark-expr/src/conversion_funcs/cast.rs
@@ -872,6 +872,13 @@ fn cast_array(
     let array = array_with_timezone(array, cast_options.timezone.clone(), Some(to_type))?;
     let from_type = array.data_type().clone();
 
+    let native_cast_options: CastOptions = CastOptions {
+        safe: !matches!(cast_options.eval_mode, EvalMode::Ansi), // ANSI mode must error, not null, on failure
+        format_options: FormatOptions::new()
+            .with_timestamp_tz_format(TIMESTAMP_FORMAT)
+            .with_timestamp_format(TIMESTAMP_FORMAT),
+    };
+
     let array = match &from_type {
         Dictionary(key_type, value_type)
             if key_type.as_ref() == &Int32
@@ -963,7 +970,7 @@ fn cast_array(
             || is_datafusion_spark_compatible(from_type, to_type, cast_options.allow_incompat) =>
         {
             // use DataFusion cast only when we know that it is compatible with Spark
-            Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?)
+            Ok(cast_with_options(&array, to_type, &native_cast_options)?)
         }
         _ => {
             // we should never reach this code because the Scala code should be checking

diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
@@ -70,13 +70,8 @@ object CometCast {
           case _ =>
             Unsupported
         }
-      case (from: DecimalType, to: DecimalType) =>
-        if (to.precision < from.precision) {
-          // https://github.com/apache/datafusion/issues/13492
-          Incompatible(Some("Casting to smaller precision is not supported"))
-        } else {
-          Compatible()
-        }
+      case (_: DecimalType, _: DecimalType) =>
+        Compatible()
       case (DataTypes.StringType, _) =>
         canCastFromString(toType, timeZoneId, evalMode)
       case (_, DataTypes.StringType) =>

diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -25,12 +25,12 @@ import scala.util.Random
 import scala.util.matching.Regex
 
 import org.apache.hadoop.fs.Path
-import org.apache.spark.sql.{CometTestBase, DataFrame, SaveMode}
+import org.apache.spark.sql.{CometTestBase, DataFrame, Row, SaveMode}
 import org.apache.spark.sql.catalyst.expressions.Cast
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{DataType, DataTypes, DecimalType}
+import org.apache.spark.sql.types.{DataType, DataTypes, DecimalType, StructField, StructType}
 
 import org.apache.comet.expressions.{CometCast, CometEvalMode, Compatible}
 
@@ -909,12 +909,15 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast between decimals with different precision and scale") {
-    // cast between default Decimal(38, 18) to Decimal(6,2)
-    val values = Seq(BigDecimal("12345.6789"), BigDecimal("9876.5432"), BigDecimal("123.4567"))
-    val df = withNulls(values)
-      .toDF("b")
-      .withColumn("a", col("b").cast(DecimalType(6, 2)))
-    checkSparkAnswer(df)
+    val rowData = Seq(
+      Row(BigDecimal("12345.6789")),
+      Row(BigDecimal("9876.5432")),
+      Row(BigDecimal("123.4567")))
+    val df = spark.createDataFrame(
+      spark.sparkContext.parallelize(rowData),
+      StructType(Seq(StructField("a", DataTypes.createDecimalType(10, 4)))))
+
+    castTest(df, DecimalType(6, 2))
   }
 
   test("cast between decimals with higher precision than source") {
@@ -1126,27 +1129,36 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
               val cometMessage =
                 if (cometException.getCause != null) cometException.getCause.getMessage
                 else cometException.getMessage
-              if (CometSparkSessionExtensions.isSpark40Plus) {
-                // for Spark 4 we expect to sparkException carries the message
+              // Comet's decimal conversion throws an ArrowError(string) from Arrow, so the messages don't match across Spark versions.
+              if (sparkMessage.contains("cannot be represented as")) {
                 assert(
-                  sparkException.getMessage
-                    .replace(".WITH_SUGGESTION] ", "]")
-                    .startsWith(cometMessage))
-              } else if (CometSparkSessionExtensions.isSpark34Plus) {
-                // for Spark 3.4 we expect to reproduce the error message exactly
-                assert(cometMessage == sparkMessage)
+                  cometMessage.contains("cannot be represented as") || cometMessage.contains(
+                    "too large to store"))
               } else {
-                // for Spark 3.3 we just need to strip the prefix from the Comet message
-                // before comparing
-                val cometMessageModified = cometMessage
-                  .replace("[CAST_INVALID_INPUT] ", "")
-                  .replace("[CAST_OVERFLOW] ", "")
-                  .replace("[NUMERIC_VALUE_OUT_OF_RANGE] ", "")
-
-                if (sparkMessage.contains("cannot be represented as")) {
-                  assert(cometMessage.contains("cannot be represented as"))
+                if (CometSparkSessionExtensions.isSpark40Plus) {
+                  // for Spark 4 we expect the sparkException to carry the message
+                  assert(
+                    sparkException.getMessage
+                      .replace(".WITH_SUGGESTION] ", "]")
+                      .startsWith(cometMessage))
+                } else if (CometSparkSessionExtensions.isSpark34Plus) {
+                  // for Spark 3.4 we expect to reproduce the error message exactly
+                  assert(cometMessage == sparkMessage)
                 } else {
-                  assert(cometMessageModified == sparkMessage)
+                  // for Spark 3.3 we just need to strip the prefix from the Comet message
+                  // before comparing
+                  val cometMessageModified = cometMessage
+                    .replace("[CAST_INVALID_INPUT] ", "")
+                    .replace("[CAST_OVERFLOW] ", "")
+                    .replace("[NUMERIC_VALUE_OUT_OF_RANGE] ", "")
+
+                  if (sparkMessage.contains("cannot be represented as")) {
+                    assert(
+                      cometMessage.contains("cannot be represented as") || cometMessage.contains(
+                        "too large to store"))
+                  } else {
+                    assert(cometMessageModified == sparkMessage)
+                  }
                 }
               }
           }