Relax decimal metadata checks for mismatched precision/scale [databricks] #12060

Open
wants to merge 4 commits into base: branch-25.02
29 changes: 10 additions & 19 deletions integration_tests/src/main/python/parquet_test.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -1500,10 +1500,17 @@ def test_parquet_check_schema_compatibility_nested_types(spark_tmp_path):
(DecimalGen(7, 4), DecimalGen(5, 2)),
(DecimalGen(10, 7), DecimalGen(5, 2)),
(DecimalGen(20, 17), DecimalGen(5, 2)),
# Narrowing precision
(DecimalGen(20, 0), DecimalGen(10, 0)),
# Increasing precision and decreasing scale
(DecimalGen(5, 4), DecimalGen(7, 2)),
Collaborator commented:

nit: So I mapped out all of the tests here. I looked at any change in the underlying data type, crossed with whether the scale increased, stayed the same, or decreased, and whether the whole part (i.e. the precision minus the scale) increased, stayed the same, or decreased.

| data type | 32->32 | 64->64 | 128->128 | 32->64 | 32->128 | 64->128 | 128->64 | 128->32 | 128->64 | 64->32 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| scale same/whole same | N/A noop | N/A noop | N/A noop | N/A impossible | N/A impossible | N/A impossible | N/A impossible | N/A impossible | N/A impossible | N/A impossible |
| scale same/whole increase | | | | | | | N/A impossible | N/A impossible | N/A impossible | N/A impossible |
| scale same/whole decrease | | | | N/A impossible | N/A impossible | N/A impossible | (20,0)->(10,0) | | | |
| scale increase/whole same | (5,2)->(7,4), (5,2)->(6,3) | (10,2)->(12,4) | (20,2)->(22,4) | (5,2)->(10,7) | (5,2)->(20,17) | (10,2)->(20,12) | N/A impossible | N/A impossible | N/A impossible | N/A impossible |
| scale increase/whole increase | | | | (5,2)->(12,5) | (5,2)->(22,10) | | N/A impossible | N/A impossible | N/A impossible | N/A impossible |
| scale increase/whole decrease | (5,2)->(6,4) | (10,4)->(12,7) | | | | | | | | |
| scale decrease/whole same | (7,4)->(5,2) | | | N/A impossible | N/A impossible | N/A impossible | | (20,17)->(5,2) | | (10,7)->(5,2) |
| scale decrease/whole increase | (5,4)->(7,2) | (10,6)->(12,4) | (20,7)->(22,5) | | | | | | | |
| scale decrease/whole decrease | | | | N/A impossible | N/A impossible | N/A impossible | | | | |

I don't expect all of the boxes to be filled. I don't think we need exhaustive tests, but I noticed that

    (DecimalGen(5, 2), DecimalGen(6, 3)),

does not actually increase the precision by a larger amount than the scale: the scale increased by 1 and so did the precision, so the whole part stayed the same, just like for (5,2)->(7,4).

Could we get one or two tests for when the scale stays the same and the whole part increases, and similarly for when the scale decreases and so does the whole part?

I don't think this is going to improve the coverage massively.
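
To make the categorization above concrete, here is a minimal Python sketch (not part of this PR; the helper names are illustrative only) that classifies a (from, to) decimal change by storage width and by the scale/whole-part trend used in the table, assuming Spark's usual precision thresholds of 9 digits for 32-bit and 18 digits for 64-bit decimals:

```python
# Minimal sketch: classify a decimal type change the way the table above does.
# Assumes precision <= 9 -> 32-bit, <= 18 -> 64-bit, otherwise 128-bit.

def storage_bits(precision):
    if precision <= 9:
        return 32
    elif precision <= 18:
        return 64
    return 128

def categorize(from_prec, from_scale, to_prec, to_scale):
    def trend(delta):
        return "same" if delta == 0 else ("increase" if delta > 0 else "decrease")
    scale_delta = to_scale - from_scale
    # The "whole" part is precision - scale (digits left of the decimal point).
    whole_delta = (to_prec - to_scale) - (from_prec - from_scale)
    return (f"{storage_bits(from_prec)}->{storage_bits(to_prec)}",
            f"scale {trend(scale_delta)}/whole {trend(whole_delta)}")

# (5,2) -> (6,3) grows precision and scale by the same amount, so the whole
# part is unchanged, which is the point about that test case:
print(categorize(5, 2, 6, 3))    # ('32->32', 'scale increase/whole same')
print(categorize(20, 0, 10, 0))  # ('128->64', 'scale same/whole decrease')
```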

(DecimalGen(10, 6), DecimalGen(12, 4)),
(DecimalGen(20, 7), DecimalGen(22, 5)),
# Increasing the precision and keeping the scale same (increasing the whole number part)
(DecimalGen(10, 2), DecimalGen(22, 2)),
# Decreasing the scale and keeping the precision same (decreasing the whole number part)
(DecimalGen(10, 5), DecimalGen(10, 2)),
(DecimalGen(20, 10), DecimalGen(20, 5)),
# Increasing precision by a smaller amount than scale
(DecimalGen(5, 2), DecimalGen(6, 4)),
(DecimalGen(10, 4), DecimalGen(12, 7))
@@ -1524,13 +1531,6 @@ def test_parquet_decimal_precision_scale_change(spark_tmp_path, from_decimal_gen
StructField("a", to_decimal_gen.data_type)
])

# Determine if we expect an error based on precision and scale changes
expect_error = (
to_decimal_gen.scale < from_decimal_gen.scale or
(to_decimal_gen.precision - to_decimal_gen.scale) <
(from_decimal_gen.precision - from_decimal_gen.scale)
)

spark_conf = {}
if is_before_spark_400():
# In Spark versions earlier than 4.0, the vectorized Parquet reader throws an exception
@@ -1539,17 +1539,8 @@ def test_parquet_decimal_precision_scale_change(spark_tmp_path, from_decimal_gen
# is ignored by the plugin.
spark_conf['spark.sql.parquet.enableVectorizedReader'] = 'false'

if expect_error:
assert_gpu_and_cpu_error(
lambda spark: spark.read.schema(read_schema).parquet(data_path).collect(),
conf={},
error_message="Parquet column cannot be converted"
)
else:
assert_gpu_and_cpu_are_equal_collect(
lambda spark: spark.read.schema(read_schema).parquet(data_path),
conf=spark_conf
)
assert_gpu_and_cpu_are_equal_collect(
lambda spark: spark.read.schema(read_schema).parquet(data_path), conf=spark_conf)


@pytest.mark.skipif(is_before_spark_320() or is_spark_321cdh(), reason='Encryption is not supported before Spark 3.2.0 or Parquet < 1.12')
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1616,7 +1616,7 @@ object GpuCast {
}
}

private def castDecimalToDecimal(
def castDecimalToDecimal(
input: ColumnView,
from: DecimalType,
to: DecimalType,
@@ -1033,21 +1033,13 @@ private case class GpuParquetFileFilterHandler(
private def canReadAsDecimal(pt: PrimitiveType, dt: DataType): Boolean = {
(DecimalType.is32BitDecimalType(dt)
|| DecimalType.is64BitDecimalType(dt)
|| DecimalType.isByteArrayDecimalType(dt)) && isDecimalTypeMatched(pt.getDecimalMetadata, dt)
|| DecimalType.isByteArrayDecimalType(dt)) && isValidDecimalType(pt.getDecimalMetadata)
}

// TODO: After we deprecate Spark 3.1, fetch decimal meta with DecimalLogicalTypeAnnotation
@scala.annotation.nowarn("msg=class DecimalMetadata in package schema is deprecated")
private def isDecimalTypeMatched(metadata: DecimalMetadata,
sparkType: DataType): Boolean = {
if (metadata == null) {
false
} else {
val dt = sparkType.asInstanceOf[DecimalType]
val scaleIncrease = dt.scale - metadata.getScale
val precisionIncrease = dt.precision - metadata.getPrecision
scaleIncrease >= 0 && precisionIncrease >= scaleIncrease
}
private def isValidDecimalType(metadata: DecimalMetadata): Boolean = {
metadata != null
}
}

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -576,7 +576,10 @@ object ParquetSchemaUtils {
private def evolveSchemaCasts(cv: ColumnView, dt: DataType, originalFromDt: DataType)
: ColumnView = {
if (needDecimalCast(cv, dt)) {
cv.castTo(DecimalUtil.createCudfDecimal(dt.asInstanceOf[DecimalType]))
val fromDecimal = originalFromDt.asInstanceOf[DecimalType]
val toDecimal = dt.asInstanceOf[DecimalType]
val ansiMode = CastOptions.DEFAULT_CAST_OPTIONS.isAnsiMode
GpuCast.castDecimalToDecimal(cv, fromDecimal, toDecimal, ansiMode)
} else if (needUnsignedToSignedCast(cv, dt) || needInt32Downcast(cv, dt) ||
needSignedUpcast(cv, dt)) {
cv.castTo(DType.create(GpuColumnVector.getNonNestedRapidsType(dt).getTypeId))
@@ -85,7 +85,7 @@ class RapidsTestSettings extends BackendTestSettings {
.exclude("unannotated array of struct with unannotated array", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11476"))
enableSuite[RapidsParquetQuerySuite]
.exclude("SPARK-26677: negated null-safe equality comparison should not filter matched row groups", ADJUST_UT("fetches the CPU version of Execution Plan instead of the GPU version."))
.exclude("SPARK-34212 Parquet should read decimals correctly", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11433"))
.exclude("SPARK-34212 Parquet should read decimals correctly", ADJUST_UT("Vectorized Parquet reader throws an exception when scale is narrowed in Apache Spark where as the spark-rapids plugin does not."))
enableSuite[RapidsParquetRebaseDatetimeSuite]
.exclude("SPARK-31159, SPARK-37705: compatibility with Spark 2.4/3.2 in reading dates/timestamps", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11599"))
.exclude("SPARK-31159, SPARK-37705: rebasing timestamps in write", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/11593"))