Spark 3.3: Fix Decimal precision in JSON mode on reading (#245)
pan3793 committed May 22, 2023
1 parent 2d715e6 commit 246f28d
Showing 2 changed files with 40 additions and 2 deletions.
@@ -18,6 +18,8 @@ import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.types.DataTypes.{createArrayType, createMapType}
import org.apache.spark.sql.types._

import java.math.MathContext

class ClickHouseDataTypeSuite extends SparkClickHouseSingleTest {

test("write supported data types") {
@@ -121,6 +123,40 @@ class ClickHouseDataTypeSuite extends SparkClickHouseSingleTest {
    }
  }

  // Decimal(P, S): P - precision, S - scale, which have different supported ranges in Spark and ClickHouse.
  //
  // Spark:
  //   Decimal(P, S):  P: [ 1:38]; S: [0:P]
  // ClickHouse:
  //   Decimal(P, S):  P: [ 1:76]; S: [0:P]
  //   Decimal32(S):   P: [ 1: 9]; S: [0:P]
  //   Decimal64(S):   P: [10:18]; S: [0:P]
  //   Decimal128(S):  P: [19:38]; S: [0:P]
  //   Decimal256(S):  P: [39:76]; S: [0:P]
  Seq(
    ("Decimal(38,9)", 38, 9),
    ("Decimal32(4)", 9, 4),
    ("Decimal64(4)", 18, 4),
    ("Decimal128(4)", 38, 4)
  ).foreach { case (dataType, p, s) =>
    test(s"DataType - $dataType") {
      testDataType(dataType) { (db, tbl) =>
        runClickHouseSQL(
          s"""INSERT INTO $db.$tbl VALUES
             |(1, '11.1')
             |""".stripMargin
        )
      } { df =>
        assert(df.schema.length === 2)
        assert(df.schema.fields(1).dataType === DecimalType(p, s))
        checkAnswer(
          df,
          Row(1, BigDecimal("11.1", new MathContext(p))) :: Nil
        )
      }
    }
  }

  private def testDataType(valueColDef: String)(prepare: (String, String) => Unit)(validate: DataFrame => Unit)
    : Unit = {
    val db = "test_kv_db"
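The comment block above spells out how Spark's single Decimal(P, S) type lines up with ClickHouse's fixed-width decimal family. As a rough illustration of that mapping (the clickHouseDecimalName helper below is hypothetical and not part of the connector), one could pick the narrowest ClickHouse type whose precision range covers a given Spark DecimalType:

import org.apache.spark.sql.types.DecimalType

// Hypothetical helper (illustration only, not connector code): choose the
// narrowest ClickHouse decimal family whose precision range covers the
// Spark DecimalType, using the ranges listed in the comment above.
def clickHouseDecimalName(dt: DecimalType): String = dt.precision match {
  case p if p <= 9  => s"Decimal32(${dt.scale})"  // P in [1, 9]
  case p if p <= 18 => s"Decimal64(${dt.scale})"  // P in [10, 18]
  case p if p <= 38 => s"Decimal128(${dt.scale})" // P in [19, 38]
  case _            => s"Decimal256(${dt.scale})" // P in [39, 76]; unreachable from Spark, whose max precision is 38
}

// Spark's widest type, DecimalType(38, 9), still fits in Decimal128(9).
assert(clickHouseDecimalName(DecimalType(38, 9)) == "Decimal128(9)")

This is also why the tests above expect Decimal32(4) to surface as DecimalType(9, 4), Decimal64(4) as DecimalType(18, 4), and Decimal128(4) as DecimalType(38, 4).
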
@@ -65,9 +65,11 @@ class ClickHouseJsonReader(
       case FloatType => jsonNode.asDouble.floatValue
       case DoubleType => jsonNode.asDouble
       case d: DecimalType if jsonNode.isBigDecimal =>
-        Decimal(jsonNode.decimalValue.setScale(d.scale, RoundingMode.HALF_UP))
+        Decimal(jsonNode.decimalValue, d.precision, d.scale)
+      case d: DecimalType if jsonNode.isFloat | jsonNode.isDouble =>
+        Decimal(BigDecimal(jsonNode.doubleValue, new MathContext(d.precision)), d.precision, d.scale)
       case d: DecimalType =>
-        Decimal(BigDecimal(jsonNode.asText, new MathContext(d.scale, RM.HALF_UP)))
+        Decimal(BigDecimal(jsonNode.textValue, new MathContext(d.precision)), d.precision, d.scale)
       case TimestampType =>
         ZonedDateTime.parse(jsonNode.asText, dateTimeFmt.withZone(scanJob.tz))
           .withZoneSameInstant(ZoneOffset.UTC)
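The change above builds the MathContext from the column's precision instead of its scale and passes both precision and scale through to Spark's Decimal. A standalone sketch in plain Scala (not connector code) of how the old text-parsing branch could drop digits for a ClickHouse Decimal(38, 9) column:

import java.math.{MathContext, RoundingMode}

// For a Decimal(38, 9) column the old branch effectively parsed with
// MathContext(9), i.e. it kept only 9 significant digits instead of
// allowing the full precision of 38.
val text = "1234567890.123456789"

val before = BigDecimal(text, new MathContext(9, RoundingMode.HALF_UP)) // scale misused as precision
val after  = BigDecimal(text, new MathContext(38))                      // precision, as in the fix

println(before) // 1.23456789E+9        (the fractional digits are rounded away)
println(after)  // 1234567890.123456789 (full value preserved)

The isBigDecimal branch gets the same treatment: instead of calling setScale on the parsed value, it hands the target precision and scale to Decimal(value, precision, scale) and lets Spark fit the value into the declared type.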
