Skip to content

Commit

Permalink
3.4 and 3.5 done with tested
Browse files Browse the repository at this point in the history
  • Loading branch information
harryshi10 committed Jun 17, 2024
1 parent afe8e56 commit 477de61
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 4 deletions.
1 change: 1 addition & 0 deletions docs/configurations/02_sql_configurations.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ license: |
<!--begin-include-->
|Key | Default | Description | Since
|--- | ------- | ----------- | -----
spark.clickhouse.fixedStringReadAs|binary|read ClickHouse FixedString type as the specified Spark data type. Supported formats: binary, string|0.8.1
spark.clickhouse.ignoreUnsupportedTransform|false|ClickHouse supports using complex expressions as sharding keys or partition values, e.g. `cityHash64(col_1, col_2)`, and those can not be supported by Spark now. If `true`, ignore the unsupported expressions, otherwise fail fast w/ an exception. Note, when `spark.clickhouse.write.distributed.convertLocal` is enabled, ignore unsupported sharding keys may corrupt the data.|0.4.0
spark.clickhouse.read.compression.codec|lz4|The codec used to decompress data for reading. Supported codecs: none, lz4.|0.5.0
spark.clickhouse.read.distributed.convertLocal|true|When reading Distributed table, read local table instead of itself. If `true`, ignore `spark.clickhouse.read.distributed.useClusterNodes`.|0.1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,12 @@ object ClickHouseSQLConf {
.version("0.8.0")
.booleanConf
.createWithDefault(false)

/**
 * Selects the Spark data type used when reading ClickHouse `FixedString` columns.
 *
 * The configured value is lower-cased before it is stored, so it is matched
 * case-insensitively; the default is `binary`.
 */
val FIXED_STRING_READ_AS: ConfigEntry[String] =
  buildConf("spark.clickhouse.fixedStringReadAs")
    .doc("read ClickHouse FixedString type as the specified Spark data type. Supported formats: binary, string")
    .version("0.8.1")
    .stringConf
    // Locale.ROOT keeps normalization independent of the JVM default locale
    // (e.g. under tr_TR, "BINARY".toLowerCase yields "bınary" with a dotless i,
    // which would no longer equal "binary").
    .transform(_.toLowerCase(java.util.Locale.ROOT))
    .createWithDefault("binary")
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,22 @@ import com.clickhouse.data.ClickHouseDataType._
import com.clickhouse.data.{ClickHouseColumn, ClickHouseDataType}
import org.apache.spark.sql.types._
import xenon.clickhouse.exception.CHClientException
import org.apache.spark.sql.catalyst.SQLConfHelper
import org.apache.spark.sql.clickhouse.ClickHouseSQLConf.FIXED_STRING_READ_AS

object SchemaUtils {
object SchemaUtils extends SQLConfHelper {

def fromClickHouseType(chColumn: ClickHouseColumn): (DataType, Boolean) = {
val catalystType = chColumn.getDataType match {
case Nothing => NullType
case Bool => BooleanType
case String | JSON | UUID | Enum8 | Enum16 | IPv4 | IPv6 => StringType
case FixedString => BinaryType
case FixedString =>
conf.getConf(FIXED_STRING_READ_AS) match {
case "binary" => BinaryType
case "string" => StringType
case unsupported => throw CHClientException(s"Unsupported fixed string read format mapping: $unsupported")
}
case Int8 => ByteType
case UInt8 | Int16 => ShortType
case UInt16 | Int32 => IntegerType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,12 @@ object ClickHouseSQLConf {
.version("0.8.0")
.booleanConf
.createWithDefault(false)

/**
 * Selects the Spark data type used when reading ClickHouse `FixedString` columns.
 *
 * The configured value is lower-cased before it is stored, so it is matched
 * case-insensitively; the default is `binary`.
 */
val FIXED_STRING_READ_AS: ConfigEntry[String] =
  buildConf("spark.clickhouse.fixedStringReadAs")
    .doc("read ClickHouse FixedString type as the specified Spark data type. Supported formats: binary, string")
    .version("0.8.1")
    .stringConf
    // Locale.ROOT keeps normalization independent of the JVM default locale
    // (e.g. under tr_TR, "BINARY".toLowerCase yields "bınary" with a dotless i,
    // which would no longer equal "binary").
    .transform(_.toLowerCase(java.util.Locale.ROOT))
    .createWithDefault("binary")
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,22 @@ import com.clickhouse.data.ClickHouseDataType._
import com.clickhouse.data.{ClickHouseColumn, ClickHouseDataType}
import org.apache.spark.sql.types._
import xenon.clickhouse.exception.CHClientException
import org.apache.spark.sql.catalyst.SQLConfHelper
import org.apache.spark.sql.clickhouse.ClickHouseSQLConf.FIXED_STRING_READ_AS

object SchemaUtils {
object SchemaUtils extends SQLConfHelper {

def fromClickHouseType(chColumn: ClickHouseColumn): (DataType, Boolean) = {
val catalystType = chColumn.getDataType match {
case Nothing => NullType
case Bool => BooleanType
case String | JSON | UUID | Enum8 | Enum16 | IPv4 | IPv6 => StringType
case FixedString => BinaryType
case FixedString =>
conf.getConf(FIXED_STRING_READ_AS) match {
case "binary" => BinaryType
case "string" => StringType
case unsupported => throw CHClientException(s"Unsupported fixed string read format mapping: $unsupported")
}
case Int8 => ByteType
case UInt8 | Int16 => ShortType
case UInt16 | Int32 => IntegerType
Expand Down

0 comments on commit 477de61

Please sign in to comment.