From c7f8ec074e8e8ca4acbf57e71254f45760630217 Mon Sep 17 00:00:00 2001 From: Henning Esser Date: Wed, 10 Feb 2021 17:37:16 +0100 Subject: [PATCH 1/4] Add decimal type --- .../spark/jsonschema/SchemaConverter.scala | 40 +++++++++++++++++-- src/test/resources/testJsonSchema.json | 25 ++++++++++++ .../jsonschema/SchemaConverterTest.scala | 3 ++ 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala index 909d42e..96b8685 100644 --- a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala +++ b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala @@ -20,7 +20,7 @@ import scala.io.Source * given in the dataset. * */ -case class SchemaType(typeName: String, nullable: Boolean) +case class SchemaType(typeName: String, nullable: Boolean, precision: Option[Int] = None, range: Option[Int] = None) private case class NullableDataType(dataType: DataType, nullable: Boolean) object SchemaConverter { @@ -33,15 +33,24 @@ object SchemaConverter { val SchemaRoot = "/" val Definitions = "definitions" val Reference = "$ref" + val Decimal = "decimal" + val Precision = "precision" + val Range = "range" val TypeMap = Map( "string" -> StringType, "number" -> DoubleType, "float" -> FloatType, "integer" -> LongType, "boolean" -> BooleanType, + "decimal" -> DecimalType, "object" -> StructType, "array" -> ArrayType ) + object DecimalNames { + val Decimal = "decimal" + val Precision = "precision" + val Range = "range" + } var definitions: JsObject = JsObject(Seq.empty) private var isStrictTypingEnabled: Boolean = true @@ -85,21 +94,35 @@ object SchemaConverter { def getJsonId(json: JsValue): Option[String] = (json \ SchemaFieldId).asOpt[String] + def getDecimal(json: JsValue, nullable: Boolean): SchemaType = { + ((json \ DecimalNames.Precision).toOption, (json \ DecimalNames.Range).toOption) match { + case (Some(prec), Some(range)) 
=> + SchemaType(DecimalNames.Decimal, nullable, Some(prec.as[Int]), Some(range.as[Int])) + case (None, None) => SchemaType(DecimalNames.Decimal, nullable) + case _ => throw new AssertionError("decimal type needs either both precision and range or none of them") + } + } + + def getSimpleType(json: JsValue, typeName: String, nullable: Boolean): SchemaType = { + if (typeName == DecimalNames.Decimal) getDecimal(json, nullable) + else SchemaType(typeName, nullable) + } + def getJsonType(json: JsObject, name: String): SchemaType = { val id = getJsonId(json).getOrElse(name) (json \ SchemaFieldType).getOrElse(JsNull) match { - case JsString(s) => SchemaType(s, nullable = false) + case JsString(s) => getSimpleType(json, s, nullable = false) case JsArray(array) => val nullable = array.contains(JsString("null")) array.size match { case 1 if nullable => throw new IllegalArgumentException("Null type only is not supported") case 1 => - SchemaType(array.apply(0).as[String], nullable = nullable) + getSimpleType(json, array.apply(0).as[String], nullable = nullable) case 2 if nullable => array.find(_ != JsString("null")) - .map(i => SchemaType(i.as[String], nullable = nullable)) + .map(i => getSimpleType(json, i.as[String], nullable = nullable)) .getOrElse { throw new IllegalArgumentException( s"Incorrect definition of a nullable parameter at <$id>" @@ -178,8 +201,17 @@ object SchemaConverter { private def getFieldType(json: JsObject, name: String): NullableDataType = { val fieldType = getJsonType(json, name) + assert( + TypeMap.keySet.contains(fieldType.typeName), + s"Unknown field type {${fieldType.typeName}}, possible values are: ${TypeMap.keySet}" + ) TypeMap(fieldType.typeName) match { + case DecimalType => (fieldType.precision, fieldType.range) match { + case (Some(prec), Some(range)) => NullableDataType(DataTypes.createDecimalType(prec, range), fieldType.nullable) + case _ => NullableDataType(DataTypes.createDecimalType(), fieldType.nullable) + } + case dataType: DataType => 
NullableDataType(dataType, fieldType.nullable) diff --git a/src/test/resources/testJsonSchema.json b/src/test/resources/testJsonSchema.json index b48c51a..c7a6d5a 100644 --- a/src/test/resources/testJsonSchema.json +++ b/src/test/resources/testJsonSchema.json @@ -153,6 +153,31 @@ "description": "to do", "name": "boolean" }, + "decimal": { + "id": "testSchema/decimal", + "type": "decimal", + "title": "Test decimal schema.", + "description": "to do", + "name": "decimal", + "precision": 38, + "range": 18 + }, + "decimal_default": { + "id": "testSchema/decimal_default", + "type": "decimal", + "title": "Test decimal schema.", + "description": "to do", + "name": "decimal_default" + }, + "decimal_nullable": { + "id": "testSchema/decimal_nullable", + "type": ["decimal", "null"], + "title": "Test decimal schema.", + "description": "to do", + "name": "decimal_nullable", + "precision": 38, + "range": 18 + }, "additionalProperty": { "id": "testSchema/additionalProperty", "type": "string", diff --git a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala index 418ad15..864a8da 100644 --- a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala +++ b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala @@ -30,6 +30,9 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { StructField("float", FloatType, nullable = false), StructField("nullable", DoubleType, nullable = true), StructField("boolean", BooleanType, nullable = false), + StructField("decimal", DecimalType(38, 18), nullable = false), + StructField("decimal_default", DecimalType(10, 0), nullable = false), + StructField("decimal_nullable", DecimalType(38, 18), nullable = true), StructField("additionalProperty", StringType, nullable = false) )) From 6d2dbfd79d2e85ef491626a6a2db7fbd8d4011ac Mon Sep 17 00:00:00 2001 From: Henning Esser Date: Wed, 10 Feb 2021 18:20:16 +0100 
Subject: [PATCH 2/4] Add timestamp type --- .../org/zalando/spark/jsonschema/SchemaConverter.scala | 1 + src/test/resources/testJsonSchema.json | 7 +++++++ .../org/zalando/spark/jsonschema/SchemaConverterTest.scala | 1 + 3 files changed, 9 insertions(+) diff --git a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala index 96b8685..5b079a0 100644 --- a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala +++ b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala @@ -43,6 +43,7 @@ object SchemaConverter { "integer" -> LongType, "boolean" -> BooleanType, "decimal" -> DecimalType, + "timestamp" -> DataTypes.TimestampType, "object" -> StructType, "array" -> ArrayType ) diff --git a/src/test/resources/testJsonSchema.json b/src/test/resources/testJsonSchema.json index c7a6d5a..d64582b 100644 --- a/src/test/resources/testJsonSchema.json +++ b/src/test/resources/testJsonSchema.json @@ -178,6 +178,13 @@ "precision": 38, "range": 18 }, + "timestamp": { + "id": "testSchema/timestamp", + "type": "timestamp", + "title": "Test timestamp schema.", + "description": "to do", + "name": "timestamp" + }, "additionalProperty": { "id": "testSchema/additionalProperty", "type": "string", diff --git a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala index 864a8da..d7fe559 100644 --- a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala +++ b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala @@ -33,6 +33,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { StructField("decimal", DecimalType(38, 18), nullable = false), StructField("decimal_default", DecimalType(10, 0), nullable = false), StructField("decimal_nullable", DecimalType(38, 18), nullable = true), + StructField("timestamp", DataTypes.TimestampType, nullable = false), 
StructField("additionalProperty", StringType, nullable = false) )) From 15183498aca4693da0c4847781bd7964b528d394 Mon Sep 17 00:00:00 2001 From: Henning Esser Date: Wed, 10 Feb 2021 18:36:46 +0100 Subject: [PATCH 3/4] Fix tests --- .../spark/jsonschema/SchemaConverter.scala | 2 +- src/test/resources/testJsonSchema3.json | 16 +++++++++ .../jsonschema/SchemaConverterTest.scala | 35 ++++++++++++++++++- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala index 5b079a0..cba925e 100644 --- a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala +++ b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala @@ -100,7 +100,7 @@ object SchemaConverter { case (Some(prec), Some(range)) => SchemaType(DecimalNames.Decimal, nullable, Some(prec.as[Int]), Some(range.as[Int])) case (None, None) => SchemaType(DecimalNames.Decimal, nullable) - case _ => throw new AssertionError("decimal type needs either both precision and range or none of them") + case _ => throw new IllegalArgumentException("decimal type needs either both precision and range or none of them") } } diff --git a/src/test/resources/testJsonSchema3.json b/src/test/resources/testJsonSchema3.json index b434a9a..003f36b 100644 --- a/src/test/resources/testJsonSchema3.json +++ b/src/test/resources/testJsonSchema3.json @@ -77,6 +77,22 @@ "boolean": { "type": "boolean" }, + "decimal": { + "type": "decimal", + "precision": 38, + "range": 18 + }, + "decimal_default": { + "type": "decimal" + }, + "decimal_nullable": { + "type": ["decimal", "null"], + "precision": 38, + "range": 18 + }, + "timestamp": { + "type": "timestamp" + }, "additionalProperty": { "type": "string" } diff --git a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala index d7fe559..701d6d9 100644 --- 
a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala +++ b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala @@ -439,7 +439,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { } test("null type only should fail") { - assertThrows[NoSuchElementException] { + assertThrows[AssertionError] { val schema = SchemaConverter.convertContent( """ { @@ -474,4 +474,37 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { } } + test("decimal type with only one of precision or range should fail") { + assertThrows[IllegalArgumentException] { + val schema = SchemaConverter.convertContent( + """ + { + "type": "object", + "properties": { + "decimal": { + "type": "decimal", + "range": 18 + } + } + } + """ + ) + } + assertThrows[IllegalArgumentException] { + val schema = SchemaConverter.convertContent( + """ + { + "type": "object", + "properties": { + "decimal": { + "type": "decimal", + "precision": 38 + } + } + } + """ + ) + } + } + } From 08cae3c46feb828d83de0506a221c264819274db Mon Sep 17 00:00:00 2001 From: Henning Esser Date: Wed, 10 Feb 2021 18:48:46 +0100 Subject: [PATCH 4/4] bump version to 0.6.4 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 9948954..be0709d 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ name := "spark-json-schema" -version in ThisBuild := "0.6.3" +version in ThisBuild := "0.6.4" organization := "org.zalando" scalaVersion := "2.12.10"