diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala index cd75da3bb5da..5291d1ef1eeb 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala @@ -25,7 +25,7 @@ import org.apache.hudi.internal.schema.HoodieSchemaException import org.apache.avro.Schema.Type import org.apache.avro.generic.GenericRecord -import org.apache.avro.{JsonProperties, Schema} +import org.apache.avro.{AvroRuntimeException, JsonProperties, Schema} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} @@ -149,6 +149,7 @@ object AvroConversionUtils { val avroSchema = schemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace) getAvroSchemaWithDefaults(avroSchema, structType) } catch { + case a: AvroRuntimeException => throw new HoodieSchemaException(a.getMessage, a) case e: Exception => throw new HoodieSchemaException("Failed to convert struct type to avro schema: " + structType, e) } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala index 5cd6ac3954ee..3bec605b6851 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala @@ -22,6 +22,7 @@ import java.nio.ByteBuffer import java.util.Objects import org.apache.avro.Schema import org.apache.avro.generic.GenericData +import org.apache.hudi.internal.schema.HoodieSchemaException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types.{ArrayType, BinaryType, DataType, DataTypes, MapType, StringType, StructField, StructType} @@ -444,4 +445,13 @@ class TestAvroConversionUtils extends FunSuite with Matchers { private def checkNull(left: Any, right: Any): Boolean = { (left == null && right != null) || (left == null && right != null) } + + test("convert struct type with duplicate column names") { + val struct = new StructType().add("id", DataTypes.LongType, true) + .add("name", DataTypes.StringType, true) + .add("name", DataTypes.StringType, true) + the[HoodieSchemaException] thrownBy { + AvroConversionUtils.convertStructTypeToAvroSchema(struct, "SchemaName", "SchemaNS") + } should have message "Duplicate field name in record SchemaNS.SchemaName: name type:UNION pos:2 and name type:UNION pos:1." + } }