From a0fc79b7cf0107c0aced3f14443b2a1b2c250f9e Mon Sep 17 00:00:00 2001 From: Shenoda Guirguis Date: Mon, 20 Sep 2021 22:31:57 -0700 Subject: [PATCH] Move to Avro 1.10.2 (#82) * update to avro 1.10.2 and disable avro validation for default values * backporting fix for Parquet to work with Avro 1.10.2 from PR#1648 (mainstream) Co-authored-by: Shenoda Guirguis --- build.gradle | 8 +- .../iceberg/TestManifestListVersions.java | 13 +-- .../iceberg/avro/TestReadProjection.java | 97 ++++++++++--------- .../hive/legacy/LegacyHiveTableUtils.java | 4 +- .../apache/iceberg/parquet/ParquetAvro.java | 21 ++-- .../iceberg/avro/TestReadProjection.java | 95 ++++++++++-------- .../iceberg/spark/data/TestHelpers.java | 3 +- versions.props | 2 +- 8 files changed, 136 insertions(+), 107 deletions(-) diff --git a/build.gradle b/build.gradle index 40e4fa551..0d34ecedc 100644 --- a/build.gradle +++ b/build.gradle @@ -95,6 +95,7 @@ subprojects { force 'com.fasterxml.jackson.module:jackson-module-scala_2.11:2.10.2' force 'com.fasterxml.jackson.module:jackson-module-scala_2.12:2.10.2' force 'com.fasterxml.jackson.module:jackson-module-paranamer:2.10.2' + force 'com.fasterxml.jackson.core:jackson-databind:2.10.2' } } @@ -532,10 +533,11 @@ project(':iceberg-mr') { testCompile project(path: ':iceberg-core', configuration: 'testArtifacts') testCompile project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts') - testCompile("org.apache.avro:avro:1.9.2") + testCompile("org.apache.avro:avro:1.10.2") testCompile("org.apache.calcite:calcite-core") testCompile("com.esotericsoftware:kryo-shaded:4.0.2") - testCompile("com.fasterxml.jackson.core:jackson-annotations:2.6.5") + testCompile("com.fasterxml.jackson.core:jackson-annotations:2.10.2") + testCompile("com.fasterxml.jackson.core:jackson-databind:2.10.2") testCompile("org.apache.hive:hive-service") { exclude group: 'org.apache.hive', module: 'hive-exec' } @@ -602,7 +604,7 @@ if (jdkVersion == '8') { testCompile("org.apache.avro:avro:1.9.2") testCompile("org.apache.calcite:calcite-core") testCompile("com.esotericsoftware:kryo-shaded:4.0.2") - testCompile("com.fasterxml.jackson.core:jackson-annotations:2.6.5") + testCompile("com.fasterxml.jackson.core:jackson-annotations:2.10.2") testCompile("org.apache.hive:hive-service:3.1.2") { exclude group: 'org.apache.hive', module: 'hive-exec' } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java b/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java index 35a52a1f6..3172fcba2 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestListVersions.java @@ -27,6 +27,7 @@ import org.apache.avro.generic.GenericRecordBuilder; import org.apache.iceberg.avro.Avro; import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.avro.TestReadProjection; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.io.InputFile; @@ -132,9 +133,9 @@ public void testV1ForwardCompatibility() throws IOException { Assert.assertEquals("Added rows count", ADDED_ROWS, (long) generic.get("added_rows_count")); Assert.assertEquals("Existing rows count", EXISTING_ROWS, (long) generic.get("existing_rows_count")); Assert.assertEquals("Deleted rows count", DELETED_ROWS, (long) generic.get("deleted_rows_count")); - Assert.assertNull("Content", generic.get(ManifestFile.MANIFEST_CONTENT.name())); - Assert.assertNull("Sequence number", generic.get(ManifestFile.SEQUENCE_NUMBER.name())); - Assert.assertNull("Min sequence number", generic.get(ManifestFile.MIN_SEQUENCE_NUMBER.name())); + TestReadProjection.assertNotProjected("Content", generic, ManifestFile.MANIFEST_CONTENT.name()); + TestReadProjection.assertNotProjected("Sequence number", generic, ManifestFile.SEQUENCE_NUMBER.name()); + TestReadProjection.assertNotProjected("Min sequence number", generic, ManifestFile.MIN_SEQUENCE_NUMBER.name()); } @Test @@ -154,9 +155,9 @@ public void testV2ForwardCompatibility() throws IOException { Assert.assertEquals("Added rows count", ADDED_ROWS, (long) generic.get("added_rows_count")); Assert.assertEquals("Existing rows count", EXISTING_ROWS, (long) generic.get("existing_rows_count")); Assert.assertEquals("Deleted rows count", DELETED_ROWS, (long) generic.get("deleted_rows_count")); - Assert.assertNull("Content", generic.get(ManifestFile.MANIFEST_CONTENT.name())); - Assert.assertNull("Sequence number", generic.get(ManifestFile.SEQUENCE_NUMBER.name())); - Assert.assertNull("Min sequence number", generic.get(ManifestFile.MIN_SEQUENCE_NUMBER.name())); + TestReadProjection.assertNotProjected("Content", generic, ManifestFile.MANIFEST_CONTENT.name()); + TestReadProjection.assertNotProjected("Sequence number", generic, ManifestFile.SEQUENCE_NUMBER.name()); + TestReadProjection.assertNotProjected("Min sequence number", generic, ManifestFile.MIN_SEQUENCE_NUMBER.name()); } @Test diff --git a/core/src/test/java/org/apache/iceberg/avro/TestReadProjection.java b/core/src/test/java/org/apache/iceberg/avro/TestReadProjection.java index 9243a809f..0d7c0d077 100644 --- a/core/src/test/java/org/apache/iceberg/avro/TestReadProjection.java +++ b/core/src/test/java/org/apache/iceberg/avro/TestReadProjection.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import org.apache.avro.AvroRuntimeException; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; @@ -148,7 +149,7 @@ public void testBasicProjection() throws Exception { ); Record projected = writeAndRead("basic_projection_id", writeSchema, idOnly, record); - Assert.assertNull("Should not project data", projected.get("data")); + assertNotProjected("should not project data", projected, "data"); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); Schema dataOnly = new Schema( @@ -157,7 +158,7 @@ public void testBasicProjection() throws Exception { projected = writeAndRead("basic_projection_data", writeSchema, dataOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); int cmp = Comparators.charSequences() .compare("test", (CharSequence) projected.get("data")); Assert.assertTrue("Should contain the correct data value", cmp == 0); @@ -210,9 +211,8 @@ public void testNestedStructProjection() throws Exception { ); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); - Record projectedLocation = (Record) projected.get("location"); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project location", projectedLocation); + assertNotProjected("should not project location", projected, "location"); Schema latOnly = new Schema( Types.NestedField.optional(3, "location", Types.StructType.of( @@ -221,10 +221,10 @@ public void testNestedStructProjection() throws Exception { ); projected = writeAndRead("latitude_only", writeSchema, latOnly, record); - projectedLocation = (Record) projected.get("location"); - Assert.assertNull("Should not project id", projected.get("id")); - Assert.assertNotNull("Should project location", projected.get("location")); - Assert.assertNull("Should not project longitude", projectedLocation.get("long")); + assertNotProjected("Should not project id", projected, "id"); + Record projectedLocation = (Record) projected.get("location"); + Assert.assertNotNull("Should project location", projectedLocation); + assertNotProjected("Should not project longitude", projected, "long"); Assert.assertEquals("Should project latitude", 52.995143f, (float) projectedLocation.get("lat"), 0.000001f); @@ -236,16 +236,16 @@ public void testNestedStructProjection() throws Exception { projected = writeAndRead("longitude_only", writeSchema, longOnly, record); projectedLocation = (Record) projected.get("location"); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project location", projected.get("location")); - Assert.assertNull("Should not project latitutde", projectedLocation.get("lat")); + assertNotProjected("Should not project latitutde", projectedLocation, "lat"); Assert.assertEquals("Should project longitude", -1.539054f, (float) projectedLocation.get("long"), 0.000001f); Schema locationOnly = writeSchema.select("location"); projected = writeAndRead("location_only", writeSchema, locationOnly, record); projectedLocation = (Record) projected.get("location"); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project location", projected.get("location")); Assert.assertEquals("Should project latitude", 52.995143f, (float) projectedLocation.get("lat"), 0.000001f); @@ -273,23 +273,23 @@ public void testMapProjection() throws IOException { Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project properties map", projected.get("properties")); + assertNotProjected("Should not project properties map", projected, "properties"); Schema keyOnly = writeSchema.select("properties.key"); projected = writeAndRead("key_only", writeSchema, keyOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project entire map", properties, toStringMap((Map) projected.get("properties"))); Schema valueOnly = writeSchema.select("properties.value"); projected = writeAndRead("value_only", writeSchema, valueOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("should not project id", projected, "id"); Assert.assertEquals("Should project entire map", properties, toStringMap((Map) projected.get("properties"))); Schema mapOnly = writeSchema.select("properties"); projected = writeAndRead("map_only", writeSchema, mapOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project entire map", properties, toStringMap((Map) projected.get("properties"))); } @@ -337,16 +337,16 @@ public void testMapOfStructsProjection() throws IOException { Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project locations map", projected.get("locations")); + assertNotProjected("Should not project locations map", projected, "locations"); projected = writeAndRead("all_locations", writeSchema, writeSchema.select("locations"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project locations map", record.get("locations"), toStringMap((Map) projected.get("locations"))); projected = writeAndRead("lat_only", writeSchema, writeSchema.select("locations.lat"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Map locations = toStringMap((Map) projected.get("locations")); Assert.assertNotNull("Should project locations map", locations); Assert.assertEquals("Should contain L1 and L2", @@ -355,28 +355,28 @@ public void testMapOfStructsProjection() throws IOException { Assert.assertNotNull("L1 should not be null", projectedL1); Assert.assertEquals("L1 should contain lat", 53.992811f, (float) projectedL1.get("lat"), 0.000001); - Assert.assertNull("L1 should not contain long", projectedL1.get("long")); + assertNotProjected("L1 should not contain long", projectedL1, "long"); Record projectedL2 = (Record) locations.get("L2"); Assert.assertNotNull("L2 should not be null", projectedL2); Assert.assertEquals("L2 should contain lat", 52.995143f, (float) projectedL2.get("lat"), 0.000001); - Assert.assertNull("L2 should not contain long", projectedL2.get("long")); + assertNotProjected("L2 should not contain long", projectedL2, "long"); projected = writeAndRead("long_only", writeSchema, writeSchema.select("locations.long"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); locations = toStringMap((Map) projected.get("locations")); Assert.assertNotNull("Should project locations map", locations); Assert.assertEquals("Should contain L1 and L2", Sets.newHashSet("L1", "L2"), locations.keySet()); projectedL1 = (Record) locations.get("L1"); Assert.assertNotNull("L1 should not be null", projectedL1); - Assert.assertNull("L1 should not contain lat", projectedL1.get("lat")); + assertNotProjected("L1 should not contain lat", projectedL1, "lat"); Assert.assertEquals("L1 should contain long", -1.542616f, (float) projectedL1.get("long"), 0.000001); projectedL2 = (Record) locations.get("L2"); Assert.assertNotNull("L2 should not be null", projectedL2); - Assert.assertNull("L2 should not contain lat", projectedL2.get("lat")); + assertNotProjected("L2 should not contain lat", projectedL2, "lat"); Assert.assertEquals("L2 should contain long", -1.539054f, (float) projectedL2.get("long"), 0.000001); @@ -390,7 +390,7 @@ public void testMapOfStructsProjection() throws IOException { ); projected = writeAndRead("latitude_renamed", writeSchema, latitiudeRenamed, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); locations = toStringMap((Map) projected.get("locations")); Assert.assertNotNull("Should project locations map", locations); Assert.assertEquals("Should contain L1 and L2", @@ -399,14 +399,14 @@ public void testMapOfStructsProjection() throws IOException { Assert.assertNotNull("L1 should not be null", projectedL1); Assert.assertEquals("L1 should contain latitude", 53.992811f, (float) projectedL1.get("latitude"), 0.000001); - Assert.assertNull("L1 should not contain lat", projectedL1.get("lat")); - Assert.assertNull("L1 should not contain long", projectedL1.get("long")); + assertNotProjected("L1 should not contain lat", projectedL1, "lat"); + assertNotProjected("L1 should not contain long", projectedL1, "long"); projectedL2 = (Record) locations.get("L2"); Assert.assertNotNull("L2 should not be null", projectedL2); Assert.assertEquals("L2 should contain latitude", 52.995143f, (float) projectedL2.get("latitude"), 0.000001); - Assert.assertNull("L2 should not contain lat", projectedL2.get("lat")); - Assert.assertNull("L2 should not contain long", projectedL2.get("long")); + assertNotProjected("L2 should not contain lat", projectedL2, "lat"); + assertNotProjected("L2 should not contain long", projectedL2, "long"); } @Test @@ -429,16 +429,16 @@ public void testListProjection() throws IOException { Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project values list", projected.get("values")); + assertNotProjected("should not project values", projected, "values"); Schema elementOnly = writeSchema.select("values.element"); projected = writeAndRead("element_only", writeSchema, elementOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("should not project id", projected, "id"); Assert.assertEquals("Should project entire list", values, projected.get("values")); Schema listOnly = writeSchema.select("values"); projected = writeAndRead("list_only", writeSchema, listOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("should not project id", projected, "id"); Assert.assertEquals("Should project entire list", values, projected.get("values")); } @@ -473,35 +473,35 @@ public void testListOfStructsProjection() throws IOException { Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project points list", projected.get("points")); + assertNotProjected("Should not project points list", projected, "points"); projected = writeAndRead("all_points", writeSchema, writeSchema.select("points"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project points list", record.get("points"), projected.get("points")); projected = writeAndRead("x_only", writeSchema, writeSchema.select("points.x"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project points list", projected.get("points")); List points = (List) projected.get("points"); Assert.assertEquals("Should read 2 points", 2, points.size()); Record projectedP1 = points.get(0); Assert.assertEquals("Should project x", 1, (int) projectedP1.get("x")); - Assert.assertNull("Should not project y", projectedP1.get("y")); + assertNotProjected("Should not project y", projectedP1, "y"); Record projectedP2 = points.get(1); Assert.assertEquals("Should project x", 3, (int) projectedP2.get("x")); - Assert.assertNull("Should not project y", projectedP2.get("y")); + assertNotProjected("Should not project y", projectedP2, "y"); projected = writeAndRead("y_only", writeSchema, writeSchema.select("points.y"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project points list", projected.get("points")); points = (List) projected.get("points"); Assert.assertEquals("Should read 2 points", 2, points.size()); projectedP1 = points.get(0); - Assert.assertNull("Should not project x", projectedP1.get("x")); + assertNotProjected("Should not project x", projectedP1, "x"); Assert.assertEquals("Should project y", 2, (int) projectedP1.get("y")); projectedP2 = points.get(1); - Assert.assertNull("Should not project x", projectedP2.get("x")); + assertNotProjected("Should not project x", projectedP2, "x"); Assert.assertEquals("Should project null y", null, projectedP2.get("y")); Schema yRenamed = new Schema( @@ -513,17 +513,26 @@ public void testListOfStructsProjection() throws IOException { ); projected = writeAndRead("y_renamed", writeSchema, yRenamed, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project points list", projected.get("points")); points = (List) projected.get("points"); Assert.assertEquals("Should read 2 points", 2, points.size()); projectedP1 = points.get(0); - Assert.assertNull("Should not project x", projectedP1.get("x")); - Assert.assertNull("Should not project y", projectedP1.get("y")); + assertNotProjected("Should not project x", projectedP1, "x"); + assertNotProjected("Should not project y", projectedP1, "y"); Assert.assertEquals("Should project z", 2, (int) projectedP1.get("z")); projectedP2 = points.get(1); - Assert.assertNull("Should not project x", projectedP2.get("x")); - Assert.assertNull("Should not project y", projectedP2.get("y")); + assertNotProjected("Should not project x", projectedP2, "x"); + assertNotProjected("Should not project y", projectedP2, "y"); Assert.assertEquals("Should project null z", null, projectedP2.get("z")); } + + public static void assertNotProjected(String message, Record projected, String fieldName) { + try { + projected.get(fieldName); + Assert.fail(message); + } catch (Exception e) { + Assert.assertTrue(e instanceof AvroRuntimeException); + } + } } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableUtils.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableUtils.java index a4e7ff3fa..5a38fc9e2 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableUtils.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableUtils.java @@ -56,7 +56,9 @@ private LegacyHiveTableUtils() { static Schema getSchema(org.apache.hadoop.hive.metastore.api.Table table) { Map props = getTableProperties(table); String schemaStr = props.get("avro.schema.literal"); - org.apache.avro.Schema avroSchema = schemaStr != null ? new org.apache.avro.Schema.Parser().parse(schemaStr) : null; + // Disable default value validation for backward compatibility with Avro 1.7 + org.apache.avro.Schema avroSchema = + schemaStr != null ? new org.apache.avro.Schema.Parser().setValidateDefaults(false).parse(schemaStr) : null; Schema schema; if (avroSchema != null) { String serde = table.getSd().getSerdeInfo().getSerializationLib(); diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvro.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvro.java index 669269a2a..1c37f5436 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvro.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvro.java @@ -20,8 +20,10 @@ package org.apache.iceberg.parquet; import java.math.BigDecimal; +import java.math.BigInteger; import java.util.List; import java.util.Map; +import java.util.WeakHashMap; import org.apache.avro.Conversion; import org.apache.avro.Conversions; import org.apache.avro.LogicalType; @@ -30,6 +32,7 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericFixed; import org.apache.avro.specific.SpecificData; +import org.apache.commons.math3.util.Pair; import org.apache.iceberg.avro.AvroSchemaVisitor; import org.apache.iceberg.avro.UUIDConversion; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -48,8 +51,8 @@ static Schema parquetAvroSchema(Schema avroSchema) { static class ParquetDecimal extends LogicalType { private static final String NAME = "parquet-decimal"; - private int precision; - private int scale; + private final int precision; + private final int scale; ParquetDecimal(int precision, int scale) { super(NAME); @@ -154,12 +157,10 @@ public Long toLong(BigDecimal value, org.apache.avro.Schema schema, LogicalType } private static class FixedDecimalConversion extends Conversions.DecimalConversion { - private final LogicalType[] decimalsByScale = new LogicalType[39]; + private final WeakHashMap, LogicalType> decimalsByScale; private FixedDecimalConversion() { - for (int i = 0; i < decimalsByScale.length; i += 1) { - decimalsByScale[i] = LogicalTypes.decimal(i, i); - } + this.decimalsByScale = new WeakHashMap<>(); } @Override @@ -169,12 +170,16 @@ public String getLogicalTypeName() { @Override public BigDecimal fromFixed(GenericFixed value, Schema schema, LogicalType type) { - return super.fromFixed(value, schema, decimalsByScale[((ParquetDecimal) type).scale()]); + ParquetDecimal dec = (ParquetDecimal) type; + return new BigDecimal(new BigInteger(value.bytes()), dec.scale()); } @Override public GenericFixed toFixed(BigDecimal value, Schema schema, LogicalType type) { - return super.toFixed(value, schema, decimalsByScale[((ParquetDecimal) type).scale()]); + ParquetDecimal dec = (ParquetDecimal) type; + Pair key = new Pair<>(dec.precision(), dec.scale()); + return super.toFixed(value, schema, + decimalsByScale.computeIfAbsent(key, k -> LogicalTypes.decimal(k.getFirst(), k.getSecond()))); } } diff --git a/parquet/src/test/java/org/apache/iceberg/avro/TestReadProjection.java b/parquet/src/test/java/org/apache/iceberg/avro/TestReadProjection.java index 9243a809f..4479608d4 100644 --- a/parquet/src/test/java/org/apache/iceberg/avro/TestReadProjection.java +++ b/parquet/src/test/java/org/apache/iceberg/avro/TestReadProjection.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import org.apache.avro.AvroRuntimeException; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; @@ -148,7 +149,7 @@ public void testBasicProjection() throws Exception { ); Record projected = writeAndRead("basic_projection_id", writeSchema, idOnly, record); - Assert.assertNull("Should not project data", projected.get("data")); + assertNotProjected("Should not project data", projected, "data"); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); Schema dataOnly = new Schema( @@ -157,7 +158,7 @@ public void testBasicProjection() throws Exception { projected = writeAndRead("basic_projection_data", writeSchema, dataOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); int cmp = Comparators.charSequences() .compare("test", (CharSequence) projected.get("data")); Assert.assertTrue("Should contain the correct data value", cmp == 0); @@ -210,9 +211,8 @@ public void testNestedStructProjection() throws Exception { ); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); - Record projectedLocation = (Record) projected.get("location"); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project location", projectedLocation); + assertNotProjected("Should not project location", projected, "location"); Schema latOnly = new Schema( Types.NestedField.optional(3, "location", Types.StructType.of( @@ -221,10 +221,10 @@ public void testNestedStructProjection() throws Exception { ); projected = writeAndRead("latitude_only", writeSchema, latOnly, record); - projectedLocation = (Record) projected.get("location"); - Assert.assertNull("Should not project id", projected.get("id")); + Record projectedLocation = (Record) projected.get("location"); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project location", projected.get("location")); - Assert.assertNull("Should not project longitude", projectedLocation.get("long")); + assertNotProjected("Should not project longitude", projectedLocation, "long"); Assert.assertEquals("Should project latitude", 52.995143f, (float) projectedLocation.get("lat"), 0.000001f); @@ -236,16 +236,16 @@ public void testNestedStructProjection() throws Exception { projected = writeAndRead("longitude_only", writeSchema, longOnly, record); projectedLocation = (Record) projected.get("location"); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project location", projected.get("location")); - Assert.assertNull("Should not project latitutde", projectedLocation.get("lat")); + assertNotProjected("Should not project latitutde", projectedLocation, "lat"); Assert.assertEquals("Should project longitude", -1.539054f, (float) projectedLocation.get("long"), 0.000001f); Schema locationOnly = writeSchema.select("location"); projected = writeAndRead("location_only", writeSchema, locationOnly, record); projectedLocation = (Record) projected.get("location"); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project location", projected.get("location")); Assert.assertEquals("Should project latitude", 52.995143f, (float) projectedLocation.get("lat"), 0.000001f); @@ -273,23 +273,23 @@ public void testMapProjection() throws IOException { Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project properties map", projected.get("properties")); + assertNotProjected("Should not project properties map", projected, "properties"); Schema keyOnly = writeSchema.select("properties.key"); projected = writeAndRead("key_only", writeSchema, keyOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project entire map", properties, toStringMap((Map) projected.get("properties"))); Schema valueOnly = writeSchema.select("properties.value"); projected = writeAndRead("value_only", writeSchema, valueOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project entire map", properties, toStringMap((Map) projected.get("properties"))); Schema mapOnly = writeSchema.select("properties"); projected = writeAndRead("map_only", writeSchema, mapOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project entire map", properties, toStringMap((Map) projected.get("properties"))); } @@ -337,16 +337,16 @@ public void testMapOfStructsProjection() throws IOException { Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project locations map", projected.get("locations")); + assertNotProjected("Should not project locations map", projected, "locations"); projected = writeAndRead("all_locations", writeSchema, writeSchema.select("locations"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project locations map", record.get("locations"), toStringMap((Map) projected.get("locations"))); projected = writeAndRead("lat_only", writeSchema, writeSchema.select("locations.lat"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Map locations = toStringMap((Map) projected.get("locations")); Assert.assertNotNull("Should project locations map", locations); Assert.assertEquals("Should contain L1 and L2", @@ -355,28 +355,28 @@ public void testMapOfStructsProjection() throws IOException { Assert.assertNotNull("L1 should not be null", projectedL1); Assert.assertEquals("L1 should contain lat", 53.992811f, (float) projectedL1.get("lat"), 0.000001); - Assert.assertNull("L1 should not contain long", projectedL1.get("long")); + assertNotProjected("L1 should not contain long", projectedL1, "long"); Record projectedL2 = (Record) locations.get("L2"); Assert.assertNotNull("L2 should not be null", projectedL2); Assert.assertEquals("L2 should contain lat", 52.995143f, (float) projectedL2.get("lat"), 0.000001); - Assert.assertNull("L2 should not contain long", projectedL2.get("long")); + assertNotProjected("L2 should not contain long", projectedL2, "long"); projected = writeAndRead("long_only", writeSchema, writeSchema.select("locations.long"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); locations = toStringMap((Map) projected.get("locations")); Assert.assertNotNull("Should project locations map", locations); Assert.assertEquals("Should contain L1 and L2", Sets.newHashSet("L1", "L2"), locations.keySet()); projectedL1 = (Record) locations.get("L1"); Assert.assertNotNull("L1 should not be null", projectedL1); - Assert.assertNull("L1 should not contain lat", projectedL1.get("lat")); + assertNotProjected("L1 should not contain lat", projectedL1, "lat"); Assert.assertEquals("L1 should contain long", -1.542616f, (float) projectedL1.get("long"), 0.000001); projectedL2 = (Record) locations.get("L2"); Assert.assertNotNull("L2 should not be null", projectedL2); - Assert.assertNull("L2 should not contain lat", projectedL2.get("lat")); + assertNotProjected("L2 should not contain lat", projectedL2, "lat"); Assert.assertEquals("L2 should contain long", -1.539054f, (float) projectedL2.get("long"), 0.000001); @@ -390,7 +390,7 @@ public void testMapOfStructsProjection() throws IOException { ); projected = writeAndRead("latitude_renamed", writeSchema, latitiudeRenamed, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); locations = toStringMap((Map) projected.get("locations")); Assert.assertNotNull("Should project locations map", locations); Assert.assertEquals("Should contain L1 and L2", @@ -399,14 +399,14 @@ public void testMapOfStructsProjection() throws IOException { Assert.assertNotNull("L1 should not be null", projectedL1); Assert.assertEquals("L1 should contain latitude", 53.992811f, (float) projectedL1.get("latitude"), 0.000001); - Assert.assertNull("L1 should not contain lat", projectedL1.get("lat")); - Assert.assertNull("L1 should not contain long", projectedL1.get("long")); + assertNotProjected("L1 should not contain lat", projectedL1, "lat"); + assertNotProjected("L1 should not contain long", projectedL1, "long"); projectedL2 = (Record) locations.get("L2"); Assert.assertNotNull("L2 should not be null", projectedL2); Assert.assertEquals("L2 should contain latitude", 52.995143f, (float) projectedL2.get("latitude"), 0.000001); - Assert.assertNull("L2 should not contain lat", projectedL2.get("lat")); - Assert.assertNull("L2 should not contain long", projectedL2.get("long")); + assertNotProjected("L2 should not contain lat", projectedL2, "lat"); + assertNotProjected("L2 should not contain long", projectedL2, "long"); } @Test @@ -429,16 +429,16 @@ public void testListProjection() throws IOException { Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project values list", projected.get("values")); + assertNotProjected("Should not project values list", projected, "values"); Schema elementOnly = writeSchema.select("values.element"); projected = writeAndRead("element_only", writeSchema, elementOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project entire list", values, projected.get("values")); Schema listOnly = writeSchema.select("values"); projected = writeAndRead("list_only", writeSchema, listOnly, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project entire list", values, projected.get("values")); } @@ -473,35 +473,35 @@ public void testListOfStructsProjection() throws IOException { Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); - Assert.assertNull("Should not project points list", projected.get("points")); + assertNotProjected("Should not project points list", projected, "points"); projected = writeAndRead("all_points", writeSchema, writeSchema.select("points"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertEquals("Should project points list", record.get("points"), projected.get("points")); projected = writeAndRead("x_only", writeSchema, writeSchema.select("points.x"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project points list", projected.get("points")); List points = (List) projected.get("points"); Assert.assertEquals("Should read 2 points", 2, points.size()); Record projectedP1 = points.get(0); Assert.assertEquals("Should project x", 1, (int) projectedP1.get("x")); - Assert.assertNull("Should not project y", projectedP1.get("y")); + assertNotProjected("Should not project y", projectedP1, "y"); Record projectedP2 = points.get(1); Assert.assertEquals("Should project x", 3, (int) projectedP2.get("x")); - Assert.assertNull("Should not project y", projectedP2.get("y")); + assertNotProjected("Should not project y", projectedP2, "y"); projected = writeAndRead("y_only", writeSchema, writeSchema.select("points.y"), record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project points list", projected.get("points")); points = (List) projected.get("points"); Assert.assertEquals("Should read 2 points", 2, points.size()); projectedP1 = points.get(0); - Assert.assertNull("Should not project x", projectedP1.get("x")); + assertNotProjected("Should not project x", projectedP1, "x"); Assert.assertEquals("Should project y", 2, (int) projectedP1.get("y")); projectedP2 = points.get(1); - Assert.assertNull("Should not project x", projectedP2.get("x")); + assertNotProjected("Should not project x", projectedP2, "x"); Assert.assertEquals("Should project null y", null, projectedP2.get("y")); Schema yRenamed = new Schema( @@ -513,17 +513,26 @@ public void testListOfStructsProjection() throws IOException { ); projected = writeAndRead("y_renamed", writeSchema, yRenamed, record); - Assert.assertNull("Should not project id", projected.get("id")); + assertNotProjected("Should not project id", projected, "id"); Assert.assertNotNull("Should project points list", projected.get("points")); points = (List) projected.get("points"); Assert.assertEquals("Should read 2 points", 2, points.size()); projectedP1 = points.get(0); - Assert.assertNull("Should not project x", projectedP1.get("x")); - Assert.assertNull("Should not project y", projectedP1.get("y")); + assertNotProjected("Should not project x", projectedP1, "x"); + assertNotProjected("Should not project y", projectedP1, "y"); Assert.assertEquals("Should project z", 2, (int) projectedP1.get("z")); projectedP2 = points.get(1); - Assert.assertNull("Should not project x", projectedP2.get("x")); - Assert.assertNull("Should not project y", projectedP2.get("y")); + assertNotProjected("Should not project x", projectedP2, "x"); + assertNotProjected("Should not project y", projectedP2, "y"); Assert.assertEquals("Should project null z", null, projectedP2.get("z")); } + + public static void assertNotProjected(String message, Record projected, String fieldName) { + try { + projected.get(fieldName); + Assert.fail(message); + } catch (Exception e) { + Assert.assertTrue(e instanceof AvroRuntimeException); + } + } } diff --git a/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java b/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java index 965659bc2..e521d1984 100644 --- a/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java +++ b/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java @@ -85,9 +85,10 @@ public static void assertEqualsSafe(Types.StructType struct, Record rec, Row row public static void assertEqualsBatch(Types.StructType struct, Iterator expected, ColumnarBatch batch, boolean checkArrowValidityVector) { + List fields = struct.fields(); for (int rowId = 0; rowId < batch.numRows(); rowId++) { - List fields = struct.fields(); InternalRow row = batch.getRow(rowId); + Assert.assertEquals("struct number of fields should equal row number of fields", fields.size(), row.numFields()); Record rec = expected.next(); for (int i = 0; i < fields.size(); i += 1) { Type fieldType = fields.get(i).type(); diff --git a/versions.props b/versions.props index 08626174c..786e340f5 100644 --- a/versions.props +++ b/versions.props @@ -1,5 +1,5 @@ org.slf4j:* = 1.7.25 -org.apache.avro:avro = 1.9.2 +org.apache.avro:avro = 1.10.2 org.apache.flink:* = 1.11.0 org.apache.hadoop:* = 2.7.3 org.apache.hive:hive-metastore = 2.3.7