Skip to content

Commit

Permalink
[#2039] Support default value semantic for AVRO (#75)
Browse files Browse the repository at this point in the history
(cherry picked from commit c18f4c4)
  • Loading branch information
Shenoda Guirguis authored Jun 21, 2021
1 parent 75c87e2 commit 78a8c43
Show file tree
Hide file tree
Showing 16 changed files with 1,138 additions and 53 deletions.
8 changes: 4 additions & 4 deletions api/src/main/java/org/apache/iceberg/types/PruneColumns.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ public Type struct(Types.StructType struct, List<Type> fieldResults) {
} else if (projectedType != null) {
sameTypes = false; // signal that some types were altered
if (field.isOptional()) {
selectedFields.add(
Types.NestedField.optional(field.fieldId(), field.name(), projectedType, field.doc()));
selectedFields.add(Types.NestedField.optional(
field.fieldId(), field.name(), projectedType, field.getDefaultValue(), field.doc()));
} else {
selectedFields.add(
Types.NestedField.required(field.fieldId(), field.name(), projectedType, field.doc()));
selectedFields.add(Types.NestedField.required(
field.fieldId(), field.name(), projectedType, field.getDefaultValue(), field.doc()));
}
}
}
Expand Down
104 changes: 93 additions & 11 deletions api/src/main/java/org/apache/iceberg/types/Types.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.apache.iceberg.types;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
Expand Down Expand Up @@ -415,42 +416,112 @@ public int hashCode() {

public static class NestedField implements Serializable {
public static NestedField optional(int id, String name, Type type) {
return new NestedField(true, id, name, type, null);
return new NestedField(true, id, name, type, null, null);
}

public static NestedField optional(int id, String name, Type type, String doc) {
return new NestedField(true, id, name, type, doc);
return new NestedField(true, id, name, type, null, doc);
}

/**
 * Creates an optional field that carries a default value.
 *
 * @param id field id
 * @param name field name; the constructor rejects null
 * @param type field type; the constructor rejects null
 * @param defaultValue default for the field, or null for "no default"; a non-null value is
 *                     validated against {@code type} when the field is constructed
 * @param doc field documentation, may be null
 * @return a new optional NestedField
 */
public static NestedField optional(int id, String name, Type type, Object defaultValue, String doc) {
return new NestedField(true, id, name, type, defaultValue, doc);
}

public static NestedField required(int id, String name, Type type) {
return new NestedField(false, id, name, type, null);
return new NestedField(false, id, name, type, null, null);
}

public static NestedField required(int id, String name, Type type, String doc) {
return new NestedField(false, id, name, type, doc);
return new NestedField(false, id, name, type, null, doc);
}

/**
 * Creates a required field that carries a default value.
 *
 * @param id field id
 * @param name field name; the constructor rejects null
 * @param type field type; the constructor rejects null
 * @param defaultValue default for the field, or null for "no default"; a non-null value is
 *                     validated against {@code type} when the field is constructed
 * @param doc field documentation, may be null
 * @return a new required NestedField
 */
public static NestedField required(int id, String name, Type type, Object defaultValue, String doc) {
return new NestedField(false, id, name, type, defaultValue, doc);
}

public static NestedField of(int id, boolean isOptional, String name, Type type) {
return new NestedField(isOptional, id, name, type, null);
return new NestedField(isOptional, id, name, type, null, null);
}

public static NestedField of(int id, boolean isOptional, String name, Type type, String doc) {
return new NestedField(isOptional, id, name, type, doc);
return new NestedField(isOptional, id, name, type, null, doc);
}

/**
 * Creates a field with explicit optionality that carries a default value.
 *
 * @param id field id
 * @param isOptional true for an optional field, false for a required one
 * @param name field name; the constructor rejects null
 * @param type field type; the constructor rejects null
 * @param defaultValue default for the field, or null for "no default"; a non-null value is
 *                     validated against {@code type} when the field is constructed
 * @param doc field documentation, may be null
 * @return a new NestedField
 */
public static NestedField of(int id, boolean isOptional, String name, Type type, Object defaultValue, String doc) {
return new NestedField(isOptional, id, name, type, defaultValue, doc);
}

/**
 * Checks that {@code defaultValue} is an acceptable default for a field of the given type.
 *
 * <p>A null default always passes (it means "no default"). Otherwise the expected shape is:
 * <ul>
 *   <li>STRUCT: a {@link Map} keyed by field name; either empty, or with exactly one entry per
 *       struct field, each value validated recursively against that field's type</li>
 *   <li>LIST: an {@link ArrayList}; each element validated against the element type</li>
 *   <li>MAP: a {@link Map}; each key and value validated against the key and value types</li>
 *   <li>FIXED, BINARY: a {@code byte[]}</li>
 *   <li>anything else: an instance of the type id's Java class</li>
 * </ul>
 *
 * @param defaultValue candidate default value, may be null
 * @param type type of the field the default belongs to
 * @throws IllegalArgumentException if the value does not match the type
 */
private static void validateDefaultValue(Object defaultValue, Type type) {
  if (defaultValue == null) {
    return;
  }

  switch (type.typeId()) {
    case STRUCT:
      Preconditions.checkArgument(defaultValue instanceof Map,
          "defaultValue should be a Map from fields names to values, for StructType");
      Map<?, ?> defaultStruct = (Map<?, ?>) defaultValue;
      if (defaultStruct.isEmpty()) {
        // an empty map is accepted as-is: no per-field defaults are supplied
        return;
      }
      int expectedFieldCount = type.asStructType().fields().size();
      Preconditions.checkArgument(defaultStruct.size() == expectedFieldCount,
          "defaultValue for StructType should have %s entries, but has %s",
          expectedFieldCount, defaultStruct.size());
      for (Map.Entry<?, ?> entry : defaultStruct.entrySet()) {
        // Reject bad keys explicitly; otherwise a non-String key surfaces as a
        // ClassCastException and an unknown name as a NullPointerException.
        Preconditions.checkArgument(entry.getKey() instanceof String,
            "defaultValue for StructType should be keyed by field name, but found key: %s", entry.getKey());
        NestedField structField = type.asStructType().field((String) entry.getKey());
        Preconditions.checkArgument(structField != null,
            "defaultValue for StructType refers to an unknown field: %s", entry.getKey());
        NestedField.validateDefaultValue(entry.getValue(), structField.type);
      }
      break;

    case LIST:
      // NOTE(review): only ArrayList is accepted, not any List - kept as-is because
      // consumers of the stored default are not visible here.
      Preconditions.checkArgument(defaultValue instanceof ArrayList,
          "defaultValue should be an ArrayList of Objects, for ListType");
      Type elementType = type.asListType().elementField.type;
      for (Object element : (List<?>) defaultValue) {
        NestedField.validateDefaultValue(element, elementType);
      }
      break;

    case MAP:
      Preconditions.checkArgument(defaultValue instanceof Map,
          "defaultValue should be an instance of Map for MapType");
      Type keyType = type.asMapType().keyField.type;
      Type valueType = type.asMapType().valueField.type;
      for (Map.Entry<?, ?> entry : ((Map<?, ?>) defaultValue).entrySet()) {
        NestedField.validateDefaultValue(entry.getKey(), keyType);
        NestedField.validateDefaultValue(entry.getValue(), valueType);
      }
      break;

    case FIXED:
    case BINARY:
      Preconditions.checkArgument(defaultValue instanceof byte[],
          "defaultValue should be an instance of byte[] for TypeId.%s, but defaultValue.class = %s",
          type.typeId().name(), defaultValue.getClass().getCanonicalName());
      break;

    default:
      Preconditions.checkArgument(type.typeId().javaClass().isInstance(defaultValue),
          "defaultValue should be an instance of %s for TypeId.%s, but defaultValue.class = %s",
          type.typeId().javaClass(), type.typeId().name(), defaultValue.getClass().getCanonicalName());
  }
}

private final boolean isOptional;
private final int id;
private final String name;
private final Type type;
private final Object defaultValue;
private final String doc;

private NestedField(boolean isOptional, int id, String name, Type type, String doc) {
private NestedField(boolean isOptional, int id, String name, Type type, Object defaultValue, String doc) {
Preconditions.checkNotNull(name, "Name cannot be null");
Preconditions.checkNotNull(type, "Type cannot be null");
validateDefaultValue(defaultValue, type);
this.isOptional = isOptional;
this.id = id;
this.name = name;
this.type = type;
this.defaultValue = defaultValue;
this.doc = doc;
}

Expand All @@ -462,7 +533,7 @@ public NestedField asOptional() {
if (isOptional) {
return this;
}
return new NestedField(true, id, name, type, doc);
return new NestedField(true, id, name, type, defaultValue, doc);
}

public boolean isRequired() {
Expand All @@ -473,7 +544,15 @@ public NestedField asRequired() {
if (!isOptional) {
return this;
}
return new NestedField(false, id, name, type, doc);
return new NestedField(false, id, name, type, defaultValue, doc);
}

/**
 * Returns whether this field carries a default value.
 *
 * @return true if the stored default value is non-null
 */
public boolean hasDefaultValue() {
return defaultValue != null;
}

/**
 * Returns the default value of this field.
 *
 * <p>The stored reference is returned directly; no copy is made.
 *
 * @return the default value, or null if the field has none
 */
public Object getDefaultValue() {
return defaultValue;
}

public int fieldId() {
Expand All @@ -496,6 +575,7 @@ public String doc() {
/**
 * Renders the field as {@code "<id>: <name>: <optional|required> <type>"}, followed by
 * {@code ", default value: <value>"} when a default is present and {@code " (<doc>)"} when
 * documentation is present.
 *
 * @return a human-readable description of this field
 */
public String toString() {
  StringBuilder description = new StringBuilder(
      String.format("%d: %s: %s %s", id, name, isOptional ? "optional" : "required", type));
  if (hasDefaultValue()) {
    // no trailing separator: the previous ", " suffix left a dangling comma at the end of the
    // string (or a stray ",  " before the doc)
    description.append(", default value: ").append(defaultValue);
  }
  if (doc != null) {
    description.append(" (").append(doc).append(")");
  }
  return description.toString();
}

Expand All @@ -514,6 +594,8 @@ public boolean equals(Object o) {
return false;
} else if (!name.equals(that.name)) {
return false;
} else if (!Objects.equals(defaultValue, that.defaultValue)) {
return false;
} else if (!Objects.equals(doc, that.doc)) {
return false;
}
Expand All @@ -522,7 +604,8 @@ public boolean equals(Object o) {

@Override
public int hashCode() {
return Objects.hash(NestedField.class, id, isOptional, name, type);
return hasDefaultValue() ? Objects.hash(NestedField.class, id, isOptional, name, type, defaultValue) :
Objects.hash(NestedField.class, id, isOptional, name, type);
}
}

Expand Down Expand Up @@ -740,7 +823,6 @@ public boolean equals(Object o) {
} else if (!(o instanceof ListType)) {
return false;
}

ListType listType = (ListType) o;
return elementField.equals(listType.elementField);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.types;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import static org.apache.iceberg.types.Types.NestedField;
import static org.apache.iceberg.types.Types.StructType;

/**
 * Tests default-value validation of {@link NestedField} when the field type is a struct.
 *
 * <p>A struct default is expressed as a map from field name to that field's default value.
 */
public class TestDefaultValuesForContainerTypes {

  static NestedField intFieldType;
  static NestedField stringFieldType;
  static StructType structType;

  @BeforeClass
  public static void beforeClass() {
    intFieldType = NestedField.optional(0, "optionalIntField", Types.IntegerType.get());
    stringFieldType = NestedField.required(1, "requiredStringField", Types.StringType.get());
    structType = StructType.of(Arrays.asList(intFieldType, stringFieldType));
  }

  @Test
  public void testStructTypeDefault() {
    Map<String, Object> structDefaultValue = new HashMap<>();
    structDefaultValue.put(intFieldType.name(), Integer.valueOf(1));
    structDefaultValue.put(stringFieldType.name(), "two");
    NestedField structField = NestedField.optional(2, "optionalStructField", structType, structDefaultValue, "doc");
    Assert.assertTrue(structField.hasDefaultValue());
    Assert.assertEquals(structDefaultValue, structField.getDefaultValue());
  }

  @Test (expected = IllegalArgumentException.class)
  public void testStructTypeDefaultInvalidFieldsTypes() {
    // Right shape (a map with one entry per field), but the int field is given a String,
    // so the failure comes from the per-field type check rather than the "must be a Map" check.
    Map<String, Object> structDefaultValue = new HashMap<>();
    structDefaultValue.put(intFieldType.name(), "one");
    structDefaultValue.put(stringFieldType.name(), "two");
    NestedField.optional(2, "optionalStructField", structType, structDefaultValue, "doc");
  }

  @Test (expected = IllegalArgumentException.class)
  public void testStructTypeDefaultInvalidNumberFields() {
    // Valid values for both struct fields plus one extra entry: the entry count (3) no longer
    // matches the struct's field count (2).
    Map<String, Object> structDefaultValue = new HashMap<>();
    structDefaultValue.put(intFieldType.name(), Integer.valueOf(1));
    structDefaultValue.put(stringFieldType.name(), "two");
    structDefaultValue.put("extraField", "three");
    NestedField.optional(2, "optionalStructField", structType, structDefaultValue, "doc");
  }

  @Test (expected = IllegalArgumentException.class)
  public void testStructTypeDefaultNotAMap() {
    // A struct default must be a Map; a List is rejected outright.
    List<Object> structDefaultValue = new ArrayList<>();
    structDefaultValue.add(Integer.valueOf(1));
    structDefaultValue.add("two");
    NestedField.optional(2, "optionalStructField", structType, structDefaultValue, "doc");
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.types;

import org.apache.iceberg.types.Types.NestedField;
import org.junit.Assert;
import org.junit.Test;

import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;


/**
 * Tests the {@link NestedField} factory methods with and without a default value, using an
 * integer-typed field.
 */
public class TestNestedFieldDefaultValues {

  private final int id = 1;
  private final String fieldName = "fieldName";
  private final Type fieldType = Types.IntegerType.get();
  private final String doc = "field doc";
  private final Integer defaultValue = 100;

  @Test
  public void testConstructorsValidCases() {
    // optional(...) factories
    assertNoDefault(optional(id, fieldName, fieldType));
    assertNoDefault(optional(id, fieldName, fieldType, doc));
    assertCarriesDefault(optional(id, fieldName, fieldType, defaultValue, doc));
    assertCarriesDefault(optional(id, fieldName, fieldType, defaultValue, null));

    // required(...) factories
    assertNoDefault(required(id, fieldName, fieldType));
    assertNoDefault(required(id, fieldName, fieldType, doc));
    assertNoDefault(required(id, fieldName, fieldType, null, doc));
    assertCarriesDefault(required(id, fieldName, fieldType, defaultValue, doc));
    assertCarriesDefault(required(id, fieldName, fieldType, defaultValue, null));

    // of(...) factories
    assertNoDefault(NestedField.of(id, true, fieldName, fieldType));
    assertNoDefault(NestedField.of(id, true, fieldName, fieldType, doc));
    assertCarriesDefault(NestedField.of(id, true, fieldName, fieldType, defaultValue, doc));
  }

  @Test (expected = IllegalArgumentException.class)
  public void testOptionalWithInvalidDefaultValueClass() {
    // a Long default does not match the Java class of an integer-typed field
    Long mismatchedDefault = 100L;
    optional(id, fieldName, fieldType, mismatchedDefault, doc);
  }

  @Test (expected = IllegalArgumentException.class)
  public void testReqiredWithInvalidDefaultValueClass() {
    // a Long default does not match the Java class of an integer-typed field
    Long mismatchedDefault = 100L;
    required(id, fieldName, fieldType, mismatchedDefault, doc);
  }

  /** Asserts that the field reports no default value. */
  private void assertNoDefault(NestedField field) {
    Assert.assertFalse(field.hasDefaultValue());
  }

  /** Asserts that the field reports a default value equal to {@link #defaultValue}. */
  private void assertCarriesDefault(NestedField field) {
    Assert.assertTrue(field.hasDefaultValue());
    Assert.assertEquals(defaultValue, field.getDefaultValue());
  }
}
Loading

0 comments on commit 78a8c43

Please sign in to comment.