Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add data loader core key and column utils #1771

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions core/src/main/java/com/scalar/db/common/error/CoreError.java
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,18 @@ public enum CoreError implements ScalarDbError {
"Invalid file extension: %s. Allowed extensions are: %s",
"",
""),
DATA_LOADER_INVALID_COLUMN_NON_EXISTENT(
Category.USER_ERROR, "0136", "Invalid key: Column %s does not exist in the table.", "", ""),
DATA_LOADER_INVALID_VALUE_KEY_PARSING_FAILED(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem to be used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad. It has become unused due to refactoring the error handling based on Toshi's feedback. I have removed it now.

Category.USER_ERROR,
"0137",
"Parsing of key value %s failed for table %s. Details:%s.",
"",
""),
DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE(
Category.USER_ERROR, "0138", "Invalid base64 encoding for blob value for column %s", "", ""),
DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE(
Category.USER_ERROR, "0139", "Invalid number specified for column %s", "", ""),

//
// Errors for the concurrency error category
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.scalar.db.dataloader.core.exception;

/**
 * Exception thrown when an error occurs while trying to encode or decode base64 values.
 *
 * <p>This is a checked exception, so callers must either handle it or declare it.
 */
public class Base64Exception extends Exception {

  /**
   * Creates an exception that carries only a description of the failure.
   *
   * @param message Exception message
   */
  public Base64Exception(String message) {
    super(message);
  }

  /**
   * Creates an exception that carries a description of the failure together with the underlying
   * error, so the original stack trace is not lost when the failure is translated.
   *
   * @param message Exception message
   * @param cause Underlying error that triggered this exception; may be {@code null}
   */
  public Base64Exception(String message, Throwable cause) {
    super(message, cause);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.scalar.db.dataloader.core.exception;

/**
 * Checked exception signalling that a key value could not be turned into a ScalarDB key.
 */
public class KeyParsingException extends Exception {

  /**
   * Creates an exception that carries only a description of the failure.
   *
   * @param message Exception message
   */
  public KeyParsingException(String message) {
    super(message);
  }

  /**
   * Creates an exception that carries a description of the failure and the error that caused it.
   *
   * @param message Exception message
   * @param cause Underlying error that triggered this exception
   */
  public KeyParsingException(String message, Throwable cause) {
    super(message, cause);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package com.scalar.db.dataloader.core.util;

import com.scalar.db.common.error.CoreError;
import com.scalar.db.dataloader.core.exception.Base64Exception;
import com.scalar.db.io.BigIntColumn;
import com.scalar.db.io.BlobColumn;
import com.scalar.db.io.BooleanColumn;
import com.scalar.db.io.Column;
import com.scalar.db.io.DataType;
import com.scalar.db.io.DoubleColumn;
import com.scalar.db.io.FloatColumn;
import com.scalar.db.io.IntColumn;
import com.scalar.db.io.TextColumn;
import java.util.Base64;

/**
* Utility class for creating and dealing with ScalarDB columns.
*
* <p>The class cannot be instantiated; its functionality is exposed through static methods.
*/
public final class ColumnUtils {
private ColumnUtils() {
// restrict instantiation
}

/**
* Create a ScalarDB column from the given data type, column name, and value. Blob source values
* need to be base64 encoded.
*
* <p>A {@code null} value yields the null column of the requested type (the {@code ofNull}
* factory of the matching column class). BOOLEAN values are converted with {@link
* Boolean#parseBoolean(String)}, so any text other than "true" (ignoring case) becomes {@code
* false} instead of being rejected.
*
* @param dataType Data type of the specified column
* @param columnName ScalarDB table column name
* @param value Value for the ScalarDB column, as text; {@code null} produces a null column
* @return ScalarDB column
* @throws Base64Exception if an error occurs while base64 decoding
* @throws NumberFormatException if the value cannot be parsed as a number for an INT, BIGINT,
*     FLOAT or DOUBLE column; the message is built from the column name
* @throws AssertionError if the data type is not one of the types handled by the switch
*/
public static Column<?> createColumnFromValue(DataType dataType, String columnName, String value)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding @Nullable for nullable parameters would be helpful:

Suggested change
public static Column<?> createColumnFromValue(DataType dataType, String columnName, String value)
public static Column<?> createColumnFromValue(DataType dataType, String columnName, Nullable String value)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

throws Base64Exception {
try {
// Every branch maps a null value to the typed null column and otherwise parses the text.
switch (dataType) {
case BOOLEAN:
return value != null
? BooleanColumn.of(columnName, Boolean.parseBoolean(value))
: BooleanColumn.ofNull(columnName);
case INT:
return value != null
? IntColumn.of(columnName, Integer.parseInt(value))
: IntColumn.ofNull(columnName);
case BIGINT:
return value != null
? BigIntColumn.of(columnName, Long.parseLong(value))
: BigIntColumn.ofNull(columnName);
case FLOAT:
return value != null
? FloatColumn.of(columnName, Float.parseFloat(value))
: FloatColumn.ofNull(columnName);
case DOUBLE:
return value != null
? DoubleColumn.of(columnName, Double.parseDouble(value))
: DoubleColumn.ofNull(columnName);
case TEXT:
return value != null ? TextColumn.of(columnName, value) : TextColumn.ofNull(columnName);
case BLOB:
// Source blob values need to be base64 encoded
return value != null
? BlobColumn.of(columnName, Base64.getDecoder().decode(value))
: BlobColumn.ofNull(columnName);
default:
// Reached only for a data type that has no case above.
throw new AssertionError();
}
} catch (NumberFormatException e) {
// NumberFormatException is a subclass of IllegalArgumentException, so this clause has to come
// before the broader one below. The replacement exception carries a message naming the column;
// the caught exception is not attached as its cause.
throw new NumberFormatException(
CoreError.DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE.buildMessage(columnName));
} catch (IllegalArgumentException e) {
// Base64.Decoder#decode reports malformed input with IllegalArgumentException. Any other
// IllegalArgumentException raised inside the try block is reported as a base64 error as well.
// The caught exception is not attached as the cause.
throw new Base64Exception(
CoreError.DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE.buildMessage(columnName));
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should set the cause of the exceptions:

Suggested change
} catch (NumberFormatException e) {
throw new NumberFormatException(
CoreError.DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE.buildMessage(columnName));
} catch (IllegalArgumentException e) {
throw new Base64Exception(
CoreError.DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE.buildMessage(columnName));
}
} catch (NumberFormatException e) {
throw new NumberFormatException(
CoreError.DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE.buildMessage(columnName), e);
} catch (IllegalArgumentException e) {
throw new Base64Exception(
CoreError.DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE.buildMessage(columnName), e);
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Although to fix this I had to revise the used exceptions a bit.

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package com.scalar.db.dataloader.core.util;

import com.scalar.db.api.TableMetadata;
import com.scalar.db.common.error.CoreError;
import com.scalar.db.dataloader.core.ColumnKeyValue;
import com.scalar.db.dataloader.core.exception.Base64Exception;
import com.scalar.db.dataloader.core.exception.KeyParsingException;
import com.scalar.db.io.Column;
import com.scalar.db.io.DataType;
import com.scalar.db.io.Key;
import javax.annotation.Nullable;

/** Utility class for creating and dealing with ScalarDB keys. */
public final class KeyUtils {

/** Private constructor: the class is used through its static methods only. */
private KeyUtils() {
// restrict instantiation
}

/**
* Convert a column key/value pair (originally supplied as {@code <key>=<value>}) to a ScalarDB
* Key instance for a specific ScalarDB table.
*
* <p>The column's data type is looked up in the table metadata by column name, and the textual
* value is converted according to that type. Only {@link Base64Exception} is translated into
* {@link KeyParsingException}; unchecked exceptions raised during conversion (for example a
* {@link NumberFormatException} for an unparsable numeric value) propagate unchanged.
*
* @param columnKeyValue Column name and value of the key; may be {@code null}
* @param tableName Name of the ScalarDB table; used only in the error message
* @param tableMetadata Metadata for one ScalarDB table
* @return A new ScalarDB Key instance formatted by data type, or {@code null} when {@code
*     columnKeyValue} is {@code null}
* @throws KeyParsingException if the column does not exist in the table metadata, or if the
*     value fails base64 decoding
*/
@Nullable
public static Key parseKeyValue(
ypeckstadt marked this conversation as resolved.
Show resolved Hide resolved
@Nullable ColumnKeyValue columnKeyValue, String tableName, TableMetadata tableMetadata)
throws KeyParsingException {
// No key supplied: nothing to parse.
if (columnKeyValue == null) {
return null;
}
String columnName = columnKeyValue.getColumnName();
DataType columnDataType = tableMetadata.getColumnDataType(columnName);
// A null data type is treated as "column not present in this table".
if (columnDataType == null) {
throw new KeyParsingException(
CoreError.DATA_LOADER_INVALID_COLUMN_NON_EXISTENT.buildMessage(columnName));
}
try {
return createKey(columnDataType, columnName, columnKeyValue.getColumnValue());
} catch (Base64Exception e) {
// Only the message of the caught exception is embedded in the new message; the exception
// itself is not passed on as the cause.
throw new KeyParsingException(
CoreError.DATA_LOADER_INVALID_VALUE_KEY_PARSING_FAILED.buildMessage(
columnKeyValue.getColumnValue(), tableName, e.getMessage()));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

Suggested change
throw new KeyParsingException(
CoreError.DATA_LOADER_INVALID_VALUE_KEY_PARSING_FAILED.buildMessage(
columnKeyValue.getColumnValue(), tableName, e.getMessage()));
throw new KeyParsingException(
CoreError.DATA_LOADER_INVALID_VALUE_KEY_PARSING_FAILED.buildMessage(
columnKeyValue.getColumnValue(), tableName, e.getMessage()), e);

}
}

/**
 * Build a single-column ScalarDB key from a textual value.
 *
 * <p>The value is first converted to a typed column by {@link
 * ColumnUtils#createColumnFromValue(DataType, String, String)}; that column then becomes the only
 * element of the returned key.
 *
 * @param dataType Data type of the specified column
 * @param columnName ScalarDB table column name
 * @param value Value for ScalarDB key
 * @return ScalarDB Key instance
 * @throws Base64Exception if there is an error creating the key value
 */
public static Key createKey(DataType dataType, String columnName, String value)
    throws Base64Exception {
  return Key.newBuilder()
      .add(ColumnUtils.createColumnFromValue(dataType, columnName, value))
      .build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package com.scalar.db.dataloader.core.util;

import static org.junit.jupiter.api.Assertions.*;

import com.scalar.db.common.error.CoreError;
import com.scalar.db.dataloader.core.exception.Base64Exception;
import com.scalar.db.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/** Unit tests for {@link ColumnUtils#createColumnFromValue(DataType, String, String)}. */
class ColumnUtilsTest {

  private static final float FLOAT_VALUE = 2.78f;

  /**
   * Supplies the cases for the parameterized test: for every data type one row with a textual
   * value and one row with a {@code null} value, each paired with the column that is expected.
   *
   * @return rows of (data type, column name, source value, expected column)
   */
  private static Stream<Arguments> provideColumnsForCreateColumnFromValue() {
    String floatText = Float.toString(FLOAT_VALUE);
    String doubleText = Double.toString(Math.E);
    byte[] blobBytes = "binary".getBytes(StandardCharsets.UTF_8);
    String blobText = Base64.getEncoder().encodeToString(blobBytes);

    return Stream.of(
        // BOOLEAN
        Arguments.of(DataType.BOOLEAN, "boolColumn", "true", BooleanColumn.of("boolColumn", true)),
        Arguments.of(DataType.BOOLEAN, "boolColumn", null, BooleanColumn.ofNull("boolColumn")),
        // INT
        Arguments.of(DataType.INT, "intColumn", "42", IntColumn.of("intColumn", 42)),
        Arguments.of(DataType.INT, "intColumn", null, IntColumn.ofNull("intColumn")),
        // BIGINT
        Arguments.of(
            DataType.BIGINT,
            "bigintColumn",
            "123456789012",
            BigIntColumn.of("bigintColumn", 123456789012L)),
        Arguments.of(DataType.BIGINT, "bigintColumn", null, BigIntColumn.ofNull("bigintColumn")),
        // FLOAT
        Arguments.of(
            DataType.FLOAT, "floatColumn", floatText, FloatColumn.of("floatColumn", FLOAT_VALUE)),
        Arguments.of(DataType.FLOAT, "floatColumn", null, FloatColumn.ofNull("floatColumn")),
        // DOUBLE
        Arguments.of(
            DataType.DOUBLE, "doubleColumn", doubleText, DoubleColumn.of("doubleColumn", Math.E)),
        Arguments.of(DataType.DOUBLE, "doubleColumn", null, DoubleColumn.ofNull("doubleColumn")),
        // TEXT
        Arguments.of(
            DataType.TEXT,
            "textColumn",
            "Hello, world!",
            TextColumn.of("textColumn", "Hello, world!")),
        Arguments.of(DataType.TEXT, "textColumn", null, TextColumn.ofNull("textColumn")),
        // BLOB (source value is the base64 form of the expected bytes)
        Arguments.of(DataType.BLOB, "blobColumn", blobText, BlobColumn.of("blobColumn", blobBytes)),
        Arguments.of(DataType.BLOB, "blobColumn", null, BlobColumn.ofNull("blobColumn")));
  }

  /** Valid or null input must produce a column equal to the expected one. */
  @ParameterizedTest
  @MethodSource("provideColumnsForCreateColumnFromValue")
  void createColumnFromValue_validInput_returnsColumn(
      DataType dataType, String columnName, String value, Column<?> expectedColumn)
      throws Base64Exception {
    assertEquals(expectedColumn, ColumnUtils.createColumnFromValue(dataType, columnName, value));
  }

  /** Non-numeric text for an INT column must surface as a NumberFormatException. */
  @Test
  void createColumnFromValue_invalidNumberFormat_throwsNumberFormatException() {
    String columnName = "intColumn";
    String expectedMessage =
        CoreError.DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE.buildMessage(columnName);

    NumberFormatException thrown =
        assertThrows(
            NumberFormatException.class,
            () -> ColumnUtils.createColumnFromValue(DataType.INT, columnName, "not_a_number"));

    assertEquals(expectedMessage, thrown.getMessage());
  }

  /** Text that is not valid base64 for a BLOB column must surface as a Base64Exception. */
  @Test
  void createColumnFromValue_invalidBase64_throwsBase64Exception() {
    String columnName = "blobColumn";
    String expectedMessage =
        CoreError.DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE.buildMessage(columnName);

    Base64Exception thrown =
        assertThrows(
            Base64Exception.class,
            () -> ColumnUtils.createColumnFromValue(DataType.BLOB, columnName, "invalid_base64"));

    assertEquals(expectedMessage, thrown.getMessage());
  }
}
Loading
Loading