Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add data loader core key and column utils #1771

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions core/src/main/java/com/scalar/db/common/error/CoreError.java
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,14 @@ public enum CoreError implements ScalarDbError {
"Invalid file extension: %s. Allowed extensions are: %s",
"",
""),
DATA_LOADER_INVALID_COLUMN_KEY_PARSING_FAILED(
ypeckstadt marked this conversation as resolved.
Show resolved Hide resolved
Category.USER_ERROR, "0136", "Invalid key: Column %s does not exist in the table.", "", ""),
DATA_LOADER_INVALID_VALUE_KEY_PARSING_FAILED(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem to be used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad. It has become unused due to refactoring the error handling based on Toshi's feedback. I have removed it now.

Category.USER_ERROR, "0137", "Parsing of key value %s failed. Details:%s.", "", ""),
DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE(
Category.USER_ERROR, "0138", "Invalid base64 encoding for blob value for column %s", "", ""),
DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE(
Category.USER_ERROR, "0139", "Invalid number specified for column %s", "", ""),

//
// Errors for the concurrency error category
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.scalar.db.dataloader.core.exception;

/** Exception thrown when an error occurs while trying to encode or decode base64 values. */
public class Base64Exception extends Exception {

  /**
   * Creates an exception that carries only a detail message.
   *
   * @param message Exception message
   */
  public Base64Exception(String message) {
    super(message);
  }

  /**
   * Creates an exception that carries a detail message and the underlying failure, so that the
   * original stack trace is preserved when this exception is reported.
   *
   * @param message Exception message
   * @param cause Exception that triggered this one; may be {@code null} if unknown
   */
  public Base64Exception(String message, Throwable cause) {
    super(message, cause);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.scalar.db.dataloader.core.exception;

/** Checked exception signalling that a key value could not be parsed. */
public class KeyParsingException extends Exception {

  /**
   * Builds the exception from a detail message only.
   *
   * @param message Exception message
   */
  public KeyParsingException(String message) {
    super(message);
  }

  /**
   * Builds the exception from a detail message and the failure that triggered it.
   *
   * @param message Exception message
   * @param cause Underlying exception that is recorded as the cause
   */
  public KeyParsingException(String message, Throwable cause) {
    super(message, cause);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package com.scalar.db.dataloader.core.util;

import com.scalar.db.common.error.CoreError;
import com.scalar.db.dataloader.core.exception.Base64Exception;
import com.scalar.db.io.BigIntColumn;
import com.scalar.db.io.BlobColumn;
import com.scalar.db.io.BooleanColumn;
import com.scalar.db.io.Column;
import com.scalar.db.io.DataType;
import com.scalar.db.io.DoubleColumn;
import com.scalar.db.io.FloatColumn;
import com.scalar.db.io.IntColumn;
import com.scalar.db.io.TextColumn;
import java.util.Base64;

/** Utility class for creating and dealing with ScalarDB columns. */
public final class ColumnUtils {
/** Private constructor: keeps code outside this class from instantiating the utility class. */
private ColumnUtils() {
// restrict instantiation
}

/**
* Create a ScalarDB column from the given data type, column name, and value. Blob source values
* need to be base64 encoded.
*
* @param dataType Data type of the specified column
* @param columnName ScalarDB table column name
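* @param value Value for the ScalarDB column; if {@code null}, a null column is created
* @return ScalarDB column
* @throws Base64Exception if an error occurs while base64 decoding
* @throws NumberFormatException if the value cannot be parsed as a number for a numeric data type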
*/
public static Column<?> createColumnFromValue(DataType dataType, String columnName, String value)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding @Nullable for nullable parameters would be helpful:

Suggested change
public static Column<?> createColumnFromValue(DataType dataType, String columnName, String value)
public static Column<?> createColumnFromValue(DataType dataType, String columnName, @Nullable String value)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

throws Base64Exception {
  try {
    // A null source value always maps to the typed "null" column of the requested data type.
    switch (dataType) {
      case BOOLEAN:
        return value != null
            ? BooleanColumn.of(columnName, Boolean.parseBoolean(value))
            : BooleanColumn.ofNull(columnName);
      case INT:
        return value != null
            ? IntColumn.of(columnName, Integer.parseInt(value))
            : IntColumn.ofNull(columnName);
      case BIGINT:
        return value != null
            ? BigIntColumn.of(columnName, Long.parseLong(value))
            : BigIntColumn.ofNull(columnName);
      case FLOAT:
        return value != null
            ? FloatColumn.of(columnName, Float.parseFloat(value))
            : FloatColumn.ofNull(columnName);
      case DOUBLE:
        return value != null
            ? DoubleColumn.of(columnName, Double.parseDouble(value))
            : DoubleColumn.ofNull(columnName);
      case TEXT:
        return value != null ? TextColumn.of(columnName, value) : TextColumn.ofNull(columnName);
      case BLOB:
        // Source blob values need to be base64 encoded
        return value != null
            ? BlobColumn.of(columnName, Base64.getDecoder().decode(value))
            : BlobColumn.ofNull(columnName);
      default:
        throw new AssertionError();
    }
  } catch (NumberFormatException e) {
    // This clause must stay first: NumberFormatException is a subclass of
    // IllegalArgumentException. NumberFormatException has no (message, cause) constructor, so
    // the original failure is attached through initCause to keep it in the stack trace.
    NumberFormatException numberFormatFailure =
        new NumberFormatException(
            CoreError.DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE.buildMessage(columnName));
    numberFormatFailure.initCause(e);
    throw numberFormatFailure;
  } catch (IllegalArgumentException e) {
    // Any remaining IllegalArgumentException is reported as a base64 decoding failure. The
    // cause is attached through initCause so the decoder's own message is not lost.
    Base64Exception base64Failure =
        new Base64Exception(
            CoreError.DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE.buildMessage(
                columnName));
    base64Failure.initCause(e);
    throw base64Failure;
  }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should set the cause of the exceptions:

Suggested change
} catch (NumberFormatException e) {
throw new NumberFormatException(
CoreError.DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE.buildMessage(columnName));
} catch (IllegalArgumentException e) {
throw new Base64Exception(
CoreError.DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE.buildMessage(columnName));
}
} catch (NumberFormatException e) {
throw new NumberFormatException(
CoreError.DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE.buildMessage(columnName), e);
} catch (IllegalArgumentException e) {
throw new Base64Exception(
CoreError.DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE.buildMessage(columnName), e);
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Although to fix this I had to revise the used exceptions a bit.

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package com.scalar.db.dataloader.core.util;

import com.scalar.db.api.TableMetadata;
import com.scalar.db.common.error.CoreError;
import com.scalar.db.dataloader.core.ColumnKeyValue;
import com.scalar.db.dataloader.core.exception.Base64Exception;
import com.scalar.db.dataloader.core.exception.KeyParsingException;
import com.scalar.db.io.Column;
import com.scalar.db.io.DataType;
import com.scalar.db.io.Key;
import javax.annotation.Nullable;

/** Utility class for creating and dealing with ScalarDB keys. */
public final class KeyUtils {

/** Private constructor: keeps code outside this class from instantiating the utility class. */
private KeyUtils() {
// restrict instantiation
}

/**
* Convert a key value, in the format of {@code <key>=<value>}, to a ScalarDB Key instance.
*
* @param columnKeyValue A key value in the format of {@code <key>=<value>}; may be {@code null}
* @param tableMetadata Metadata for one ScalarDB table
* @return A new ScalarDB Key instance formatted by data type, or {@code null} if {@code
*     columnKeyValue} is {@code null}
* @throws KeyParsingException if there is an error parsing the key value
*/
public static Key parseKeyValue(
ypeckstadt marked this conversation as resolved.
Show resolved Hide resolved
@Nullable ColumnKeyValue columnKeyValue, TableMetadata tableMetadata)
throws KeyParsingException {
if (columnKeyValue == null) {
return null;
}
String columnName = columnKeyValue.getColumnName();
DataType columnDataType = tableMetadata.getColumnDataType(columnName);
if (columnDataType == null) {
throw new KeyParsingException(
CoreError.DATA_LOADER_INVALID_COLUMN_KEY_PARSING_FAILED.buildMessage(columnName));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering what if there are 3 tables that have name column and this exception is thrown for the second table. Users may want to know which table...? What do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the method and now passing in the table name as well. I had to add it because the table name is not part of the TableMetadata.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ypeckstadt Thanks for improving the error message!

But, sorry, I should've noticed this when posting the previous comment, other error messages (e.g., DATA_LOADER_INVALID_COLUMN_NON_EXISTENT) have the same issue. Also, the namespace name should be added as well. I think we have 2 options as follows:

  1. Add namespace and table names to all the error messages if needed
  2. The class is a util class for keys, so it may be okay to include only key name in the error messages from KeyUtils. Instead, we'll catch exceptions from KeyUtils and wrap it with the namespace and table names like this:
  try {
    :
    // This exception doesn't contain the namespace or table name.
    Key key = KeyUtils.parseKeyValue(columnKeyValue, tableMetadata);
  } catch (KeyParsingException e) {
    // Add the namespace and table names information.
    throw new KeyParsingException(
       CoreError.DATA_LOADER_XXXX_FAILED.buildMessage(namespaceName, tableName), e);
  }

I think either is fine. What do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have made changes based on the above feedback. I went for option 1 as it seems like the safest solution. Option 2 is good as well, but then it shifts the responsibility for proper exception handling and logging to all classes that use the Key Utils. So it creates the risk that some classes do it and some don't.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the improvement!

It looks like there is room to improve the other error messages from the same perspective (i.e., missing information about namespace and table). What do you think?

Copy link
Contributor Author

@ypeckstadt ypeckstadt Jul 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@komamitsu My apologies for the late reply. (had to focus on ScalarFlow more). I have updated the PR and ensured each exception message includes the namespace and table name.

To avoid having to pass in all 3 with all methods, I have added a wrapper class called ColumnInfo that includes all 3 fields.

}
try {
  return createKey(columnDataType, columnName, columnKeyValue.getColumnValue());
} catch (Base64Exception e) {
  // Pass the caught exception as the cause so the underlying decoding failure stays visible in
  // the stack trace instead of surviving only as text inside the message.
  throw new KeyParsingException(
      CoreError.DATA_LOADER_INVALID_VALUE_KEY_PARSING_FAILED.buildMessage(
          columnKeyValue.getColumnValue(), e.getMessage()),
      e);
}
}

/**
 * Builds a single-column ScalarDB key. The column is produced by {@link
 * ColumnUtils#createColumnFromValue(DataType, String, String)} and then wrapped in a new key.
 *
 * @param dataType Data type of the specified column
 * @param columnName ScalarDB table column name
 * @param value Value for ScalarDB key, passed unchanged to the column factory
 * @return ScalarDB Key instance holding exactly the created column
 * @throws Base64Exception if there is an error creating the key value
 */
public static Key createKey(DataType dataType, String columnName, String value)
    throws Base64Exception {
  return Key.newBuilder()
      .add(ColumnUtils.createColumnFromValue(dataType, columnName, value))
      .build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package com.scalar.db.dataloader.core.util;

import static org.junit.jupiter.api.Assertions.*;

import com.scalar.db.common.error.CoreError;
import com.scalar.db.dataloader.core.exception.Base64Exception;
import com.scalar.db.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/** Unit tests for {@link ColumnUtils#createColumnFromValue}. */
class ColumnUtilsTest {

  private static final float FLOAT_VALUE = 2.78f;

  /**
   * Supplies the arguments of the parameterized test: data type, column name, raw string value
   * (or {@code null}) and the column expected for that input. Every data type appears once with a
   * value and once with {@code null}.
   */
  private static Stream<Arguments> provideColumnsForCreateColumnFromValue() {
    byte[] blobBytes = "binary".getBytes(StandardCharsets.UTF_8);
    String encodedBlob = Base64.getEncoder().encodeToString(blobBytes);
    String floatText = Float.toString(FLOAT_VALUE);
    String doubleText = Double.toString(Math.E);

    return Stream.of(
        // BOOLEAN
        Arguments.of(DataType.BOOLEAN, "boolColumn", "true", BooleanColumn.of("boolColumn", true)),
        Arguments.of(DataType.BOOLEAN, "boolColumn", null, BooleanColumn.ofNull("boolColumn")),
        // INT
        Arguments.of(DataType.INT, "intColumn", "42", IntColumn.of("intColumn", 42)),
        Arguments.of(DataType.INT, "intColumn", null, IntColumn.ofNull("intColumn")),
        // BIGINT
        Arguments.of(
            DataType.BIGINT,
            "bigintColumn",
            "123456789012",
            BigIntColumn.of("bigintColumn", 123456789012L)),
        Arguments.of(DataType.BIGINT, "bigintColumn", null, BigIntColumn.ofNull("bigintColumn")),
        // FLOAT
        Arguments.of(
            DataType.FLOAT, "floatColumn", floatText, FloatColumn.of("floatColumn", FLOAT_VALUE)),
        Arguments.of(DataType.FLOAT, "floatColumn", null, FloatColumn.ofNull("floatColumn")),
        // DOUBLE
        Arguments.of(
            DataType.DOUBLE, "doubleColumn", doubleText, DoubleColumn.of("doubleColumn", Math.E)),
        Arguments.of(DataType.DOUBLE, "doubleColumn", null, DoubleColumn.ofNull("doubleColumn")),
        // TEXT
        Arguments.of(
            DataType.TEXT,
            "textColumn",
            "Hello, world!",
            TextColumn.of("textColumn", "Hello, world!")),
        Arguments.of(DataType.TEXT, "textColumn", null, TextColumn.ofNull("textColumn")),
        // BLOB (the source value is the base64 form of the expected bytes)
        Arguments.of(
            DataType.BLOB, "blobColumn", encodedBlob, BlobColumn.of("blobColumn", blobBytes)),
        Arguments.of(DataType.BLOB, "blobColumn", null, BlobColumn.ofNull("blobColumn")));
  }

  /** Each supplied input must produce a column equal to the expected one. */
  @ParameterizedTest
  @MethodSource("provideColumnsForCreateColumnFromValue")
  void createColumnFromValue_validInput_returnsColumn(
      DataType dataType, String columnName, String value, Column<?> expectedColumn)
      throws Base64Exception {
    assertEquals(expectedColumn, ColumnUtils.createColumnFromValue(dataType, columnName, value));
  }

  /** A non-numeric value for an INT column must fail with the data loader's number message. */
  @Test
  void createColumnFromValue_invalidNumberFormat_throwsNumberFormatException() {
    final String column = "intColumn";
    String expectedMessage =
        CoreError.DATA_LOADER_INVALID_NUMBER_FORMAT_FOR_COLUMN_VALUE.buildMessage(column);

    NumberFormatException thrown =
        assertThrows(
            NumberFormatException.class,
            () -> ColumnUtils.createColumnFromValue(DataType.INT, column, "not_a_number"));

    assertEquals(expectedMessage, thrown.getMessage());
  }

  /** A value that is not valid base64 for a BLOB column must fail with a Base64Exception. */
  @Test
  void createColumnFromValue_invalidBase64_throwsBase64Exception() {
    final String column = "blobColumn";
    String expectedMessage =
        CoreError.DATA_LOADER_INVALID_BASE64_ENCODING_FOR_COLUMN_VALUE.buildMessage(column);

    Base64Exception thrown =
        assertThrows(
            Base64Exception.class,
            () -> ColumnUtils.createColumnFromValue(DataType.BLOB, column, "invalid_base64"));

    assertEquals(expectedMessage, thrown.getMessage());
  }
}
Loading
Loading