From dc4ca46036d19f24f0ca303e3bf71f7c9b370130 Mon Sep 17 00:00:00 2001 From: haoke Date: Thu, 22 Dec 2022 16:30:12 +0800 Subject: [PATCH 01/14] [BitSail][core]remove useless dependencies in core-enrty module. --- bitsail-cores/bitsail-core-entry/pom.xml | 249 +----------------- bitsail-cores/bitsail-core-flink-base/pom.xml | 2 + .../typeutils/NativeFlinkTypeInfoUtil.java | 6 +- .../bitsail-core-flink-bridge/pom.xml | 2 + 4 files changed, 9 insertions(+), 250 deletions(-) diff --git a/bitsail-cores/bitsail-core-entry/pom.xml b/bitsail-cores/bitsail-core-entry/pom.xml index 653ed3b7e..aeee01948 100644 --- a/bitsail-cores/bitsail-core-entry/pom.xml +++ b/bitsail-cores/bitsail-core-entry/pom.xml @@ -28,115 +28,13 @@ ${revision} + - bitsail-client-api com.bytedance.bitsail + bitsail-client-api ${revision} - - org.xerial.snappy - snappy-java - ${snappy.version} - - - - - ch.qos.logback - logback-classic - provided - - - ch.qos.logback - logback-access - provided - - - org.slf4j - log4j-over-slf4j - provided - - - - - org.projectlombok - lombok - - - - - org.apache.flink - flink-clients_${scala.binary.version} - - - org.xerial.snappy - snappy-java - - - org.objenesis - objenesis - - - - - org.apache.flink - flink-streaming-java_${scala.binary.version} - - - org.xerial.snappy - snappy-java - - - - - - org.apache.flink - flink-table-api-java-bridge_${scala.binary.version} - - - org.yaml - snakeyaml - - - org.apache.commons - commons-pool2 - - - org.eclipse.jetty - jetty-http - - - - - org.apache.flink - flink-table-api-scala-bridge_${scala.binary.version} - - - - org.apache.flink - flink-scala_${scala.binary.version} - - - org.scala-lang.modules - scala-parser-combinators_${scala.binary.version} - - - - - org.apache.flink - flink-streaming-scala_${scala.binary.version} - - - - - junit - junit - jar - test - - - - com.bytedance.bitsail bitsail-core-api @@ -146,154 +44,11 @@ com.bytedance.bitsail bitsail-base - - - org.xerial.snappy - snappy-java - - com.bytedance.bitsail 
bitsail-common - - - com.bytedance.las - las-sdk-tunnel - - - - - - org.apache.kafka - kafka-clients - - - parquet-hadoop - org.apache.parquet - - - parquet-column - org.apache.parquet - - - org.slf4j - slf4j-log4j12 - - - metrics-core - io.dropwizard.metrics - - - commons-pool2 - org.apache.commons - - - java-redis-client - com.bytedance - - - snappy-java - org.xerial.snappy - - - com.github.luben - zstd-jni - - - com.bytedance - hadoop-zstd - - - commons-validator - commons-validator - - - ${kafka.clients.version} - - - - org.apache.hadoop - hadoop-yarn-client - - - com.google.inject - guice - - - com.google.inject.extensions - guice-servlet - - - org.codehaus.jackson - jackson-core-asl - - - org.codehaus.jackson - jackson-mapper-asl - - - org.codehaus.jackson - jackson-xc - - - org.codehaus.jackson - jackson-jaxrs - - - log4j - log4j - - - ${hadoop.version} - provided - - - org.apache.hadoop - hadoop-common - - - log4j - log4j - - - org.codehaus.jackson - jackson-xc - - - org.codehaus.jackson - jackson-jaxrs - - - org.codehaus.jackson - jackson-core-asl - - - org.codehaus.jackson - jackson-mapper-asl - - - javax.servlet - servlet-api - - - metrics-core - com.codahale.metrics - - - netty - io.netty - - - org.byted.infsec - dps - - - org.apache.commons - commons-math3 - - diff --git a/bitsail-cores/bitsail-core-flink-base/pom.xml b/bitsail-cores/bitsail-core-flink-base/pom.xml index eb3a23a60..ec3f7fb25 100644 --- a/bitsail-cores/bitsail-core-flink-base/pom.xml +++ b/bitsail-cores/bitsail-core-flink-base/pom.xml @@ -31,12 +31,14 @@ com.bytedance.bitsail bitsail-base ${revision} + provided com.bytedance.bitsail bitsail-common ${revision} + provided diff --git a/bitsail-cores/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/typeutils/NativeFlinkTypeInfoUtil.java b/bitsail-cores/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/typeutils/NativeFlinkTypeInfoUtil.java index 5627bfd88..76ae9dd74 100644 --- 
a/bitsail-cores/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/typeutils/NativeFlinkTypeInfoUtil.java +++ b/bitsail-cores/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/typeutils/NativeFlinkTypeInfoUtil.java @@ -106,15 +106,15 @@ public static TypeInfo toTypeInfo(TypeInformation typeInformation) { org.apache.flink.api.java.typeutils.MapTypeInfo mapTypeInfo = (org.apache.flink.api.java.typeutils.MapTypeInfo) typeInformation; return new MapTypeInfo<>( - TypeInfoNativeBridge.bridgeTypeInfo(mapTypeInfo.getKeyTypeInfo()), - TypeInfoNativeBridge.bridgeTypeInfo(mapTypeInfo.getValueTypeInfo()) + toTypeInfo(mapTypeInfo.getKeyTypeInfo()), + toTypeInfo(mapTypeInfo.getValueTypeInfo()) ); } else if (typeInformation instanceof org.apache.flink.api.java.typeutils.ListTypeInfo) { org.apache.flink.api.java.typeutils.ListTypeInfo listTypeInfo = (org.apache.flink.api.java.typeutils.ListTypeInfo) typeInformation; return new ListTypeInfo<>( - TypeInfoNativeBridge.bridgeTypeInfo(listTypeInfo.getElementTypeInfo()) + toTypeInfo(listTypeInfo.getElementTypeInfo()) ); } else { diff --git a/bitsail-cores/bitsail-core-flink-bridge/pom.xml b/bitsail-cores/bitsail-core-flink-bridge/pom.xml index 5ebe06266..61b903d49 100644 --- a/bitsail-cores/bitsail-core-flink-bridge/pom.xml +++ b/bitsail-cores/bitsail-core-flink-bridge/pom.xml @@ -36,12 +36,14 @@ com.bytedance.bitsail bitsail-base ${revision} + provided com.bytedance.bitsail bitsail-common ${revision} + provided From 3651283d88f509ad3864f3e143b31b7cf02fc7dc Mon Sep 17 00:00:00 2001 From: haoke Date: Thu, 22 Dec 2022 16:34:37 +0800 Subject: [PATCH 02/14] [BitSail][core]add kafka client to test. 
--- bitsail-test/bitsail-connector-test/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bitsail-test/bitsail-connector-test/pom.xml b/bitsail-test/bitsail-connector-test/pom.xml index 627be8293..9c8ade7af 100644 --- a/bitsail-test/bitsail-connector-test/pom.xml +++ b/bitsail-test/bitsail-connector-test/pom.xml @@ -86,6 +86,12 @@ ${elasticsearch.version} + + org.apache.kafka + kafka-clients + ${kafka.clients.version} + + org.elasticsearch.client elasticsearch-rest-high-level-client From 2b6d96044a31baee7dba6d09cc6f884977e74717 Mon Sep 17 00:00:00 2001 From: haoke Date: Tue, 27 Dec 2022 16:31:41 +0800 Subject: [PATCH 03/14] [BitSail][core]Replace class loader to url class loader. --- .../component/DefaultComponentBuilderLoader.java | 4 ++-- .../bitsail/base/packages/LocalFSPluginFinder.java | 11 ++++------- .../filemapping/FileMappingTypeInfoReader.java | 2 +- .../java/com/bytedance/bitsail/core/Engine.java | 14 +++++++++++--- .../bitsail/core/program/ProgramFactory.java | 2 +- .../core/execution/FlinkExecutionEnviron.java | 5 +++++ 6 files changed, 24 insertions(+), 14 deletions(-) diff --git a/bitsail-base/src/main/java/com/bytedance/bitsail/base/component/DefaultComponentBuilderLoader.java b/bitsail-base/src/main/java/com/bytedance/bitsail/base/component/DefaultComponentBuilderLoader.java index 9d8901dc2..9a1c33cdd 100644 --- a/bitsail-base/src/main/java/com/bytedance/bitsail/base/component/DefaultComponentBuilderLoader.java +++ b/bitsail-base/src/main/java/com/bytedance/bitsail/base/component/DefaultComponentBuilderLoader.java @@ -38,11 +38,11 @@ public class DefaultComponentBuilderLoader implements Serializable { private static final Logger LOG = LoggerFactory.getLogger(DefaultComponentBuilderLoader.class); private final Class clazz; private final Map components = Maps.newHashMap(); - private volatile boolean loaded; private final ClassLoader classLoader; + private volatile boolean loaded; public DefaultComponentBuilderLoader(Class clazz) { - 
this(clazz, DefaultComponentBuilderLoader.class.getClassLoader()); + this(clazz, Thread.currentThread().getContextClassLoader()); } public DefaultComponentBuilderLoader(Class clazz, ClassLoader classLoader) { diff --git a/bitsail-base/src/main/java/com/bytedance/bitsail/base/packages/LocalFSPluginFinder.java b/bitsail-base/src/main/java/com/bytedance/bitsail/base/packages/LocalFSPluginFinder.java index 9b0f70334..3b0dd2f96 100644 --- a/bitsail-base/src/main/java/com/bytedance/bitsail/base/packages/LocalFSPluginFinder.java +++ b/bitsail-base/src/main/java/com/bytedance/bitsail/base/packages/LocalFSPluginFinder.java @@ -41,11 +41,9 @@ public class LocalFSPluginFinder implements PluginFinder { private static final String DEFAULT_PLUGIN_FINDER_NAME = "localFS"; private List pluginStores; private URLClassLoader pluginClassloader; - private Set foundedPlugins; @Override public void configure(BitSailConfiguration commonConfiguration) { - this.foundedPlugins = Sets.newHashSet(); String frameworkBaseDir = commonConfiguration .getUnNecessaryOption(CommonOptions.JOB_PLUGIN_ROOT_PATH, getFrameworkEntryDir().toString()); @@ -69,8 +67,8 @@ public void configure(BitSailConfiguration commonConfiguration) { .pluginMappingBaseDirPath(frameworkBaseDirPath.resolve(engineMappingDirName)) .build()); - this.pluginClassloader = (URLClassLoader) Thread.currentThread() - .getContextClassLoader(); + this.pluginClassloader = URLClassLoader.newInstance(new URL[] {}, Thread.currentThread() + .getContextClassLoader()); } @Override @@ -113,12 +111,11 @@ public void loadPlugin(String canonicalName) { } tryAddPluginToClassloader(pluginClassloader, pluginUrls); - foundedPlugins.addAll(pluginUrls); } @Override public Set getFoundedPlugins() { - return foundedPlugins; + return Sets.newHashSet(pluginClassloader.getURLs()); } @Override @@ -134,9 +131,9 @@ private static void tryAddPluginToClassloader(URLClassLoader classloader, for (URL pluginUrl : pluginUrls) { addUrlMethod.invoke(classloader, 
pluginUrl); + LOG.info("Plugin class loader add plugin url: {}.", pluginUrl); } - LOG.debug("Plugin class loader's url: {}.", classloader.getURLs()); } catch (Exception e) { //ignore } diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/type/filemapping/FileMappingTypeInfoReader.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/type/filemapping/FileMappingTypeInfoReader.java index dccd5b1c3..39d17807b 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/type/filemapping/FileMappingTypeInfoReader.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/type/filemapping/FileMappingTypeInfoReader.java @@ -84,7 +84,7 @@ private void readerOption(Map converterConf, } private void read() throws IOException { - URL resource = FileMappingTypeInfoReader.class.getResource("/" + converterFileName); + URL resource = Thread.currentThread().getContextClassLoader().getResource(converterFileName); if (Objects.isNull(resource)) { throw new IllegalArgumentException(String.format("Resource for the column converter %s not found in classpath.", converterFileName)); } diff --git a/bitsail-cores/bitsail-core-entry/src/main/java/com/bytedance/bitsail/core/Engine.java b/bitsail-cores/bitsail-core-entry/src/main/java/com/bytedance/bitsail/core/Engine.java index 40aebe829..1be9fb7af 100644 --- a/bitsail-cores/bitsail-core-entry/src/main/java/com/bytedance/bitsail/core/Engine.java +++ b/bitsail-cores/bitsail-core-entry/src/main/java/com/bytedance/bitsail/core/Engine.java @@ -85,15 +85,21 @@ private void exitWhenException(Throwable e) throws Throwable { } private void run() throws Exception { + //plugin load from original class loader. 
PluginFinder pluginFinder = PluginFinderFactory .getPluginFinder(configuration.get(CommonOptions.PLUGIN_FINDER_NAME)); pluginFinder.configure(configuration); - Program entryProgram = ProgramFactory.createEntryProgram(pluginFinder, coreCommandArgs, configuration); - LOG.info("Final program: {}.", entryProgram.getComponentName()); - entryProgram.configure(pluginFinder, configuration, coreCommandArgs); + ClassLoader original = Thread.currentThread().getContextClassLoader(); try { + //set context class loader to plugin's class loader. + Thread.currentThread().setContextClassLoader(pluginFinder.getClassloader()); + + Program entryProgram = ProgramFactory.createEntryProgram(pluginFinder, coreCommandArgs, configuration); + LOG.info("Final program: {}.", entryProgram.getComponentName()); + entryProgram.configure(pluginFinder, configuration, coreCommandArgs); + if (entryProgram.validate()) { entryProgram.submit(); } @@ -101,6 +107,8 @@ private void run() throws Exception { if (configuration.fieldExists(CommonOptions.SLEEP_TIME)) { Thread.sleep(configuration.get(CommonOptions.SLEEP_TIME)); } + //reset context classloader to original. 
+ Thread.currentThread().setContextClassLoader(original); } } } diff --git a/bitsail-cores/bitsail-core-entry/src/main/java/com/bytedance/bitsail/core/program/ProgramFactory.java b/bitsail-cores/bitsail-core-entry/src/main/java/com/bytedance/bitsail/core/program/ProgramFactory.java index 861497f90..f4990ac92 100644 --- a/bitsail-cores/bitsail-core-entry/src/main/java/com/bytedance/bitsail/core/program/ProgramFactory.java +++ b/bitsail-cores/bitsail-core-entry/src/main/java/com/bytedance/bitsail/core/program/ProgramFactory.java @@ -31,7 +31,7 @@ public static Program createEntryProgram(PluginFinder pluginFinder, pluginFinder.loadPlugin(engineName); DefaultComponentBuilderLoader loader = - new DefaultComponentBuilderLoader<>(Program.class, pluginFinder.getClassloader()); + new DefaultComponentBuilderLoader<>(Program.class); return loader.loadComponent(engineName, true); } diff --git a/bitsail-cores/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/execution/FlinkExecutionEnviron.java b/bitsail-cores/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/execution/FlinkExecutionEnviron.java index 8e9744a32..edc2762cb 100644 --- a/bitsail-cores/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/execution/FlinkExecutionEnviron.java +++ b/bitsail-cores/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/execution/FlinkExecutionEnviron.java @@ -41,6 +41,7 @@ import com.alibaba.fastjson.JSONObject; import lombok.Getter; import lombok.Setter; +import org.apache.commons.collections.CollectionUtils; import org.apache.flink.api.common.JobExecutionResult; import org.apache.flink.api.dag.Transformation; import org.apache.flink.configuration.ConfigUtils; @@ -91,6 +92,10 @@ public void configure(Mode mode, PluginFinder pluginFinder, BitSailConfiguration } public void addPluginToExecution(Set libraries) { + if (CollectionUtils.isEmpty(libraries)) { + LOG.info("No plugins will add to execution environ."); + 
return; + } Configuration configuration = getFlinkConfiguration(); List classpath = ConfigUtils .decodeListFromConfig(configuration, PipelineOptions.JARS, URI::create); From 4649659bf9e6e0c324f64c8d0d7b1a5ff2bad2c2 Mon Sep 17 00:00:00 2001 From: haoke Date: Mon, 27 Mar 2023 14:47:13 +0800 Subject: [PATCH 04/14] [BitSail][Improve]Support simple type converter. --- .../{Serializer.java => Converter.java} | 6 +- .../{RowSerializer.java => RowConverter.java} | 2 +- .../typeinfo/TypeInfoValueConverter.java | 446 ++++++++++++++++++ .../typeinfo/TypeInfoValueConverterTest.java | 21 + .../delegate/DelegateFlinkSourceReader.java | 6 +- .../delegate/DelegateSourcePipeline.java | 8 +- .../writer/delegate/DelegateFlinkWriter.java | 8 +- .../delegate/DelegateFlinkSourceReader.java | 6 +- .../delegate/DelegateSourcePipeline.java | 8 +- .../writer/delegate/DelegateFlinkWriter.java | 8 +- .../bitsail_connector_unified_conf.json | 32 +- .../converter/FlinkRowConvertSerializer.java | 247 ---------- .../delagate/converter/FlinkRowConverter.java | 95 ++++ .../FlinkRowConvertSerializerTest.java | 12 +- 14 files changed, 625 insertions(+), 280 deletions(-) rename bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/{Serializer.java => Converter.java} (78%) rename bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/{RowSerializer.java => RowConverter.java} (91%) create mode 100644 bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java create mode 100644 bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverterTest.java delete mode 100644 bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConvertSerializer.java create mode 100644 bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConverter.java diff --git 
a/bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/Serializer.java b/bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/Converter.java similarity index 78% rename from bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/Serializer.java rename to bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/Converter.java index 73882651e..1b901716f 100644 --- a/bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/Serializer.java +++ b/bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/Converter.java @@ -22,9 +22,9 @@ /** * Created 2022/6/14 */ -public interface Serializer extends Serializable { +public interface Converter extends Serializable { - DeserializeT serialize(SerializeT obj) throws IOException; + DeserializeT from(SerializeT obj) throws IOException; - SerializeT deserialize(DeserializeT serialized) throws IOException; + SerializeT to(DeserializeT serialized) throws IOException; } diff --git a/bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/RowSerializer.java b/bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/RowConverter.java similarity index 91% rename from bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/RowSerializer.java rename to bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/RowConverter.java index 9025db23c..85fab245e 100644 --- a/bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/RowSerializer.java +++ b/bitsail-base/src/main/java/com/bytedance/bitsail/base/serializer/RowConverter.java @@ -21,6 +21,6 @@ /** * Created 2022/6/21 */ -public interface RowSerializer extends Serializer { +public interface RowConverter extends Converter { } diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java new file mode 100644 index 000000000..3fbdbe322 --- /dev/null 
+++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java @@ -0,0 +1,446 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.common.typeinfo; + +import com.bytedance.bitsail.common.BitSailException; +import com.bytedance.bitsail.common.column.Column; +import com.bytedance.bitsail.common.column.ListColumn; +import com.bytedance.bitsail.common.column.MapColumn; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.exception.CommonErrorCode; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.MapUtils; +import org.apache.commons.lang3.math.NumberUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.charset.Charset; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static com.bytedance.bitsail.common.typeinfo.TypeInfos.STRING_TYPE_INFO; + +public 
class TypeInfoValueConverter implements Serializable { + private static final Logger LOG = LoggerFactory.getLogger(TypeInfoValueConverter.class); + + private BitSailConfiguration commonConfiguration; + private DateTimeFormatter dateFormatter; + private DateTimeFormatter timeFormatter; + private DateTimeFormatter dateTimeFormatter; + private ZoneId timezone; + + public TypeInfoValueConverter(BitSailConfiguration commonConfiguration) { + this.commonConfiguration = commonConfiguration; + } + + /** + * Try to convert value to type info's definition. + */ + public Object convertObject(Object value, + TypeInfo typeInfo) { + //Return null directly if input is null. + if (Objects.isNull(value)) { + return null; + } + if (value instanceof Column) { + return convertColumnObject((Column) value, typeInfo); + } + if (compareValueTypeInfo(value, typeInfo)) { + return value; + } + return convertNormalObject(value, typeInfo); + } + + private Object convertColumnObject(Column value, + TypeInfo typeInfo) { + if (Objects.isNull(value)) { + return null; + } + + Class typeInfoTypeClass = typeInfo.getTypeClass(); + if (List.class.isAssignableFrom(typeInfoTypeClass)) { + if (!(value instanceof ListColumn)) { + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Column is not list column type, value: %s", value)); + } + return convertListColumnObject((ListColumn) value, (ListTypeInfo) typeInfo); + } + + if (Map.class.isAssignableFrom(typeInfoTypeClass)) { + if (!(value instanceof MapColumn)) { + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Column is not map column type, value: %s", value)); + } + return convertMapColumnObject((MapColumn) value, (MapTypeInfo) typeInfo); + } + + return convertPrimitiveColumnObject(value, typeInfo); + } + + private List convertListColumnObject(ListColumn columns, ListTypeInfo listTypeInfo) { + TypeInfo elementTypeInfo = listTypeInfo.getElementTypeInfo(); + List 
objects = new ArrayList<>(); + if (Objects.nonNull(columns)) { + for (Column column : columns) { + objects.add(convertColumnObject(column, elementTypeInfo)); + } + } + return objects; + } + + private Map convertMapColumnObject(Map columnMap, MapTypeInfo mapTypeInfo) { + TypeInfo keyTypeInfo = mapTypeInfo.getKeyTypeInfo(); + TypeInfo valueTypeInfo = mapTypeInfo.getValueTypeInfo(); + + Map maps = new HashMap<>(); + if (Objects.nonNull(columnMap)) { + columnMap.forEach((key, value) -> { + Object keyValue = convertColumnObject((Column) key, keyTypeInfo); + if (Objects.isNull(keyValue)) { + throw new BitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, "Map's key can't be null."); + } + Object mapValue = convertColumnObject((Column) value, valueTypeInfo); + maps.put(keyValue, mapValue); + }); + } + return maps; + } + + private Object convertPrimitiveColumnObject(Column column, TypeInfo typeInfo) { + Class typeInfoTypeClass = typeInfo.getTypeClass(); + if (null == column.getRawData()) { + return null; + } + + if (STRING_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asString(); + } else if (TypeInfos.BOOLEAN_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asBoolean(); + } else if (TypeInfos.BYTE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asLong().byteValue(); + } else if (TypeInfos.INT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asLong().intValue(); + } else if (TypeInfos.SHORT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asLong().shortValue(); + } else if (TypeInfos.LONG_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asLong(); + } else if (TypeInfos.BIG_INTEGER_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asBigInteger(); + } else if (TypeInfos.FLOAT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asDouble().floatValue(); + } else if (TypeInfos.DOUBLE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return 
column.asDouble(); + } else if (TypeInfos.BIG_DECIMAL_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asBigDecimal(); + } else if (TypeInfos.SQL_DATE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return new java.sql.Date(column.asDate().getTime()); + } else if (TypeInfos.SQL_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return new java.sql.Time(column.asDate().getTime()); + } else if (TypeInfos.SQL_TIMESTAMP_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return new java.sql.Timestamp(column.asDate().getTime()); + } else if (TypeInfos.LOCAL_DATE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asDate(); + } else if (TypeInfos.LOCAL_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asDate(); + } else if (TypeInfos.LOCAL_DATE_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asDate(); + } else if (BasicArrayTypeInfo.BINARY_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return column.asBytes(); + } else { + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + "Flink basic data type " + typeInfoTypeClass + " is not supported!"); + } + } + + /** + * Compare object's value type match with type info's definition or not. + */ + private boolean compareValueTypeInfo(Object value, + TypeInfo typeInfo) { + if (Objects.isNull(value)) { + return true; + } + + if (typeInfo instanceof MapTypeInfo) { + if (!(value instanceof Map)) { + return false; + } + Map map = (Map) value; + if (MapUtils.isEmpty(map)) { + return true; + } + + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + TypeInfo keyTypeInfo = mapTypeInfo.getKeyTypeInfo(); + TypeInfo valueTypeInfo = mapTypeInfo.getValueTypeInfo(); + + Iterator keyIterator = map.keySet().iterator(); + Object next = keyIterator.next(); + + //TODO find first not null key and first not null value, maybe it will consumer more resources. 
+ return compareValueTypeInfo(next, keyTypeInfo) + && compareValueTypeInfo(map.get(next), valueTypeInfo); + } + + if (typeInfo instanceof ListTypeInfo) { + if ((!(value instanceof List))) { + return false; + } + List list = (List) value; + if (CollectionUtils.isEmpty(list)) { + return true; + } + ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + TypeInfo elementTypeInfo = listTypeInfo.getElementTypeInfo(); + + //TODO find first not null element, maybe it will consumer more resources. + return compareValueTypeInfo(list.get(0), elementTypeInfo); + + } + return value.getClass().isAssignableFrom(typeInfo.getTypeClass()); + } + + private Object convertNormalObject(Object value, TypeInfo typeInfo) { + if (Objects.isNull(value)) { + return null; + } + + if (typeInfo instanceof MapTypeInfo) { + if (!(value instanceof Map)) { + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + "Object can't convert to map type."); + } + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + Map raw = (Map) value; + Map map = Maps.newHashMap(); + for (Object key : raw.keySet()) { + map.put(convertNormalObject(key, mapTypeInfo.getKeyTypeInfo()), + convertNormalObject(raw.get(key), mapTypeInfo.getValueTypeInfo())); + } + return map; + } else if (typeInfo instanceof ListTypeInfo) { + if (!(value instanceof List)) { + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + "Object can't convert to list type."); + } + ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + List raw = (List) value; + List list = Lists.newArrayList(); + for (Object key : raw) { + list.add(convertNormalObject(key, listTypeInfo.getElementTypeInfo())); + } + return list; + + } else { + return convertPrimitiveObject(value, typeInfo); + } + } + + /** + * TODO add chart to show the relation of the type conversion. + * TODO check number type overflow when do the convert. 
+ */ + private Object convertPrimitiveObject(Object value, TypeInfo typeInfo) { + if (Objects.isNull(value)) { + return null; + } + + Class typeInfoTypeClass = typeInfo.getTypeClass(); + + if (STRING_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof byte[]) { + return new String((byte[]) value, Charset.defaultCharset()); + } + return String.valueOf(value); + } + + if (TypeInfos.SHORT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof Number) { + return ((Number) value).shortValue(); + } + return NumberUtils.createNumber(value.toString()).shortValue(); + } + + if (TypeInfos.INT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof Number) { + return ((Number) value).intValue(); + } + return NumberUtils.createNumber(value.toString()).intValue(); + } + + if (TypeInfos.LONG_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof Number) { + return ((Number) value).longValue(); + } + return NumberUtils.createNumber(value.toString()).longValue(); + } + + if (TypeInfos.FLOAT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof Number) { + return ((Number) value).floatValue(); + } + return NumberUtils.createNumber(value.toString()).floatValue(); + } + + if (TypeInfos.DOUBLE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + return NumberUtils.createNumber(value.toString()).doubleValue(); + } + + if (TypeInfos.BIG_INTEGER_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof Number) { + return ((Number) value).intValue(); + } + return new BigInteger(value.toString()); + } + + if (TypeInfos.BIG_DECIMAL_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return new BigDecimal(value.toString()); + } + + if (TypeInfos.BOOLEAN_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + String str = value.toString(); + return Boolean.parseBoolean(str); + } + + if 
(TypeInfos.LOCAL_DATE_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return convertLocalDateTime(value, typeInfo); + } + + if (TypeInfos.LOCAL_DATE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return convertLocalDate(value, typeInfo); + } + + if (TypeInfos.LOCAL_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return convertLocalTime(value, typeInfo); + } + + if (TypeInfos.SQL_TIMESTAMP_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return convertSqlTimestamp(value, typeInfo); + } + + if (TypeInfos.SQL_DATE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return convertSqlDate(value, typeInfo); + } + + if (TypeInfos.SQL_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + return convertSqlTime(value, typeInfo); + } + + if (BasicArrayTypeInfo.BINARY_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof byte[]) { + return (byte[]) value; + } + } + + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Value %s can't convert into type info %s.", value, typeInfo)); + } + + private Object convertSqlTime(Object value, TypeInfo typeInfo) { + return null; + } + + private Object convertSqlDate(Object value, TypeInfo typeInfo) { + return null; + } + + private Object convertSqlTimestamp(Object value, TypeInfo typeInfo) { + return null; + } + + private Object convertLocalTime(Object value, TypeInfo typeInfo) { + if (value instanceof LocalTime) { + return (LocalTime) value; + } + if (value instanceof LocalDateTime) { + return ((LocalDateTime) value).toLocalTime(); + } + if (value instanceof String) { + //convert string to local date time. 
+ try { + return LocalTime.parse(value.toString(), timeFormatter); + } catch (Exception e) { + LOG.debug("Value {} can't convert to local time.", value); + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Value %s can't convert into type info %s.", value, typeInfo)); + } + } + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Value %s can't convert into type info %s.", value, typeInfo)); + } + + private Object convertLocalDate(Object value, TypeInfo typeInfo) { + if (value instanceof LocalDateTime) { + return ((LocalDateTime) value).toLocalDate(); + } + if (value instanceof LocalDate) { + return (LocalDate) value; + } + if (value instanceof String) { + //convert string to local date time. + try { + return LocalDate.parse(value.toString(), dateFormatter); + } catch (Exception e) { + LOG.debug("Value {} can't convert to local date time.", value); + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Value %s can't convert into type info %s.", value, typeInfo)); + } + } + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Value %s can't convert into type info %s.", value, typeInfo)); + } + + private Object convertLocalDateTime(Object value, TypeInfo typeInfo) { + if (value instanceof LocalDateTime) { + return (LocalDateTime) value; + } + if (value instanceof LocalDate) { + return ((LocalDate) value).atStartOfDay(); + } + if (value instanceof String) { + //convert string to local date time. 
+ try { + return LocalDateTime.parse(value.toString(), dateTimeFormatter); + } catch (Exception e) { + LOG.debug("Value {} can't convert to local date time.", value); + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Value %s can't convert into type info %s.", value, typeInfo)); + } + } + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Value %s can't convert into type info %s.", value, typeInfo)); + } + +} diff --git a/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverterTest.java b/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverterTest.java new file mode 100644 index 000000000..f5d027d01 --- /dev/null +++ b/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverterTest.java @@ -0,0 +1,21 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.common.typeinfo; + +public class TypeInfoValueConverterTest { + +} \ No newline at end of file diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateFlinkSourceReader.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateFlinkSourceReader.java index 4b48c286e..68abc9947 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateFlinkSourceReader.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateFlinkSourceReader.java @@ -26,7 +26,7 @@ import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; import com.bytedance.bitsail.common.util.Pair; import com.bytedance.bitsail.core.flink.bridge.reader.delegate.operator.DelegateSourceReaderContext; -import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConvertSerializer; +import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConverter; import com.bytedance.bitsail.flink.core.runtime.RuntimeContextInjectable; import com.google.common.collect.ImmutableList; @@ -55,7 +55,7 @@ public class DelegateFlinkSourceReader sourceReader; private transient DelegateSourcePipeline pipeline; - private transient FlinkRowConvertSerializer flinkRowConvertSerializer; + private transient FlinkRowConverter flinkRowConvertSerializer; private transient CompletableFuture available; private transient Messenger messenger; @@ -113,7 +113,7 @@ public void sendSplitRequest() { this.sourceReader = sourceReaderFunction .apply(context); this.available = new CompletableFuture<>(); - this.flinkRowConvertSerializer = new FlinkRowConvertSerializer( + this.flinkRowConvertSerializer = new FlinkRowConverter( 
rowTypeInfo, commonConfiguration); if (this.messenger instanceof RuntimeContextInjectable) { diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateSourcePipeline.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateSourcePipeline.java index 3d4d4a722..09853c0ce 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateSourcePipeline.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateSourcePipeline.java @@ -27,7 +27,7 @@ import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.option.CommonOptions; import com.bytedance.bitsail.common.row.Row; -import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConvertSerializer; +import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConverter; import com.bytedance.bitsail.flink.core.util.RowUtil; import org.apache.flink.api.connector.source.ReaderOutput; @@ -44,7 +44,7 @@ public class DelegateSourcePipeline implements SourcePipeline { private final ReaderOutput readerOutput; //todo flink row converter and watermark. 
- private final FlinkRowConvertSerializer flinkRowConvertSerializer; + private final FlinkRowConverter flinkRowConvertSerializer; private final AbstractDirtyCollector dirtyCollector; @@ -57,7 +57,7 @@ public class DelegateSourcePipeline implements SourcePipeline { private Channel trafficLimiter; public DelegateSourcePipeline(ReaderOutput readerOutput, - FlinkRowConvertSerializer flinkRowConvertSerializer, + FlinkRowConverter flinkRowConvertSerializer, MetricManager metricManager, Messenger messenger, AbstractDirtyCollector dirtyCollectorFactory, @@ -81,7 +81,7 @@ private void preparePipeline() { public void output(T record) throws IOException { org.apache.flink.types.Row serialize; try { - serialize = flinkRowConvertSerializer.serialize((Row) record); + serialize = flinkRowConvertSerializer.to((Row) record); long rowBytesSize = RowUtil.getRowBytesSize(serialize); messenger.addSuccessRecord(rowBytesSize); metricManager.reportRecord(rowBytesSize, MessageType.SUCCESS); diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java index 587aa194e..9682c5b65 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java @@ -36,7 +36,7 @@ import com.bytedance.bitsail.common.typeinfo.TypeInfoUtils; import com.bytedance.bitsail.common.util.Pair; import com.bytedance.bitsail.core.flink.bridge.serializer.DelegateSimpleVersionedSerializer; -import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConvertSerializer; +import 
com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConverter; import com.bytedance.bitsail.flink.core.runtime.RuntimeContextInjectable; import com.google.common.collect.ImmutableList; @@ -81,7 +81,7 @@ public class DelegateFlinkWriter sink; private final BitSailConfiguration writerConfiguration; private final BitSailConfiguration commonConfiguration; - private final FlinkRowConvertSerializer flinkRowConvertSerializer; + private final FlinkRowConverter flinkRowConvertSerializer; private final RowTypeInfo rowTypeInfo; private transient Writer writer; private transient ListState writeState; @@ -117,7 +117,7 @@ public DelegateFlinkWriter(BitSailConfiguration commonConfiguration, .getRowTypeInfo(sink.createTypeInfoConverter(), columnInfos); } - this.flinkRowConvertSerializer = new FlinkRowConvertSerializer( + this.flinkRowConvertSerializer = new FlinkRowConverter( this.rowTypeInfo, this.commonConfiguration); } @@ -191,7 +191,7 @@ public void processElement(StreamRecord element) throws Exception { try { if (value instanceof Row) { // convert flink row to BitSail row. 
- com.bytedance.bitsail.common.row.Row deserializer = flinkRowConvertSerializer.deserialize((Row) value); + com.bytedance.bitsail.common.row.Row deserializer = flinkRowConvertSerializer.to((Row) value); writer.write((InputT) deserializer); } else { writer.write(element.getValue()); diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/reader/delegate/DelegateFlinkSourceReader.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/reader/delegate/DelegateFlinkSourceReader.java index c1944d0d4..36c7719c8 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/reader/delegate/DelegateFlinkSourceReader.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/reader/delegate/DelegateFlinkSourceReader.java @@ -25,7 +25,7 @@ import com.bytedance.bitsail.common.option.CommonOptions; import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; import com.bytedance.bitsail.common.util.Pair; -import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConvertSerializer; +import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConverter; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -53,7 +53,7 @@ public class DelegateFlinkSourceReader sourceReader; private transient DelegateSourcePipeline pipeline; - private transient FlinkRowConvertSerializer flinkRowConvertSerializer; + private transient FlinkRowConverter flinkRowConvertSerializer; private transient CompletableFuture available; private transient Messenger messenger; @@ -111,7 +111,7 @@ public void sendSplitRequest() { this.sourceReader = sourceReaderFunction .apply(context); this.available = new CompletableFuture<>(); - this.flinkRowConvertSerializer = new 
FlinkRowConvertSerializer( + this.flinkRowConvertSerializer = new FlinkRowConverter( rowTypeInfo, commonConfiguration); } diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/reader/delegate/DelegateSourcePipeline.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/reader/delegate/DelegateSourcePipeline.java index ae808865e..188cfde2e 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/reader/delegate/DelegateSourcePipeline.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/reader/delegate/DelegateSourcePipeline.java @@ -27,7 +27,7 @@ import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.option.CommonOptions; import com.bytedance.bitsail.common.row.Row; -import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConvertSerializer; +import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConverter; import com.bytedance.bitsail.flink.core.util.RowUtil; import org.apache.flink.api.connector.source.ReaderOutput; @@ -44,7 +44,7 @@ public class DelegateSourcePipeline implements SourcePipeline { private final ReaderOutput readerOutput; //todo flink row converter and watermark. 
- private final FlinkRowConvertSerializer flinkRowConvertSerializer; + private final FlinkRowConverter flinkRowConvertSerializer; private final AbstractDirtyCollector dirtyCollector; @@ -57,7 +57,7 @@ public class DelegateSourcePipeline implements SourcePipeline { private Channel trafficLimiter; public DelegateSourcePipeline(ReaderOutput readerOutput, - FlinkRowConvertSerializer flinkRowConvertSerializer, + FlinkRowConverter flinkRowConvertSerializer, MetricManager metricManager, Messenger messenger, AbstractDirtyCollector dirtyCollectorFactory, @@ -81,7 +81,7 @@ private void preparePipeline() { public void output(T record) throws IOException { org.apache.flink.types.Row serialize; try { - serialize = flinkRowConvertSerializer.serialize((Row) record); + serialize = flinkRowConvertSerializer.to((Row) record); long rowBytesSize = RowUtil.getRowBytesSize(serialize); messenger.addSuccessRecord(rowBytesSize); metricManager.reportRecord(rowBytesSize, MessageType.SUCCESS); diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/writer/delegate/DelegateFlinkWriter.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/writer/delegate/DelegateFlinkWriter.java index 49b6e58d5..2792a577d 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/writer/delegate/DelegateFlinkWriter.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/writer/delegate/DelegateFlinkWriter.java @@ -36,7 +36,7 @@ import com.bytedance.bitsail.common.typeinfo.TypeInfoUtils; import com.bytedance.bitsail.common.util.Pair; import com.bytedance.bitsail.core.flink116.bridge.serializer.DelegateSimpleVersionedSerializer; -import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConvertSerializer; 
+import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConverter; import com.bytedance.bitsail.flink.core.runtime.RuntimeContextInjectable; import com.google.common.collect.ImmutableList; @@ -81,7 +81,7 @@ public class DelegateFlinkWriter sink; private final BitSailConfiguration writerConfiguration; private final BitSailConfiguration commonConfiguration; - private final FlinkRowConvertSerializer flinkRowConvertSerializer; + private final FlinkRowConverter flinkRowConvertSerializer; private final RowTypeInfo rowTypeInfo; private transient Writer writer; private transient ListState writeState; @@ -117,7 +117,7 @@ public DelegateFlinkWriter(BitSailConfiguration commonConfiguration, .getRowTypeInfo(sink.createTypeInfoConverter(), columnInfos); } - this.flinkRowConvertSerializer = new FlinkRowConvertSerializer( + this.flinkRowConvertSerializer = new FlinkRowConverter( this.rowTypeInfo, this.commonConfiguration); } @@ -191,7 +191,7 @@ public void processElement(StreamRecord element) throws Exception { try { if (value instanceof Row) { // convert flink row to BitSail row. 
- com.bytedance.bitsail.common.row.Row deserializer = flinkRowConvertSerializer.deserialize((Row) value); + com.bytedance.bitsail.common.row.Row deserializer = flinkRowConvertSerializer.from((Row) value); writer.write((InputT) deserializer); } else { writer.write(element.getValue()); diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/test/resources/bitsail_connector_unified_conf.json b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/test/resources/bitsail_connector_unified_conf.json index 0f2931064..79811f4ad 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/test/resources/bitsail_connector_unified_conf.json +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/test/resources/bitsail_connector_unified_conf.json @@ -47,7 +47,37 @@ }, "writer": { "class": "com.bytedance.bitsail.connector.print.sink.PrintSink", - "writer_parallelism_num": 2 + "writer_parallelism_num": 2, + "columns": [ + { + "name": "id", + "type": "string" + }, + { + "name": "local_date_value", + "type": "date" + }, + { + "name": "list_value", + "type": "list" + }, + { + "name": "map_value", + "type": "map" + }, + { + "name": "localdatetime_value", + "type": "timestamp" + }, + { + "name": "date_value", + "type": "date.date" + }, + { + "name": "datetime_value", + "type": "date.datetime" + } + ] } } } diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConvertSerializer.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConvertSerializer.java deleted file mode 100644 index 7967f06ed..000000000 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConvertSerializer.java +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright 2022-2023 Bytedance Ltd. 
and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.bytedance.bitsail.flink.core.delagate.converter; - -import com.bytedance.bitsail.base.serializer.RowSerializer; -import com.bytedance.bitsail.common.BitSailException; -import com.bytedance.bitsail.common.column.Column; -import com.bytedance.bitsail.common.column.ListColumn; -import com.bytedance.bitsail.common.column.MapColumn; -import com.bytedance.bitsail.common.configuration.BitSailConfiguration; -import com.bytedance.bitsail.common.exception.CommonErrorCode; -import com.bytedance.bitsail.common.typeinfo.BasicArrayTypeInfo; -import com.bytedance.bitsail.common.typeinfo.ListTypeInfo; -import com.bytedance.bitsail.common.typeinfo.MapTypeInfo; -import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; -import com.bytedance.bitsail.common.typeinfo.TypeInfo; -import com.bytedance.bitsail.common.typeinfo.TypeInfos; - -import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.collections.MapUtils; -import org.apache.commons.lang3.ArrayUtils; -import org.apache.flink.types.Row; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -import static com.bytedance.bitsail.common.typeinfo.TypeInfos.STRING_TYPE_INFO; - -/** - * Created 2022/6/21 - */ -public class FlinkRowConvertSerializer implements RowSerializer { - 
private final RowTypeInfo rowTypeInfo; - - private final BitSailConfiguration commonConfiguration; - - public FlinkRowConvertSerializer(RowTypeInfo rowTypeInfo, - BitSailConfiguration commonConfiguration) { - this.rowTypeInfo = rowTypeInfo; - this.commonConfiguration = commonConfiguration; - } - - @Override - public Row serialize(com.bytedance.bitsail.common.row.Row row) throws IOException { - Object[] fields = row.getFields(); - int arity = ArrayUtils.getLength(fields); - Row flinkRow = new Row(org.apache.flink.types.RowKind.fromByteValue(row.getKind().toByteValue()), arity); - for (int index = 0; index < arity; index++) { - TypeInfo typeInfo = rowTypeInfo.getTypeInfos()[index]; - Object field = row.getField(index); - if (field instanceof Column) { - field = deserializeColumn((Column) field, typeInfo, rowTypeInfo.getFieldNames()[index]); - } else { - if (!compareValueTypeInfo(field, typeInfo)) { - //todo transform - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("column %s type info %s not match with value type %s.", - rowTypeInfo.getFieldNames()[index], typeInfo, field.getClass())); - } - } - flinkRow.setField(index, field); - } - return flinkRow; - } - - private static boolean compareValueTypeInfo(Object value, - TypeInfo typeInfo) { - if (Objects.isNull(value)) { - return true; - } - - if (typeInfo instanceof MapTypeInfo) { - if (!(value instanceof Map)) { - return false; - } - Map map = (Map) value; - if (MapUtils.isEmpty(map)) { - return true; - } - - MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; - TypeInfo keyTypeInfo = mapTypeInfo.getKeyTypeInfo(); - TypeInfo valueTypeInfo = mapTypeInfo.getValueTypeInfo(); - - Iterator keyIterator = map.keySet().iterator(); - Object next = keyIterator.next(); - - return compareValueTypeInfo(next, keyTypeInfo) - && compareValueTypeInfo(map.get(next), valueTypeInfo); - } - - if (typeInfo instanceof ListTypeInfo) { - if ((!(value instanceof List))) { - return false; - } - 
List list = (List) value; - if (CollectionUtils.isEmpty(list)) { - return true; - } - ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; - TypeInfo elementTypeInfo = listTypeInfo.getElementTypeInfo(); - - return compareValueTypeInfo(list.get(0), elementTypeInfo); - - } - return value.getClass().isAssignableFrom(typeInfo.getTypeClass()); - } - - @Override - public com.bytedance.bitsail.common.row.Row deserialize(Row serialized) throws IOException { - int arity = serialized.getArity(); - Object[] fields = new Object[arity]; - for (int index = 0; index < arity; index++) { - TypeInfo typeInfo = rowTypeInfo.getTypeInfos()[index]; - Object field = serialized.getField(index); - String name = rowTypeInfo.getFieldNames()[index]; - if (field instanceof Column) { - fields[index] = deserializeColumn((Column) field, typeInfo, name); - } else { - fields[index] = field; - } - } - return new com.bytedance.bitsail.common.row.Row( - serialized.getKind().toByteValue(), - fields); - } - - private Object deserializeColumn(Column object, TypeInfo typeInfo, String name) throws BitSailException { - if (Objects.isNull(object)) { - return null; - } - - Class typeInfoTypeClass = typeInfo.getTypeClass(); - if (List.class.isAssignableFrom(typeInfoTypeClass)) { - if (!(object instanceof ListColumn)) { - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Column %s is not list type, value: %s", name, object)); - } - return getListColumnValue((List) object, (ListTypeInfo) typeInfo, name); - } - - if (Map.class.isAssignableFrom(typeInfoTypeClass)) { - if (!(object instanceof MapColumn)) { - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Column %s is not map type, value: %s", name, object)); - } - return getMapColumnValue((MapColumn) object, (MapTypeInfo) typeInfo, name); - } - - return getBasicTypeColumnValue((Column) object, typeInfo); - } - - private List getListColumnValue(List columns, ListTypeInfo 
listTypeInfo, String name) { - TypeInfo elementTypeInfo = listTypeInfo.getElementTypeInfo(); - List objects = new ArrayList<>(); - if (Objects.nonNull(columns)) { - for (Column column : columns) { - objects.add(deserializeColumn(column, elementTypeInfo, name)); - } - } - return objects; - } - - private Map getMapColumnValue(Map columnMap, MapTypeInfo mapTypeInfo, String name) { - TypeInfo keyTypeInfo = mapTypeInfo.getKeyTypeInfo(); - TypeInfo valueTypeInfo = mapTypeInfo.getValueTypeInfo(); - - Map maps = new HashMap<>(); - if (Objects.nonNull(columnMap)) { - columnMap.forEach((key, value) -> { - Object keyValue = deserializeColumn(key, keyTypeInfo, name); - if (Objects.isNull(keyValue)) { - throw new BitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, ""); - } - Object mapValue = deserializeColumn(value, valueTypeInfo, name); - maps.put(keyValue, mapValue); - }); - } - return maps; - } - - private Object getBasicTypeColumnValue(Column column, TypeInfo typeInfo) { - Class typeInfoTypeClass = typeInfo.getTypeClass(); - if (null == column.getRawData()) { - return null; - } - - if (STRING_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asString(); - } else if (TypeInfos.BOOLEAN_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asBoolean(); - } else if (TypeInfos.BYTE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asLong().byteValue(); - } else if (TypeInfos.INT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asLong().intValue(); - } else if (TypeInfos.SHORT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asLong().shortValue(); - } else if (TypeInfos.LONG_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asLong(); - } else if (TypeInfos.BIG_INTEGER_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asBigInteger(); - } else if (TypeInfos.FLOAT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asDouble().floatValue(); - } else if 
(TypeInfos.DOUBLE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asDouble(); - } else if (TypeInfos.BIG_DECIMAL_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asBigDecimal(); - } else if (TypeInfos.SQL_DATE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return new java.sql.Date(column.asDate().getTime()); - } else if (TypeInfos.SQL_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return new java.sql.Time(column.asDate().getTime()); - } else if (TypeInfos.SQL_TIMESTAMP_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return new java.sql.Timestamp(column.asDate().getTime()); - } else if (TypeInfos.LOCAL_DATE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asDate(); - } else if (TypeInfos.LOCAL_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asDate(); - } else if (TypeInfos.LOCAL_DATE_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asDate(); - } else if (BasicArrayTypeInfo.BINARY_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return column.asBytes(); - } else { - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - "Flink basic data type " + typeInfoTypeClass + " is not supported!"); - } - } - -} diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConverter.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConverter.java new file mode 100644 index 000000000..92c113f5b --- /dev/null +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConverter.java @@ -0,0 +1,95 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.flink.core.delagate.converter; + +import com.bytedance.bitsail.base.serializer.RowConverter; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfoValueConverter; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +/** + * Created 2022/6/21 + */ +public class FlinkRowConverter implements RowConverter { + + private static final Logger LOG = LoggerFactory.getLogger(FlinkRowConverter.class); + + private final RowTypeInfo rowTypeInfo; + + private final TypeInfoValueConverter typeInfoValueConverter; + + public FlinkRowConverter(RowTypeInfo rowTypeInfo, + BitSailConfiguration commonConfiguration) { + this.rowTypeInfo = rowTypeInfo; + this.typeInfoValueConverter = new TypeInfoValueConverter(commonConfiguration); + } + + /** + * Commonly run in source side, will transform bitsail row to flink row. 
+ */ + @Override + public Row to(com.bytedance.bitsail.common.row.Row row) throws IOException { + Object[] fields = row.getFields(); + int arity = ArrayUtils.getLength(fields); + Row flinkRow = new Row(org.apache.flink.types.RowKind.fromByteValue(row.getKind().toByteValue()), arity); + for (int index = 0; index < arity; index++) { + TypeInfo typeInfo = rowTypeInfo.getTypeInfos()[index]; + Object value = row.getField(index); + String name = rowTypeInfo.getFieldNames()[index]; + value = wrapperValueConverter(name, value, typeInfo); + flinkRow.setField(index, value); + } + return flinkRow; + } + + private Object wrapperValueConverter(String name, + Object value, + TypeInfo typeInfo) { + try { + return typeInfoValueConverter.convertObject(value, typeInfo); + } catch (Exception e) { + LOG.error("Convert column name: {}'s value: {} to type info's definition {} failed.", name, value, typeInfo.getTypeClass()); + throw e; + } + } + + /** + * Commonly run in sink side, we try to transform flink row to bitsail row. 
+ */ + @Override + public com.bytedance.bitsail.common.row.Row from(Row serialized) throws IOException { + int arity = serialized.getArity(); + Object[] fields = new Object[arity]; + for (int index = 0; index < arity; index++) { + TypeInfo typeInfo = rowTypeInfo.getTypeInfos()[index]; + Object value = serialized.getField(index); + String name = rowTypeInfo.getFieldNames()[index]; + fields[index] = wrapperValueConverter(name, value, typeInfo); + } + return new com.bytedance.bitsail.common.row.Row( + serialized.getKind().toByteValue(), + fields); + } +} diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/test/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConvertSerializerTest.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/test/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConvertSerializerTest.java index 548d92165..4cd6920e8 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/test/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConvertSerializerTest.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/test/java/com/bytedance/bitsail/flink/core/delagate/converter/FlinkRowConvertSerializerTest.java @@ -44,7 +44,7 @@ import static org.junit.Assert.assertEquals; public class FlinkRowConvertSerializerTest { - private FlinkRowConvertSerializer flinkRowConvertSerializer; + private FlinkRowConverter flinkRowConvertSerializer; @Before public void init() { @@ -58,14 +58,14 @@ public void init() { TypeInfoConverter converter = new BitSailTypeInfoConverter(); BitSailConfiguration conf = BitSailConfiguration.newDefault(); RowTypeInfo rowTypeInfo = TypeInfoUtils.getRowTypeInfo(converter, columns); - flinkRowConvertSerializer = new FlinkRowConvertSerializer(rowTypeInfo, conf); + flinkRowConvertSerializer = new FlinkRowConverter(rowTypeInfo, conf); } @Test public void serializeTest() throws IOException { com.bytedance.bitsail.common.row.Row row = 
new com.bytedance.bitsail.common.row.Row(new Object[] {"test"}); - Row serialize = flinkRowConvertSerializer.serialize(row); - com.bytedance.bitsail.common.row.Row deserialize = flinkRowConvertSerializer.deserialize(serialize); + Row serialize = flinkRowConvertSerializer.to(row); + com.bytedance.bitsail.common.row.Row deserialize = flinkRowConvertSerializer.from(serialize); Assert.assertEquals(row.getField(0), deserialize.getField(0)); } @@ -83,7 +83,7 @@ public void deserializeTest() throws IOException { listValue, mapValue ); - com.bytedance.bitsail.common.row.Row bitSailRow = flinkRowConvertSerializer.deserialize(row); + com.bytedance.bitsail.common.row.Row bitSailRow = flinkRowConvertSerializer.from(row); assertEquals(bitSailRow.getFields().length, 5); assertEquals(bitSailRow.getField(0), stringValue); assertEquals(bitSailRow.getField(1), intValue); @@ -109,7 +109,7 @@ public void bitSailColumnDeserializeTest() throws IOException { new ListColumn<>(listValue, StringColumn.class), new MapColumn<>(mapValue, StringColumn.class, LongColumn.class) ); - com.bytedance.bitsail.common.row.Row bitSailRow = flinkRowConvertSerializer.deserialize(row); + com.bytedance.bitsail.common.row.Row bitSailRow = flinkRowConvertSerializer.from(row); assertEquals(bitSailRow.getFields().length, 5); assertEquals(bitSailRow.getField(0), stringValue); assertEquals(bitSailRow.getField(1), intValue); From fb13afabd10e4f394f4a62169b28ae2d827b11f2 Mon Sep 17 00:00:00 2001 From: haoke Date: Mon, 24 Apr 2023 16:09:46 +0800 Subject: [PATCH 05/14] [BitSail][Multi-Sink]Support MultipleTableSink. 
--- ...atalogFactory.java => CatalogFactory.java} | 11 +- ...yHelper.java => CatalogFactoryHelper.java} | 8 +- .../extension/SupportMultipleSinkTable.java | 37 ++ .../common/catalog/TableCatalogManager.java | 48 +-- .../table/{TableCatalog.java => Catalog.java} | 15 +- .../common/catalog/table/CatalogTable.java | 12 +- .../catalog/table/CatalogTableDefinition.java | 46 --- .../bitsail/common/catalog/table/TableId.java | 94 +++++ .../bitsail/common/option/WriterOptions.java | 8 + .../bitsail/common/row/MultipleTableRow.java | 119 +++++++ .../common/typeinfo/GenericTypeInfo.java | 55 +++ .../typeinfo/TypeInfoValueConverter.java | 63 ++-- ...nagerTest.java => CatalogManagerTest.java} | 30 +- ...FakeTableCatalog.java => FakeCatalog.java} | 29 +- .../DebeziumDeserializationSchema.java | 4 +- .../DebeziumRowDeserializationSchema.java | 325 ++++++++++++++++++ .../option/DebeziumReaderOptions.java | 8 + .../option/DebeziumWriterOptions.java | 4 +- .../DebeziumRowDeserializationSchemaTest.java | 48 +++ .../src/test/resources/file/debezium.json | 257 ++++++++++++++ ...HiveTableCatalog.java => HiveCatalog.java} | 25 +- .../hive/common/HiveTableCatalogFactory.java | 16 +- ...dance.bitsail.base.catalog.CatalogFactory} | 0 ...SQLTableCatalog.java => MySQLCatalog.java} | 39 ++- .../catalog/MySQLTableCatalogFactory.java | 16 +- ...dance.bitsail.base.catalog.CatalogFactory} | 0 bitsail-cores/bitsail-core-common/pom.xml | 56 +++ .../MultipleTableCommitSerializer.java | 96 ++++++ .../MultipleTableStateSerializer.java | 96 ++++++ .../sink/multiple/MultipleTableCommitter.java | 62 ++++ .../sink/multiple/MultipleTableSink.java | 136 ++++++++ .../sink/multiple/MultipleTableWriter.java | 303 ++++++++++++++++ .../comittable/MultipleTableCommit.java | 38 ++ .../multiple/state/MultipleTableState.java | 38 ++ .../core/common/catalog/FakeCatalog.java | 89 +++++ .../MultipleTableCommitSerializerTest.java | 55 +++ .../MultipleTableStateSerializerTest.java | 55 +++ 
.../core/common/sink/MultiTablePrintSink.java | 47 +++ .../multiple/MultipleTableWriterTest.java | 182 ++++++++++ .../test/resources/file/debezium_table1.json | 257 ++++++++++++++ .../writer/builder/FlinkWriterBuilder.java | 34 +- .../writer/delegate/DelegateFlinkWriter.java | 6 +- .../bitsail-core-flink-1.16-bridge/pom.xml | 21 ++ .../FlinkDAGBuilderInterceptor.java | 12 +- bitsail-cores/bitsail-core-flink/pom.xml | 8 + bitsail-cores/pom.xml | 1 + .../legacy/mysql/MysqlConnectorITCase.java | 8 +- 47 files changed, 2704 insertions(+), 213 deletions(-) rename bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/{TableCatalogFactory.java => CatalogFactory.java} (68%) rename bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/{TableCatalogFactoryHelper.java => CatalogFactoryHelper.java} (77%) create mode 100644 bitsail-base/src/main/java/com/bytedance/bitsail/base/extension/SupportMultipleSinkTable.java rename bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/{TableCatalog.java => Catalog.java} (87%) delete mode 100644 bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTableDefinition.java create mode 100644 bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableId.java create mode 100644 bitsail-common/src/main/java/com/bytedance/bitsail/common/row/MultipleTableRow.java create mode 100644 bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/GenericTypeInfo.java rename bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/{TableCatalogManagerTest.java => CatalogManagerTest.java} (84%) rename bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/fake/{FakeTableCatalog.java => FakeCatalog.java} (81%) create mode 100644 bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java create mode 100644 
bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java create mode 100644 bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium.json rename bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/{HiveTableCatalog.java => HiveCatalog.java} (83%) rename bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/resources/META-INF/services/{com.bytedance.bitsail.base.catalog.TableCatalogFactory => com.bytedance.bitsail.base.catalog.CatalogFactory} (100%) rename bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/{MySQLTableCatalog.java => MySQLCatalog.java} (80%) rename bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/resources/META-INF/services/{com.bytedance.bitsail.base.catalog.TableCatalogFactory => com.bytedance.bitsail.base.catalog.CatalogFactory} (100%) create mode 100644 bitsail-cores/bitsail-core-common/pom.xml create mode 100644 bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableCommitSerializer.java create mode 100644 bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableStateSerializer.java create mode 100644 bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableCommitter.java create mode 100644 bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java create mode 100644 bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java create mode 100644 
bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/comittable/MultipleTableCommit.java create mode 100644 bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/state/MultipleTableState.java create mode 100644 bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/catalog/FakeCatalog.java create mode 100644 bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableCommitSerializerTest.java create mode 100644 bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableStateSerializerTest.java create mode 100644 bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/MultiTablePrintSink.java create mode 100644 bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java create mode 100644 bitsail-cores/bitsail-core-common/src/test/resources/file/debezium_table1.json diff --git a/bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/TableCatalogFactory.java b/bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/CatalogFactory.java similarity index 68% rename from bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/TableCatalogFactory.java rename to bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/CatalogFactory.java index d91229a51..a24b02952 100644 --- a/bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/TableCatalogFactory.java +++ b/bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/CatalogFactory.java @@ -18,8 +18,7 @@ import com.bytedance.bitsail.base.component.ComponentBuilder; import com.bytedance.bitsail.base.connector.BuilderGroup; -import com.bytedance.bitsail.base.execution.ExecutionEnviron; -import com.bytedance.bitsail.common.catalog.table.TableCatalog; +import 
com.bytedance.bitsail.common.catalog.table.Catalog; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import java.io.Serializable; @@ -27,16 +26,14 @@ /** * Created 2022/5/23 */ -public interface TableCatalogFactory extends Serializable, ComponentBuilder { +public interface CatalogFactory extends Serializable, ComponentBuilder { /** * Create a table catalog. * - * @param executionEnviron execution environment * @param connectorConfiguration configuration for the reader/writer */ - TableCatalog createTableCatalog(BuilderGroup builderGroup, - ExecutionEnviron executionEnviron, - BitSailConfiguration connectorConfiguration); + Catalog createTableCatalog(BuilderGroup builderGroup, + BitSailConfiguration connectorConfiguration); } diff --git a/bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/TableCatalogFactoryHelper.java b/bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/CatalogFactoryHelper.java similarity index 77% rename from bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/TableCatalogFactoryHelper.java rename to bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/CatalogFactoryHelper.java index 6e9695ab9..ccb5aaf3c 100644 --- a/bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/TableCatalogFactoryHelper.java +++ b/bitsail-base/src/main/java/com/bytedance/bitsail/base/catalog/CatalogFactoryHelper.java @@ -20,11 +20,11 @@ import org.apache.commons.lang3.StringUtils; -public class TableCatalogFactoryHelper { +public class CatalogFactoryHelper { - public static TableCatalogFactory getTableCatalogFactory(String connectorName) { - DefaultComponentBuilderLoader loader = - new DefaultComponentBuilderLoader<>(TableCatalogFactory.class); + public static CatalogFactory getTableCatalogFactory(String connectorName) { + DefaultComponentBuilderLoader loader = + new DefaultComponentBuilderLoader<>(CatalogFactory.class); return loader.loadComponent(StringUtils.lowerCase(connectorName), false); } diff 
--git a/bitsail-base/src/main/java/com/bytedance/bitsail/base/extension/SupportMultipleSinkTable.java b/bitsail-base/src/main/java/com/bytedance/bitsail/base/extension/SupportMultipleSinkTable.java new file mode 100644 index 000000000..77e589942 --- /dev/null +++ b/bitsail-base/src/main/java/com/bytedance/bitsail/base/extension/SupportMultipleSinkTable.java @@ -0,0 +1,37 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.base.extension; + +import com.bytedance.bitsail.base.connector.writer.v1.Writer; +import com.bytedance.bitsail.base.connector.writer.v1.WriterCommitter; +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; + +import java.io.Serializable; +import java.util.Optional; + +public interface SupportMultipleSinkTable { + + Writer createWriter(Writer.Context context, + BitSailConfiguration templateConfiguration); + + Optional> createCommitter(BitSailConfiguration templateConfiguration); + + BitSailConfiguration applyTableId(BitSailConfiguration template, + TableId tableId); + +} diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/TableCatalogManager.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/TableCatalogManager.java index 59cc885b8..a797c1c03 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/TableCatalogManager.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/TableCatalogManager.java @@ -17,12 +17,12 @@ package com.bytedance.bitsail.common.catalog; import com.bytedance.bitsail.common.BitSailException; +import com.bytedance.bitsail.common.catalog.table.Catalog; import com.bytedance.bitsail.common.catalog.table.CatalogTable; import com.bytedance.bitsail.common.catalog.table.CatalogTableAlterDefinition; import com.bytedance.bitsail.common.catalog.table.CatalogTableColumn; -import com.bytedance.bitsail.common.catalog.table.CatalogTableDefinition; import com.bytedance.bitsail.common.catalog.table.CatalogTableSchema; -import com.bytedance.bitsail.common.catalog.table.TableCatalog; +import com.bytedance.bitsail.common.catalog.table.TableId; import com.bytedance.bitsail.common.catalog.table.TableOperation; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.model.ColumnInfo; @@ -50,8 +50,8 @@ public 
class TableCatalogManager { private final TypeInfoConverter readerTypeInfoConverter; private final TypeInfoConverter writerTypeInfoConverter; - private final TableCatalog readerTableCatalog; - private final TableCatalog writerTableCatalog; + private final Catalog readerCatalog; + private final Catalog writerCatalog; private final BitSailConfiguration commonConfiguration; private final BitSailConfiguration readerConfiguration; @@ -73,15 +73,15 @@ public class TableCatalogManager { @Builder public TableCatalogManager(TypeInfoConverter readerTypeInfoConverter, TypeInfoConverter writerTypeInfoConverter, - TableCatalog readerTableCatalog, - TableCatalog writerTableCatalog, + Catalog readerCatalog, + Catalog writerCatalog, BitSailConfiguration commonConfiguration, BitSailConfiguration readerConfiguration, BitSailConfiguration writerConfiguration) { this.readerTypeInfoConverter = readerTypeInfoConverter; this.writerTypeInfoConverter = writerTypeInfoConverter; - this.readerTableCatalog = readerTableCatalog; - this.writerTableCatalog = writerTableCatalog; + this.readerCatalog = readerCatalog; + this.writerCatalog = writerCatalog; this.commonConfiguration = commonConfiguration; this.readerConfiguration = readerConfiguration; this.writerConfiguration = writerConfiguration; @@ -102,7 +102,7 @@ private void prepareCatalogManager() { } public void alignmentCatalogTable() throws Exception { - if (Objects.isNull(readerTableCatalog) || Objects.isNull(writerTableCatalog)) { + if (Objects.isNull(readerCatalog) || Objects.isNull(writerCatalog)) { return; } @@ -114,28 +114,28 @@ public void alignmentCatalogTable() throws Exception { startTableCatalog(); try { - CatalogTableDefinition readerTableDefinition = readerTableCatalog.createCatalogTableDefinition(); - CatalogTableDefinition writerTableDefinition = readerTableCatalog.createCatalogTableDefinition(); - if (!readerTableCatalog.tableExists(readerTableDefinition)) { + TableId readerTableDefinition = 
readerCatalog.createCatalogTableDefinition(); + TableId writerTableDefinition = readerCatalog.createCatalogTableDefinition(); + if (!readerCatalog.tableExists(readerTableDefinition)) { throw BitSailException.asBitSailException(TableCatalogErrorCode.TABLE_CATALOG_TABLE_NOT_EXISTS, String.format("Reader table definition %s not exists.", readerTableDefinition)); } // get reader catalog table. - readerCatalogTable = readerTableCatalog.getCatalogTable(readerTableDefinition); + readerCatalogTable = readerCatalog.getCatalogTable(readerTableDefinition); - if (!writerTableCatalog.tableExists(writerTableDefinition)) { + if (!writerCatalog.tableExists(writerTableDefinition)) { if (!tableCatalogCreateTableNotExists) { throw BitSailException.asBitSailException(TableCatalogErrorCode.TABLE_CATALOG_TABLE_NOT_EXISTS, String.format("Writer table definition %s not exists.", writerTableDefinition)); } // try to create table when not exists. - writerTableCatalog.createTable(writerTableDefinition, readerCatalogTable); + writerCatalog.createTable(writerTableDefinition, readerCatalogTable); } // get writer catalog table. - writerCatalogTable = writerTableCatalog.getCatalogTable(writerTableDefinition); + writerCatalogTable = writerCatalog.getCatalogTable(writerTableDefinition); // get base table schema. 
CatalogTableSchema catalogTableSchema = tableCatalogStrategy @@ -190,7 +190,7 @@ private void alterCatalogSchema(CatalogTableAlterDefinition catalogTableAlterDef CollectionUtils.isNotEmpty(catalogTableAlterDefinition.getPendingAddColumns())) { LOG.info("Writer catalog table {} try to add column: {}.", writerCatalogTable, catalogTableAlterDefinition.getPendingAddColumns()); - writerTableCatalog.alterTableColumns( + writerCatalog.alterTableColumns( TableOperation.ALTER_COLUMNS_ADD, catalogTableAlterDefinition.getPendingAddColumns() ); @@ -200,7 +200,7 @@ private void alterCatalogSchema(CatalogTableAlterDefinition catalogTableAlterDef CollectionUtils.isNotEmpty(catalogTableAlterDefinition.getPendingUpdateColumns())) { LOG.info("Writer catalog table {} try to update column: {}.", writerCatalogTable, catalogTableAlterDefinition.getPendingUpdateColumns()); - writerTableCatalog.alterTableColumns( + writerCatalog.alterTableColumns( TableOperation.ALTER_COLUMNS_UPDATE, catalogTableAlterDefinition.getPendingUpdateColumns() ); @@ -210,7 +210,7 @@ private void alterCatalogSchema(CatalogTableAlterDefinition catalogTableAlterDef CollectionUtils.isNotEmpty(catalogTableAlterDefinition.getPendingDeleteColumns())) { LOG.info("Writer catalog table {} try to delete column: {}.", writerCatalogTable, catalogTableAlterDefinition.getPendingDeleteColumns()); - writerTableCatalog.alterTableColumns( + writerCatalog.alterTableColumns( TableOperation.ALTER_COLUMNS_DELETE, catalogTableAlterDefinition.getPendingDeleteColumns() ); @@ -262,7 +262,7 @@ private CatalogTableAlterDefinition calNecessaryCatalogSchema(CatalogTableSchema TypeInfo baseTypeInfo = catalogTableColumn.getType(); finalCatalogColumns.add(catalogTableColumn); - if (!writerTableCatalog.compareTypeCompatible(writerTypeInfo, baseTypeInfo)) { + if (!writerCatalog.compareTypeCompatible(writerTypeInfo, baseTypeInfo)) { pendingUpdateTableColumns.add(catalogTableColumn); } } else { @@ -279,13 +279,13 @@ private 
CatalogTableAlterDefinition calNecessaryCatalogSchema(CatalogTableSchema } private void startTableCatalog() { - readerTableCatalog.open(readerTypeInfoConverter); - writerTableCatalog.open(writerTypeInfoConverter); + readerCatalog.open(readerTypeInfoConverter); + writerCatalog.open(writerTypeInfoConverter); } private void closeTableCatalog() { - readerTableCatalog.close(); - writerTableCatalog.close(); + readerCatalog.close(); + writerCatalog.close(); } } diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableCatalog.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/Catalog.java similarity index 87% rename from bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableCatalog.java rename to bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/Catalog.java index a8cfe1d23..8659b07d4 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableCatalog.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/Catalog.java @@ -28,7 +28,7 @@ /** * Table catalog only for the signal table for now. */ -public interface TableCatalog extends Serializable { +public interface Catalog extends Serializable { /** * Open Table catalog @@ -43,22 +43,27 @@ public interface TableCatalog extends Serializable { /** * Get the reference table for the table catalog. */ - CatalogTableDefinition createCatalogTableDefinition(); + TableId createCatalogTableDefinition(); + + /** + * List table under the database + */ + List listTables(); /** * Check the table exits or not. */ - boolean tableExists(CatalogTableDefinition catalogTableDefinition); + boolean tableExists(TableId catalogTableDefinition); /** * Acquire catalog table by the table definition. 
*/ - CatalogTable getCatalogTable(CatalogTableDefinition catalogTableDefinition); + CatalogTable getCatalogTable(TableId catalogTableDefinition); /** * Create table */ - void createTable(CatalogTableDefinition catalogTableDefinition, + void createTable(TableId catalogTableDefinition, CatalogTable catalogTable); /** diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTable.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTable.java index 85d7c2d4e..3deebf791 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTable.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTable.java @@ -25,21 +25,21 @@ @Builder public class CatalogTable implements Serializable { - private CatalogTableDefinition catalogTableDefinition; + private TableId tableId; private final CatalogTableSchema catalogTableSchema; private final String comment; - public CatalogTable(CatalogTableDefinition catalogTableDefinition, + public CatalogTable(TableId tableId, CatalogTableSchema catalogTableSchema) { - this(catalogTableDefinition, catalogTableSchema, null); + this(tableId, catalogTableSchema, null); } - public CatalogTable(CatalogTableDefinition catalogTableDefinition, + public CatalogTable(TableId tableId, CatalogTableSchema catalogTableSchema, String comment) { - this.catalogTableDefinition = catalogTableDefinition; + this.tableId = tableId; this.catalogTableSchema = catalogTableSchema; this.comment = comment; } @@ -47,7 +47,7 @@ public CatalogTable(CatalogTableDefinition catalogTableDefinition, @Override public String toString() { return "CatalogTable{" + - "catalogTableDefinition=" + catalogTableDefinition + + "catalogTableDefinition=" + tableId + ", catalogTableSchema=" + catalogTableSchema + ", comment='" + comment + '\'' + '}'; diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTableDefinition.java 
b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTableDefinition.java deleted file mode 100644 index 0170fcf22..000000000 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTableDefinition.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.bytedance.bitsail.common.catalog.table; - -import lombok.Builder; -import lombok.Getter; - -import java.io.Serializable; - -@Getter -@Builder -public class CatalogTableDefinition implements Serializable { - - private final String database; - private final String schema; - private final String table; - - public CatalogTableDefinition(String database, String schema, String table) { - this.database = database; - this.schema = schema; - this.table = table; - } - - @Override - public String toString() { - return "CatalogTableDefinition{" + - "database='" + database + '\'' + - ", schema='" + schema + '\'' + - ", table='" + table + '\'' + - '}'; - } -} diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableId.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableId.java new file mode 100644 index 000000000..35a08fffe --- /dev/null +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableId.java @@ -0,0 +1,94 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. 
and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.common.catalog.table; + +import lombok.Builder; +import lombok.Getter; +import org.apache.commons.lang3.StringUtils; + +import java.io.Serializable; +import java.util.Objects; + +@Getter +@Builder +public class TableId implements Serializable { + private static final int DEFAULT_TABLE_ID_LENGTH = 2; + private static final int CONTAINS_SCHEMA_TABLE_ID_LENGTH = 3; + + private final String database; + private final String schema; + private final String table; + + public TableId(String database, String schema, String table) { + this.database = database; + this.schema = schema; + this.table = table; + } + + public static TableId of(String tableId) { + String[] paths = tableId.split("\\."); + + if (paths.length == DEFAULT_TABLE_ID_LENGTH) { + return of(paths[0], paths[1]); + } + if (paths.length == CONTAINS_SCHEMA_TABLE_ID_LENGTH) { + return of(paths[0], paths[1], paths[2]); + } + throw new IllegalArgumentException( + String.format("Can't get table-id from value: %s", tableId)); + } + + public static TableId of(String database, String table) { + return of(database, null, table); + } + + public static TableId of(String database, String schema, String table) { + return new TableId(database, schema, table); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return 
false; + } + TableId tableId = (TableId) o; + return Objects.equals(database, tableId.database) && Objects.equals(schema, tableId.schema) && Objects.equals(table, tableId.table); + } + + @Override + public int hashCode() { + return Objects.hash(database, schema, table); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append(database) + .append("."); + + if (StringUtils.isNotEmpty(schema)) { + builder.append(schema) + .append("."); + } + + builder.append(table); + return builder.toString(); + } +} diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/option/WriterOptions.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/option/WriterOptions.java index fe16b550c..f76c54c2b 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/option/WriterOptions.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/option/WriterOptions.java @@ -108,5 +108,13 @@ interface BaseWriterOptions { ConfigOption WRITE_MODE = key(WRITER_PREFIX + "write_mode") .defaultValue("overwrite"); + + ConfigOption MULTIPLE_TABLE_ENABLED = + key(WRITER_PREFIX + "multiple_table_enabled") + .defaultValue(false); + + ConfigOption TABLE_PATTERN = + key(WRITER_PREFIX + "table_pattern") + .defaultValue(".*"); } } diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/row/MultipleTableRow.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/row/MultipleTableRow.java new file mode 100644 index 000000000..c5d549607 --- /dev/null +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/row/MultipleTableRow.java @@ -0,0 +1,119 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.common.row; + +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfos; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.stream.Collectors; + +@NoArgsConstructor +@AllArgsConstructor +@Getter +@Builder +public class MultipleTableRow implements Serializable { + + private static final RowTypeInfo MULTIPLE_TABLE_ROW_TYPE_INFO = + new RowTypeInfo(Arrays.stream(MultipleTableField.values()) + .map(MultipleTableField::getName) + .collect(Collectors.toList()) + .toArray(new String[] {}), + + Arrays.stream(MultipleTableField.values()) + .map(MultipleTableField::getTypeInfo) + .collect(Collectors.toList()) + .toArray(new TypeInfo[] {})); + + private String tableId; + + private String key; + + private String value; + + private String offset; + + private String partition; + + public static MultipleTableRow of(String tableId, + String key, + String value, + String offset, + String partition) { + return MultipleTableRow + .builder() + .tableId(tableId) + .key(key) + .value(value) + .offset(offset) + .partition(partition) + .build(); + } + + public static MultipleTableRow of(Row row) { + return MultipleTableRow + .builder() + .tableId(row.getString(MultipleTableField.TABLE_ID_FIELD.index)) + .key(row.getString(MultipleTableField.KEY_FIELD.index)) + 
.value(row.getString(MultipleTableField.VALUE_FIELD.index)) + .offset(row.getString(MultipleTableField.OFFSET_FIELD.index)) + .partition(row.getString(MultipleTableField.PARTITION_FIELD.index)) + .build(); + } + + public Row asRow() { + Row row = new Row(MultipleTableField.values().length); + row.setField(MultipleTableField.TABLE_ID_FIELD.index, tableId); + row.setField(MultipleTableField.KEY_FIELD.index, key); + row.setField(MultipleTableField.VALUE_FIELD.index, value); + row.setField(MultipleTableField.OFFSET_FIELD.index, offset); + row.setField(MultipleTableField.PARTITION_FIELD.index, partition); + return row; + } + + public RowTypeInfo getRowTypeInfo() { + return MULTIPLE_TABLE_ROW_TYPE_INFO; + } + + @Getter + private enum MultipleTableField { + TABLE_ID_FIELD("table-id", TypeInfos.STRING_TYPE_INFO, 0), + KEY_FIELD("key", TypeInfos.STRING_TYPE_INFO, 1), + VALUE_FIELD("value", TypeInfos.STRING_TYPE_INFO, 2), + OFFSET_FIELD("offset", TypeInfos.STRING_TYPE_INFO, 3), + PARTITION_FIELD("partition", TypeInfos.STRING_TYPE_INFO, 4); + + private final String name; + + private final TypeInfo typeInfo; + + private final int index; + + MultipleTableField(String name, TypeInfo typeInfo, int index) { + this.name = name; + this.typeInfo = typeInfo; + this.index = index; + } + } +} diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/GenericTypeInfo.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/GenericTypeInfo.java new file mode 100644 index 000000000..62d8229da --- /dev/null +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/GenericTypeInfo.java @@ -0,0 +1,55 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.common.typeinfo; + +import com.google.common.base.Preconditions; + +public class GenericTypeInfo extends TypeInfo { + + private final Class typeClass; + + public GenericTypeInfo(Class typeClass) { + this.typeClass = Preconditions.checkNotNull(typeClass); + } + + @Override + public Class getTypeClass() { + return typeClass; + } + + @Override + public int hashCode() { + return typeClass.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof GenericTypeInfo) { + @SuppressWarnings("unchecked") + GenericTypeInfo genericTypeInfo = (GenericTypeInfo) obj; + + return typeClass == genericTypeInfo.typeClass; + } else { + return false; + } + } + + @Override + public String toString() { + return "GenericType<" + typeClass.getCanonicalName() + ">"; + } +} diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java index 3fbdbe322..7851bd6ba 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java @@ -22,6 +22,7 @@ import com.bytedance.bitsail.common.column.MapColumn; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.exception.CommonErrorCode; +import com.bytedance.bitsail.common.option.CommonOptions; import com.google.common.collect.Lists; 
import com.google.common.collect.Maps; @@ -38,7 +39,6 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; -import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.HashMap; @@ -52,14 +52,19 @@ public class TypeInfoValueConverter implements Serializable { private static final Logger LOG = LoggerFactory.getLogger(TypeInfoValueConverter.class); - private BitSailConfiguration commonConfiguration; - private DateTimeFormatter dateFormatter; - private DateTimeFormatter timeFormatter; - private DateTimeFormatter dateTimeFormatter; - private ZoneId timezone; + private final BitSailConfiguration commonConfiguration; + private final DateTimeFormatter dateFormatter; + private final DateTimeFormatter timeFormatter; + private final DateTimeFormatter dateTimeFormatter; public TypeInfoValueConverter(BitSailConfiguration commonConfiguration) { this.commonConfiguration = commonConfiguration; + this.dateFormatter = DateTimeFormatter.ofPattern(commonConfiguration.get(CommonOptions + .DateFormatOptions.DATE_PATTERN)); + this.timeFormatter = DateTimeFormatter.ofPattern(commonConfiguration.get(CommonOptions + .DateFormatOptions.TIME_PATTERN)); + this.dateTimeFormatter = DateTimeFormatter.ofPattern(commonConfiguration.get(CommonOptions + .DateFormatOptions.DATE_TIME_PATTERN)); } /** @@ -71,13 +76,15 @@ public Object convertObject(Object value, if (Objects.isNull(value)) { return null; } + if (value instanceof Column) { return convertColumnObject((Column) value, typeInfo); } + if (compareValueTypeInfo(value, typeInfo)) { return value; } - return convertNormalObject(value, typeInfo); + return convertJavaObject(value, typeInfo); } private Object convertColumnObject(Column value, @@ -184,8 +191,8 @@ private Object convertPrimitiveColumnObject(Column column, TypeInfo typeInfo) /** * Compare object's value type match with type info's definition or not. 
*/ - private boolean compareValueTypeInfo(Object value, - TypeInfo typeInfo) { + private static boolean compareValueTypeInfo(Object value, + TypeInfo typeInfo) { if (Objects.isNull(value)) { return true; } @@ -229,7 +236,7 @@ private boolean compareValueTypeInfo(Object value, return value.getClass().isAssignableFrom(typeInfo.getTypeClass()); } - private Object convertNormalObject(Object value, TypeInfo typeInfo) { + private Object convertJavaObject(Object value, TypeInfo typeInfo) { if (Objects.isNull(value)) { return null; } @@ -240,25 +247,25 @@ private Object convertNormalObject(Object value, TypeInfo typeInfo) { "Object can't convert to map type."); } MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; - Map raw = (Map) value; - Map map = Maps.newHashMap(); - for (Object key : raw.keySet()) { - map.put(convertNormalObject(key, mapTypeInfo.getKeyTypeInfo()), - convertNormalObject(raw.get(key), mapTypeInfo.getValueTypeInfo())); + Map origin = (Map) value; + Map converted = Maps.newHashMap(); + for (Object key : origin.keySet()) { + converted.put(convertJavaObject(key, mapTypeInfo.getKeyTypeInfo()), + convertJavaObject(origin.get(key), mapTypeInfo.getValueTypeInfo())); } - return map; + return converted; } else if (typeInfo instanceof ListTypeInfo) { if (!(value instanceof List)) { throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, "Object can't convert to list type."); } ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; - List raw = (List) value; - List list = Lists.newArrayList(); - for (Object key : raw) { - list.add(convertNormalObject(key, listTypeInfo.getElementTypeInfo())); + List origin = (List) value; + List converted = Lists.newArrayList(); + for (Object key : origin) { + converted.add(convertJavaObject(key, listTypeInfo.getElementTypeInfo())); } - return list; + return converted; } else { return convertPrimitiveObject(value, typeInfo); @@ -280,7 +287,7 @@ private Object convertPrimitiveObject(Object value, TypeInfo typeInfo) 
{ if (value instanceof byte[]) { return new String((byte[]) value, Charset.defaultCharset()); } - return String.valueOf(value); + return value.toString(); } if (TypeInfos.SHORT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { @@ -291,6 +298,9 @@ private Object convertPrimitiveObject(Object value, TypeInfo typeInfo) { } if (TypeInfos.INT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof Integer) { + return (Integer) value; + } if (value instanceof Number) { return ((Number) value).intValue(); } @@ -298,6 +308,9 @@ private Object convertPrimitiveObject(Object value, TypeInfo typeInfo) { } if (TypeInfos.LONG_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof Long) { + return (Long) value; + } if (value instanceof Number) { return ((Number) value).longValue(); } @@ -330,6 +343,12 @@ private Object convertPrimitiveObject(Object value, TypeInfo typeInfo) { } if (TypeInfos.BOOLEAN_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { + if (value instanceof Integer) { + return (Integer) value != 0; + } + if (value instanceof Long) { + return (Long) value != 0; + } String str = value.toString(); return Boolean.parseBoolean(str); } diff --git a/bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/TableCatalogManagerTest.java b/bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/CatalogManagerTest.java similarity index 84% rename from bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/TableCatalogManagerTest.java rename to bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/CatalogManagerTest.java index 49ce27901..3d2454576 100644 --- a/bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/TableCatalogManagerTest.java +++ b/bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/CatalogManagerTest.java @@ -16,9 +16,9 @@ package com.bytedance.bitsail.common.catalog; -import com.bytedance.bitsail.common.catalog.fake.FakeTableCatalog; +import 
com.bytedance.bitsail.common.catalog.fake.FakeCatalog; import com.bytedance.bitsail.common.catalog.table.CatalogTableColumn; -import com.bytedance.bitsail.common.catalog.table.CatalogTableDefinition; +import com.bytedance.bitsail.common.catalog.table.TableId; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.model.ColumnInfo; import com.bytedance.bitsail.common.option.ReaderOptions; @@ -32,7 +32,7 @@ import java.util.List; -public class TableCatalogManagerTest { +public class CatalogManagerTest { private BitSailConfiguration commonConfiguration; private BitSailConfiguration readerConfiguration; @@ -68,16 +68,16 @@ public void testTableCatalogAlignmentIntersect() throws Exception { commonConfiguration.set(TableCatalogOptions.COLUMN_ALIGN_STRATEGY, TableCatalogStrategy.INTERSECT.name()); commonConfiguration.set(TableCatalogOptions.SYNC_DDL, true); - FakeTableCatalog readerFakeTableCatalog = new FakeTableCatalog(readerColumns, CatalogTableDefinition.builder() + FakeCatalog readerFakeTableCatalog = new FakeCatalog(readerColumns, TableId.builder() .database("a") .table("b").build()); - FakeTableCatalog writerFakeTableCatalog = new FakeTableCatalog(writerColumns, CatalogTableDefinition.builder() + FakeCatalog writerFakeTableCatalog = new FakeCatalog(writerColumns, TableId.builder() .database("a") .table("c").build()); TableCatalogManager tableCatalogManager = TableCatalogManager.builder() - .readerTableCatalog(readerFakeTableCatalog) - .writerTableCatalog(writerFakeTableCatalog) + .readerCatalog(readerFakeTableCatalog) + .writerCatalog(writerFakeTableCatalog) .writerTypeInfoConverter(new BitSailTypeInfoConverter()) .readerTypeInfoConverter(new BitSailTypeInfoConverter()) .commonConfiguration(commonConfiguration) @@ -117,16 +117,16 @@ public void testTableCatalogAlignmentSourceOnly() throws Exception { TableCatalogStrategy.SOURCE_ONLY.name()); commonConfiguration.set(TableCatalogOptions.SYNC_DDL, true); - 
FakeTableCatalog readerFakeTableCatalog = new FakeTableCatalog(readerColumns, CatalogTableDefinition.builder() + FakeCatalog readerFakeTableCatalog = new FakeCatalog(readerColumns, TableId.builder() .database("a") .table("b").build()); - FakeTableCatalog writerFakeTableCatalog = new FakeTableCatalog(writerColumns, CatalogTableDefinition.builder() + FakeCatalog writerFakeTableCatalog = new FakeCatalog(writerColumns, TableId.builder() .database("a") .table("c").build()); TableCatalogManager tableCatalogManager = TableCatalogManager.builder() - .readerTableCatalog(readerFakeTableCatalog) - .writerTableCatalog(writerFakeTableCatalog) + .readerCatalog(readerFakeTableCatalog) + .writerCatalog(writerFakeTableCatalog) .writerTypeInfoConverter(new BitSailTypeInfoConverter()) .readerTypeInfoConverter(new BitSailTypeInfoConverter()) .commonConfiguration(commonConfiguration) @@ -167,16 +167,16 @@ public void testColumnAlignmentUpdate() throws Exception { commonConfiguration.set(TableCatalogOptions.COLUMN_ALIGN_STRATEGY, TableCatalogStrategy.SOURCE_ONLY.name()); commonConfiguration.set(TableCatalogOptions.SYNC_DDL, true); - FakeTableCatalog readerFakeTableCatalog = new FakeTableCatalog(readerColumns, CatalogTableDefinition.builder() + FakeCatalog readerFakeTableCatalog = new FakeCatalog(readerColumns, TableId.builder() .database("a") .table("b").build()); - FakeTableCatalog writerFakeTableCatalog = new FakeTableCatalog(writerColumns, CatalogTableDefinition.builder() + FakeCatalog writerFakeTableCatalog = new FakeCatalog(writerColumns, TableId.builder() .database("a") .table("c").build()); TableCatalogManager tableCatalogManager = TableCatalogManager.builder() - .readerTableCatalog(readerFakeTableCatalog) - .writerTableCatalog(writerFakeTableCatalog) + .readerCatalog(readerFakeTableCatalog) + .writerCatalog(writerFakeTableCatalog) .writerTypeInfoConverter(new BitSailTypeInfoConverter()) .readerTypeInfoConverter(new BitSailTypeInfoConverter()) 
.commonConfiguration(commonConfiguration) diff --git a/bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/fake/FakeTableCatalog.java b/bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/fake/FakeCatalog.java similarity index 81% rename from bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/fake/FakeTableCatalog.java rename to bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/fake/FakeCatalog.java index d3dc041fe..bdb904082 100644 --- a/bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/fake/FakeTableCatalog.java +++ b/bitsail-common/src/test/java/com/bytedance/bitsail/common/catalog/fake/FakeCatalog.java @@ -16,11 +16,11 @@ package com.bytedance.bitsail.common.catalog.fake; +import com.bytedance.bitsail.common.catalog.table.Catalog; import com.bytedance.bitsail.common.catalog.table.CatalogTable; import com.bytedance.bitsail.common.catalog.table.CatalogTableColumn; -import com.bytedance.bitsail.common.catalog.table.CatalogTableDefinition; import com.bytedance.bitsail.common.catalog.table.CatalogTableSchema; -import com.bytedance.bitsail.common.catalog.table.TableCatalog; +import com.bytedance.bitsail.common.catalog.table.TableId; import com.bytedance.bitsail.common.catalog.table.TableOperation; import com.bytedance.bitsail.common.model.ColumnInfo; import com.bytedance.bitsail.common.type.BitSailTypeInfoConverter; @@ -33,11 +33,11 @@ import java.util.List; import java.util.stream.Collectors; -public class FakeTableCatalog implements TableCatalog { +public class FakeCatalog implements Catalog { private final List columnInfos; - private final CatalogTableDefinition tableDefinition; + private final TableId tableDefinition; private final TypeInfoConverter typeInfoConverter; @@ -50,8 +50,8 @@ public class FakeTableCatalog implements TableCatalog { @Getter private final List deletedTableColumns; - public FakeTableCatalog(List columnInfos, - CatalogTableDefinition tableDefinition) { + public 
FakeCatalog(List columnInfos, + TableId tableDefinition) { this.columnInfos = columnInfos; this.tableDefinition = tableDefinition; this.typeInfoConverter = new BitSailTypeInfoConverter(); @@ -78,29 +78,34 @@ public void close() { } @Override - public CatalogTableDefinition createCatalogTableDefinition() { + public TableId createCatalogTableDefinition() { return tableDefinition; } @Override - public boolean tableExists(CatalogTableDefinition catalogTableDefinition) { + public List listTables() { + return null; + } + + @Override + public boolean tableExists(TableId catalogTableDefinition) { return true; } @Override - public CatalogTable getCatalogTable(CatalogTableDefinition catalogTableDefinition) { + public CatalogTable getCatalogTable(TableId catalogTableDefinition) { CatalogTableSchema tableSchema = CatalogTableSchema.builder() .columns(catalogTableColumns) .primaryKeys(null) .build(); return CatalogTable.builder() - .catalogTableDefinition(catalogTableDefinition) + .tableId(catalogTableDefinition) .catalogTableSchema(tableSchema) .build(); } @Override - public void createTable(CatalogTableDefinition catalogTableDefinition, CatalogTable catalogTable) { + public void createTable(TableId catalogTableDefinition, CatalogTable catalogTable) { } @@ -129,6 +134,6 @@ public boolean compareTypeCompatible(TypeInfo original, TypeInfo compared) @Override public List convertTableColumn(TypeInfoConverter typeInfoConverter, List columnInfos) { - return TableCatalog.super.convertTableColumn(typeInfoConverter, columnInfos); + return Catalog.super.convertTableColumn(typeInfoConverter, columnInfos); } } diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java index 
ab2174a03..6fb9628c6 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java @@ -22,8 +22,8 @@ public interface DebeziumDeserializationSchema extends DeserializationSchema { @Override - public SourceRecord deserialize(byte[] message); + SourceRecord deserialize(byte[] message); @Override - public boolean isEndOfStream(SourceRecord nextElement); + boolean isEndOfStream(SourceRecord nextElement); } diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java new file mode 100644 index 000000000..ae02dd90a --- /dev/null +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java @@ -0,0 +1,325 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.component.format.debezium; + +import com.bytedance.bitsail.base.format.DeserializationSchema; +import com.bytedance.bitsail.common.BitSailException; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.exception.CommonErrorCode; +import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.common.row.RowKind; +import com.bytedance.bitsail.component.format.debezium.option.DebeziumReaderOptions; + +import io.debezium.data.Enum; +import io.debezium.data.EnumSet; +import io.debezium.data.Envelope; +import io.debezium.data.Json; +import io.debezium.data.SpecialValueDecimal; +import io.debezium.data.VariableScaleDecimal; +import io.debezium.time.Date; +import io.debezium.time.MicroTime; +import io.debezium.time.MicroTimestamp; +import io.debezium.time.NanoTime; +import io.debezium.time.NanoTimestamp; +import io.debezium.time.Time; +import io.debezium.time.Timestamp; +import io.debezium.time.Year; +import io.debezium.time.ZonedTime; +import io.debezium.time.ZonedTimestamp; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.kafka.connect.data.Decimal; +import org.apache.kafka.connect.data.Field; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaAndValue; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.json.JsonConverter; +import org.apache.kafka.connect.json.JsonConverterConfig; +import org.apache.kafka.connect.storage.ConverterConfig; +import org.apache.kafka.connect.storage.ConverterType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.ZoneId; +import java.util.HashMap; +import java.util.Objects; + +import static io.debezium.data.Envelope.FieldName.AFTER; +import static io.debezium.data.Envelope.FieldName.BEFORE; 
+import static io.debezium.data.Envelope.FieldName.OPERATION; +import static org.apache.kafka.connect.data.Values.convertToDate; +import static org.apache.kafka.connect.data.Values.convertToTime; +import static org.apache.kafka.connect.data.Values.convertToTimestamp; + +public class DebeziumRowDeserializationSchema implements DeserializationSchema { + private static final Logger LOG = LoggerFactory.getLogger(DebeziumRowDeserializationSchema.class); + + private final BitSailConfiguration jobConf; + private final JsonConverter jsonConverter; + + public DebeziumRowDeserializationSchema(BitSailConfiguration jobConf) { + this.jobConf = jobConf; + this.jsonConverter = new JsonConverter(); + boolean includeSchema = jobConf.get(DebeziumReaderOptions.DEBEZIUM_JSON_INCLUDE_SCHEMA); + final HashMap configs = new HashMap<>(); + configs.put(ConverterConfig.TYPE_CONFIG, ConverterType.VALUE.getName()); + configs.put(JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, includeSchema); + jsonConverter.configure(configs); + } + + @Override + public Row deserialize(String message) { + throw new UnsupportedOperationException("Please invoke DeserializationSchema#deserialize(byte[], fieldNames) instead."); + } + + public Row deserialize(String message, String[] fieldNames) { + SchemaAndValue schemaAndValue; + try { + schemaAndValue = jsonConverter.toConnectData(null, message.getBytes(StandardCharsets.UTF_8)); + } catch (Exception e) { + LOG.error("Can't parse content from format [debezium], content: {}.", message, e); + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Can't parse debezium json: %s.", message), e); + } + Struct value = (Struct) schemaAndValue.value(); + Envelope.Operation operation = Envelope.Operation.forCode(value.getString(OPERATION)); + + if (operation == Envelope.Operation.CREATE || operation == Envelope.Operation.READ) { + Struct after = value.getStruct(AFTER); + return convert(after, after.schema(), fieldNames, RowKind.INSERT); 
+ } + + if (operation == Envelope.Operation.DELETE) { + Struct before = value.getStruct(BEFORE); + return convert(before, before.schema(), fieldNames, RowKind.DELETE); + } + + if (operation == Envelope.Operation.UPDATE) { + Struct after = value.getStruct(AFTER); + return convert(after, after.schema(), fieldNames, RowKind.UPDATE_AFTER); + } + + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Not support operation: %s right now.", operation)); + } + + public Row convert(Struct struct, Schema schema, String[] fieldNames, RowKind rowKind) { + Row row = new Row(fieldNames.length); + row.setKind(rowKind); + for (int index = 0; index < fieldNames.length; index++) { + Object withoutDefault = struct.getWithoutDefault(fieldNames[index]); + Field field = schema.field(fieldNames[index]); + if (Objects.isNull(withoutDefault)) { + row.setField(index, null); + } else { + try { + row.setField(index, convert(field.schema(), withoutDefault)); + } catch (BitSailException e) { + LOG.error("Failed to parse field {} from value {}.", field.name(), withoutDefault); + throw e; + } + } + } + return row; + } + + private Object convert(Schema fieldSchema, Object withoutDefault) { + if (isPrimitiveType(fieldSchema)) { + return convertPrimitiveType(fieldSchema, withoutDefault); + } else { + //todo support local timestamp zone. 
+ return convertOtherType(fieldSchema, withoutDefault, null); + } + } + + private static boolean isPrimitiveType(Schema fieldSchema) { + return fieldSchema.name() == null; + } + + private Object convertPrimitiveType(Schema fieldSchema, Object fieldValue) { + switch (fieldSchema.type()) { + case BOOLEAN: + return convertToBoolean(fieldValue); + case INT8: + case INT16: + case INT32: + return convertToInteger(fieldValue); + case INT64: + return convertToLong(fieldValue); + case FLOAT32: + return convertToFloat(fieldValue); + case FLOAT64: + return convertToDouble(fieldValue); + case STRING: + return convertToString(fieldValue); + case BYTES: + return convertToBinary(fieldValue); + default: + throw new UnsupportedOperationException("Not support type: " + fieldSchema.type()); + } + } + + private Object convertOtherType(Schema fieldSchema, Object fieldValue, ZoneId serverTimeZone) { + switch (fieldSchema.name()) { + case Enum.LOGICAL_NAME: + case Json.LOGICAL_NAME: + case EnumSet.LOGICAL_NAME: + return convertToString(fieldValue); + case Time.SCHEMA_NAME: + case MicroTime.SCHEMA_NAME: + case NanoTime.SCHEMA_NAME: + return convertToTime(fieldSchema, fieldValue); + case Timestamp.SCHEMA_NAME: + case MicroTimestamp.SCHEMA_NAME: + case NanoTimestamp.SCHEMA_NAME: + return convertToTimestamp(fieldSchema, fieldValue); + case Decimal.LOGICAL_NAME: + return convertToDecimal(fieldSchema, fieldValue); + case Date.SCHEMA_NAME: + return convertToDate(fieldSchema, fieldValue); + case Year.SCHEMA_NAME: + return convertToInteger(fieldValue); + case ZonedTime.SCHEMA_NAME: + case ZonedTimestamp.SCHEMA_NAME: + return convertToZoneTimeStamp(fieldSchema, fieldValue); + default: + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Field name %s not support schema %s.", + fieldSchema.name(), + fieldSchema.schema() + ) + ); + } + } + + private byte[] convertToBinary(Object fieldValue) { + if (fieldValue instanceof byte[]) { + return (byte[]) 
fieldValue; + } else if (fieldValue instanceof ByteBuffer) { + ByteBuffer byteBuffer = (ByteBuffer) fieldValue; + byte[] bytes = new byte[byteBuffer.remaining()]; + byteBuffer.get(bytes); + return bytes; + } else { + throw new UnsupportedOperationException( + "Unsupported Binary value type: " + fieldValue.getClass().getSimpleName()); + } + } + + private String convertToString(Object fieldValue) { + return fieldValue.toString(); + } + + private Double convertToDouble(Object fieldValue) { + if (fieldValue instanceof Float) { + return ((Float) fieldValue).doubleValue(); + } else if (fieldValue instanceof Double) { + return (Double) fieldValue; + } else { + return Double.parseDouble(fieldValue.toString()); + } + } + + private Float convertToFloat(Object fieldValue) { + if (fieldValue instanceof Float) { + return (Float) fieldValue; + } else if (fieldValue instanceof Double) { + return ((Double) fieldValue).floatValue(); + } else { + return Float.parseFloat(fieldValue.toString()); + } + } + + private Long convertToLong(Object fieldValue) { + if (fieldValue instanceof Integer) { + return ((Integer) fieldValue).longValue(); + } else if (fieldValue instanceof Long) { + return (Long) fieldValue; + } else { + return Long.parseLong(fieldValue.toString()); + } + } + + private Boolean convertToBoolean(Object fieldValue) { + if (fieldValue instanceof Integer) { + return (Integer) fieldValue != 0; + } else if (fieldValue instanceof Long) { + return (Long) fieldValue != 0; + } else { + String str = fieldValue.toString(); + if (NumberUtils.isNumber(str)) { + return NumberUtils.createNumber(str).intValue() != 0; + } + return Boolean.parseBoolean(fieldValue.toString()); + } + } + + private Integer convertToInteger(Object fieldValue) { + if (fieldValue instanceof Integer) { + return (Integer) (fieldValue); + } else if (fieldValue instanceof Long) { + return ((Long) fieldValue).intValue(); + } else { + return Integer.parseInt(fieldValue.toString()); + } + } + + public Object 
convertToDecimal(Schema schema, Object fieldValue) { + BigDecimal bigDecimal; + if (fieldValue instanceof byte[]) { + // for decimal.handling.mode=precise + bigDecimal = Decimal.toLogical(schema, (byte[]) fieldValue); + } else if (fieldValue instanceof String) { + // for decimal.handling.mode=string + bigDecimal = new BigDecimal((String) fieldValue); + } else if (fieldValue instanceof Double) { + // for decimal.handling.mode=double + bigDecimal = BigDecimal.valueOf((Double) fieldValue); + } else { + if (VariableScaleDecimal.LOGICAL_NAME.equals(schema.name())) { + SpecialValueDecimal decimal = + VariableScaleDecimal.toLogical((Struct) fieldValue); + bigDecimal = decimal.getDecimalValue().orElse(BigDecimal.ZERO); + } else { + // fallback to string + bigDecimal = new BigDecimal(fieldValue.toString()); + } + } + return bigDecimal; + } + + private Object convertToZoneTimeStamp(Schema fieldSchema, Object fieldValue) { + if (fieldValue instanceof String) { + String str = (String) fieldValue; + Instant instant = Instant.parse(str); + //TODO zone timestamp + } + throw BitSailException.asBitSailException(CommonErrorCode.RUNTIME_ERROR, + String.format("Can't parse field [%s] from value %s to zone timestamp.", + fieldSchema.name(), + fieldValue + )); + } + + @Override + public boolean isEndOfStream(Row nextElement) { + return false; + } +} diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/option/DebeziumReaderOptions.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/option/DebeziumReaderOptions.java index 1fb616793..87b282cdb 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/option/DebeziumReaderOptions.java +++ 
b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/option/DebeziumReaderOptions.java @@ -16,7 +16,15 @@ package com.bytedance.bitsail.component.format.debezium.option; +import com.bytedance.bitsail.common.option.ConfigOption; import com.bytedance.bitsail.common.option.ReaderOptions; +import static com.bytedance.bitsail.common.option.ConfigOptions.key; +import static com.bytedance.bitsail.common.option.ReaderOptions.READER_PREFIX; + public interface DebeziumReaderOptions extends ReaderOptions.BaseReaderOptions { + + ConfigOption DEBEZIUM_JSON_INCLUDE_SCHEMA = + key(READER_PREFIX + "debezium_json_include_schema") + .defaultValue(true); } diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/option/DebeziumWriterOptions.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/option/DebeziumWriterOptions.java index 0b885e3f3..05321b1cb 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/option/DebeziumWriterOptions.java +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/option/DebeziumWriterOptions.java @@ -24,6 +24,6 @@ public interface DebeziumWriterOptions extends WriterOptions.BaseWriterOptions { ConfigOption DEBEZIUM_JSON_INCLUDE_SCHEMA = - key(WRITER_PREFIX + "DEBEZIUM_JSON_INCLUDE_SCHEMA") - .defaultValue(false); + key(WRITER_PREFIX + "debezium_json_include_schema") + .defaultValue(true); } diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java 
b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java new file mode 100644 index 000000000..a592dd0ac --- /dev/null +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java @@ -0,0 +1,48 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.component.format.debezium; + +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.row.Row; + +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.math.BigDecimal; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Paths; + +public class DebeziumRowDeserializationSchemaTest { + + @Test + public void test() throws URISyntaxException, IOException { + byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowDeserializationSchemaTest + .class.getClassLoader().getResource("file/debezium.json") + .toURI().getPath())); + DebeziumRowDeserializationSchema debeziumRowDeserializationSchema = + new DebeziumRowDeserializationSchema(BitSailConfiguration.newDefault()); + + Row deserialize = debeziumRowDeserializationSchema.deserialize(new String(bytes), + new String[] {"double_type"}); + + Assert.assertNotNull(deserialize); + Assert.assertTrue(deserialize.getField(0) instanceof BigDecimal); + } + +} \ No newline at end of file diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium.json b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium.json new file mode 100644 index 000000000..5699f29b4 --- /dev/null +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium.json @@ -0,0 +1,257 @@ +{ + "schema":{ + "type":"struct", + "fields":[ + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"int_type" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"4", + "connect.decimal.precision":"20" + }, + "field":"double_type" + }, + { + 
"type":"string", + "optional":true, + "field":"date_type" + }, + { + "type":"string", + "optional":true, + "default":"", + "field":"varchar_type" + }, + { + "type":"int32", + "optional":false, + "name":"io.debezium.time.Date", + "version":1, + "field":"datetime" + } + ], + "optional":true, + "name":"localhost.test.jdbc_source_test.Value", + "field":"before" + }, + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"int_type" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"4", + "connect.decimal.precision":"20" + }, + "field":"double_type" + }, + { + "type":"string", + "optional":true, + "field":"date_type" + }, + { + "type":"string", + "optional":true, + "default":"", + "field":"varchar_type" + }, + { + "type":"int32", + "optional":false, + "name":"io.debezium.time.Date", + "version":1, + "field":"datetime" + } + ], + "optional":true, + "name":"localhost.test.jdbc_source_test.Value", + "field":"after" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"version" + }, + { + "type":"string", + "optional":false, + "field":"connector" + }, + { + "type":"string", + "optional":false, + "field":"name" + }, + { + "type":"int64", + "optional":false, + "field":"ts_ms" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Enum", + "version":1, + "parameters":{ + "allowed":"true,last,false" + }, + "default":"false", + "field":"snapshot" + }, + { + "type":"string", + "optional":false, + "field":"db" + }, + { + "type":"string", + "optional":true, + "field":"sequence" + }, + { + "type":"string", + "optional":true, + "field":"table" + }, + { + "type":"int64", + "optional":false, + "field":"server_id" + }, + { + "type":"string", + "optional":true, + "field":"gtid" + }, + { + "type":"string", + "optional":false, + 
"field":"file" + }, + { + "type":"int64", + "optional":false, + "field":"pos" + }, + { + "type":"int32", + "optional":false, + "field":"row" + }, + { + "type":"int64", + "optional":true, + "field":"thread" + }, + { + "type":"string", + "optional":true, + "field":"query" + } + ], + "optional":false, + "name":"io.debezium.connector.mysql.Source", + "field":"source" + }, + { + "type":"string", + "optional":false, + "field":"op" + }, + { + "type":"int64", + "optional":true, + "field":"ts_ms" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"id" + }, + { + "type":"int64", + "optional":false, + "field":"total_order" + }, + { + "type":"int64", + "optional":false, + "field":"data_collection_order" + } + ], + "optional":true, + "field":"transaction" + } + ], + "optional":false, + "name":"localhost.test.jdbc_source_test.Envelope" + }, + "payload":{ + "before":null, + "after":{ + "id":1, + "int_type":1001, + "double_type":"AbH8", + "date_type":"2022-10-01", + "varchar_type":"varchar_01", + "datetime":19297 + }, + "source":{ + "version":"1.6.4.Final", + "connector":"mysql", + "name":"localhost", + "ts_ms":1682237044000, + "snapshot":"false", + "db":"test", + "sequence":null, + "table":"jdbc_source_test", + "server_id":1, + "gtid":null, + "file":"binlog.000002", + "pos":986, + "row":0, + "thread":null, + "query":null + }, + "op":"c", + "ts_ms":1682237048134, + "transaction":null + } +} \ No newline at end of file diff --git a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveTableCatalog.java b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveCatalog.java similarity index 83% rename from bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveTableCatalog.java rename to 
bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveCatalog.java index 2a98a6380..79388bc87 100644 --- a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveTableCatalog.java +++ b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveCatalog.java @@ -16,11 +16,11 @@ package com.bytedance.bitsail.connector.legacy.hive.common; +import com.bytedance.bitsail.common.catalog.table.Catalog; import com.bytedance.bitsail.common.catalog.table.CatalogTable; import com.bytedance.bitsail.common.catalog.table.CatalogTableColumn; -import com.bytedance.bitsail.common.catalog.table.CatalogTableDefinition; import com.bytedance.bitsail.common.catalog.table.CatalogTableSchema; -import com.bytedance.bitsail.common.catalog.table.TableCatalog; +import com.bytedance.bitsail.common.catalog.table.TableId; import com.bytedance.bitsail.common.catalog.table.TableOperation; import com.bytedance.bitsail.common.model.ColumnInfo; import com.bytedance.bitsail.common.type.TypeInfoConverter; @@ -36,8 +36,8 @@ import java.util.List; @Builder -public class HiveTableCatalog implements TableCatalog { - private static final Logger LOG = LoggerFactory.getLogger(HiveTableCatalog.class); +public class HiveCatalog implements Catalog { + private static final Logger LOG = LoggerFactory.getLogger(HiveCatalog.class); private final String namespace; private final String database; @@ -58,8 +58,8 @@ public void close() { } @Override - public CatalogTableDefinition createCatalogTableDefinition() { - return CatalogTableDefinition + public TableId createCatalogTableDefinition() { + return TableId .builder() .database(database) .table(table) @@ -67,20 +67,25 @@ public CatalogTableDefinition createCatalogTableDefinition() { } @Override - public boolean 
tableExists(CatalogTableDefinition catalogTableDefinition) { + public List listTables() { + return null; + } + + @Override + public boolean tableExists(TableId catalogTableDefinition) { //todo real check. return true; } @Override - public CatalogTable getCatalogTable(CatalogTableDefinition catalogTableDefinition) { + public CatalogTable getCatalogTable(TableId catalogTableDefinition) { try { List columnInfo = HiveMetaClientUtil.getColumnInfo(hiveConf, catalogTableDefinition.getDatabase(), catalogTableDefinition.getTable()); return CatalogTable.builder() - .catalogTableDefinition(catalogTableDefinition) + .tableId(catalogTableDefinition) .catalogTableSchema(getCatalogTableSchema(columnInfo)) .build(); } catch (Exception e) { @@ -90,7 +95,7 @@ public CatalogTable getCatalogTable(CatalogTableDefinition catalogTableDefinitio } @Override - public void createTable(CatalogTableDefinition catalogTableDefinition, CatalogTable catalogTable) { + public void createTable(TableId catalogTableDefinition, CatalogTable catalogTable) { throw new UnsupportedOperationException(); } diff --git a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveTableCatalogFactory.java b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveTableCatalogFactory.java index 5e68ea1e9..5b570fb7b 100644 --- a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveTableCatalogFactory.java +++ b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/java/com/bytedance/bitsail/connector/legacy/hive/common/HiveTableCatalogFactory.java @@ -16,28 +16,26 @@ package com.bytedance.bitsail.connector.legacy.hive.common; -import com.bytedance.bitsail.base.catalog.TableCatalogFactory; +import com.bytedance.bitsail.base.catalog.CatalogFactory; import 
com.bytedance.bitsail.base.connector.BuilderGroup; -import com.bytedance.bitsail.base.execution.ExecutionEnviron; -import com.bytedance.bitsail.common.catalog.table.TableCatalog; +import com.bytedance.bitsail.common.catalog.table.Catalog; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.exception.FrameworkErrorCode; import com.bytedance.bitsail.connector.legacy.hive.option.HiveReaderOptions; import com.bytedance.bitsail.connector.legacy.hive.option.HiveWriterOptions; import com.bytedance.bitsail.connector.legacy.hive.util.HiveConfUtils; -public class HiveTableCatalogFactory implements TableCatalogFactory { +public class HiveTableCatalogFactory implements CatalogFactory { @Override - public TableCatalog createTableCatalog(BuilderGroup builderGroup, - ExecutionEnviron executionEnviron, - BitSailConfiguration connectorConfiguration) { + public Catalog createTableCatalog(BuilderGroup builderGroup, + BitSailConfiguration connectorConfiguration) { if (BuilderGroup.READER.equals(builderGroup)) { String database = connectorConfiguration .getNecessaryOption(HiveReaderOptions.DB_NAME, FrameworkErrorCode.REQUIRED_VALUE); String table = connectorConfiguration .getNecessaryOption(HiveReaderOptions.TABLE_NAME, FrameworkErrorCode.REQUIRED_VALUE); - return HiveTableCatalog + return HiveCatalog .builder() .database(database) .table(table) @@ -50,7 +48,7 @@ public TableCatalog createTableCatalog(BuilderGroup builderGroup, .getNecessaryOption(HiveWriterOptions.DB_NAME, FrameworkErrorCode.REQUIRED_VALUE); String table = connectorConfiguration .getNecessaryOption(HiveWriterOptions.TABLE_NAME, FrameworkErrorCode.REQUIRED_VALUE); - return HiveTableCatalog + return HiveCatalog .builder() .database(database) .table(table) diff --git a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/resources/META-INF/services/com.bytedance.bitsail.base.catalog.TableCatalogFactory 
b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/resources/META-INF/services/com.bytedance.bitsail.base.catalog.CatalogFactory similarity index 100% rename from bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/resources/META-INF/services/com.bytedance.bitsail.base.catalog.TableCatalogFactory rename to bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/src/main/resources/META-INF/services/com.bytedance.bitsail.base.catalog.CatalogFactory diff --git a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLTableCatalog.java b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLCatalog.java similarity index 80% rename from bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLTableCatalog.java rename to bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLCatalog.java index ebbb0fb1b..395c164ca 100644 --- a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLTableCatalog.java +++ b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLCatalog.java @@ -17,11 +17,11 @@ package com.bytedance.bitsail.connector.legacy.jdbc.catalog; import com.bytedance.bitsail.common.BitSailException; +import com.bytedance.bitsail.common.catalog.table.Catalog; import com.bytedance.bitsail.common.catalog.table.CatalogTable; import com.bytedance.bitsail.common.catalog.table.CatalogTableColumn; -import com.bytedance.bitsail.common.catalog.table.CatalogTableDefinition; import 
com.bytedance.bitsail.common.catalog.table.CatalogTableSchema; -import com.bytedance.bitsail.common.catalog.table.TableCatalog; +import com.bytedance.bitsail.common.catalog.table.TableId; import com.bytedance.bitsail.common.catalog.table.TableOperation; import com.bytedance.bitsail.common.type.TypeInfoConverter; import com.bytedance.bitsail.common.typeinfo.TypeInfo; @@ -36,8 +36,8 @@ import java.util.List; -public class MySQLTableCatalog implements TableCatalog { - private static final Logger LOG = LoggerFactory.getLogger(MySQLTableCatalog.class); +public class MySQLCatalog implements Catalog { + private static final Logger LOG = LoggerFactory.getLogger(MySQLCatalog.class); private final String database; private final String table; @@ -50,13 +50,13 @@ public class MySQLTableCatalog implements TableCatalog { private TypeInfoConverter typeInfoConverter; @Builder - public MySQLTableCatalog(String database, - String table, - String schema, - String username, - String password, - String url, - String customizedSQL) { + public MySQLCatalog(String database, + String table, + String schema, + String username, + String password, + String url, + String customizedSQL) { this.database = database; this.table = table; this.schema = schema; @@ -78,8 +78,8 @@ public void close() { } @Override - public CatalogTableDefinition createCatalogTableDefinition() { - return CatalogTableDefinition + public TableId createCatalogTableDefinition() { + return TableId .builder() .database(database) .table(table) @@ -87,13 +87,18 @@ public CatalogTableDefinition createCatalogTableDefinition() { } @Override - public boolean tableExists(CatalogTableDefinition catalogTableDefinition) { + public List listTables() { + return null; + } + + @Override + public boolean tableExists(TableId catalogTableDefinition) { //todo doesn't check return true; } @Override - public CatalogTable getCatalogTable(CatalogTableDefinition catalogTableDefinition) { + public CatalogTable getCatalogTable(TableId 
catalogTableDefinition) { TableInfo tableInfo; try { if (useCustomizedSQL) { @@ -109,7 +114,7 @@ public CatalogTable getCatalogTable(CatalogTableDefinition catalogTableDefinitio } return CatalogTable .builder() - .catalogTableDefinition(catalogTableDefinition) + .tableId(catalogTableDefinition) .catalogTableSchema(CatalogTableSchema.builder() .columns(convertTableColumn( typeInfoConverter, @@ -119,7 +124,7 @@ public CatalogTable getCatalogTable(CatalogTableDefinition catalogTableDefinitio } @Override - public void createTable(CatalogTableDefinition catalogTableDefinition, CatalogTable catalogTable) { + public void createTable(TableId catalogTableDefinition, CatalogTable catalogTable) { throw new UnsupportedOperationException(); } diff --git a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLTableCatalogFactory.java b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLTableCatalogFactory.java index 27485800f..01f742e91 100644 --- a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLTableCatalogFactory.java +++ b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/java/com/bytedance/bitsail/connector/legacy/jdbc/catalog/MySQLTableCatalogFactory.java @@ -16,11 +16,10 @@ package com.bytedance.bitsail.connector.legacy.jdbc.catalog; -import com.bytedance.bitsail.base.catalog.TableCatalogFactory; +import com.bytedance.bitsail.base.catalog.CatalogFactory; import com.bytedance.bitsail.base.connector.BuilderGroup; -import com.bytedance.bitsail.base.execution.ExecutionEnviron; import com.bytedance.bitsail.common.BitSailException; -import com.bytedance.bitsail.common.catalog.table.TableCatalog; +import com.bytedance.bitsail.common.catalog.table.Catalog; import 
com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.exception.CommonErrorCode; import com.bytedance.bitsail.common.exception.FrameworkErrorCode; @@ -34,19 +33,18 @@ import java.util.List; -public class MySQLTableCatalogFactory implements TableCatalogFactory { +public class MySQLTableCatalogFactory implements CatalogFactory { @Override - public TableCatalog createTableCatalog(BuilderGroup builderGroup, - ExecutionEnviron executionEnviron, - BitSailConfiguration connectorConfiguration) { + public Catalog createTableCatalog(BuilderGroup builderGroup, + BitSailConfiguration connectorConfiguration) { if (BuilderGroup.READER.equals(builderGroup)) { List connections = connectorConfiguration .getNecessaryOption(JdbcReaderOptions.CONNECTIONS, FrameworkErrorCode.REQUIRED_VALUE); - return MySQLTableCatalog + return MySQLCatalog .builder() .database(connectorConfiguration.get(JdbcReaderOptions.DB_NAME)) .table(connectorConfiguration.get(JdbcReaderOptions.TABLE_NAME)) @@ -59,7 +57,7 @@ public TableCatalog createTableCatalog(BuilderGroup builderGroup, .url(getClusterUrl(connections)) .build(); } - return MySQLTableCatalog + return MySQLCatalog .builder() .username(connectorConfiguration.get(JdbcWriterOptions.USER_NAME)) .password(connectorConfiguration.get(JdbcWriterOptions.PASSWORD)) diff --git a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/resources/META-INF/services/com.bytedance.bitsail.base.catalog.TableCatalogFactory b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/resources/META-INF/services/com.bytedance.bitsail.base.catalog.CatalogFactory similarity index 100% rename from bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/resources/META-INF/services/com.bytedance.bitsail.base.catalog.TableCatalogFactory rename to 
bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-jdbc/src/main/resources/META-INF/services/com.bytedance.bitsail.base.catalog.CatalogFactory diff --git a/bitsail-cores/bitsail-core-common/pom.xml b/bitsail-cores/bitsail-core-common/pom.xml new file mode 100644 index 000000000..edcb415ac --- /dev/null +++ b/bitsail-cores/bitsail-core-common/pom.xml @@ -0,0 +1,56 @@ + + + + + + bitsail-cores + com.bytedance.bitsail + ${revision} + + 4.0.0 + + bitsail-core-common + + + + com.bytedance.bitsail + bitsail-base + ${revision} + + + + com.bytedance.bitsail + bitsail-common + ${revision} + + + + com.bytedance.bitsail + bitsail-component-format-debezium + ${revision} + + + + com.bytedance.bitsail + connector-print + ${revision} + + + + \ No newline at end of file diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableCommitSerializer.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableCommitSerializer.java new file mode 100644 index 000000000..43ef553a1 --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableCommitSerializer.java @@ -0,0 +1,96 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.bytedance.bitsail.core.common.serializer.multiple;
+
+import com.bytedance.bitsail.base.serializer.BinarySerializer;
+import com.bytedance.bitsail.common.catalog.table.TableId;
+import com.bytedance.bitsail.core.common.sink.multiple.comittable.MultipleTableCommit;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.lang3.SerializationUtils;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Serializer for {@link MultipleTableCommit} that delegates each commit to the wrapped single-table
+ * serializer and frames every element with a length prefix.
+ *
+ * <p>Binary layout: {@code [int length][serialized TableId]} followed by zero or more
+ * {@code [int length][serialized commit]} records. Lengths are 4-byte big-endian integers as written
+ * by {@link DataOutputStream#writeInt(int)}.
+ */
+public class MultipleTableCommitSerializer<CommitT> implements BinarySerializer<MultipleTableCommit<CommitT>> {
+
+  private final BinarySerializer<CommitT> original;
+
+  /**
+   * @param original serializer used for the individual commits; it also supplies the version.
+   */
+  public MultipleTableCommitSerializer(BinarySerializer<CommitT> original) {
+    this.original = original;
+  }
+
+  @Override
+  public int getVersion() {
+    return original.getVersion();
+  }
+
+  /**
+   * Serializes the table id and every commit as length-prefixed records.
+   *
+   * @return the framed bytes, or {@code null} when {@code multipleTableCommit} is {@code null}.
+   */
+  @Override
+  public byte[] serialize(MultipleTableCommit<CommitT> multipleTableCommit) throws IOException {
+    if (Objects.isNull(multipleTableCommit)) {
+      return null;
+    }
+
+    try (ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+         DataOutputStream output = new DataOutputStream(bytes)) {
+      writeFrame(output, SerializationUtils.serialize(multipleTableCommit.getTableId()));
+      for (CommitT commit : multipleTableCommit.getCommits()) {
+        writeFrame(output, original.serialize(commit));
+      }
+      output.flush();
+      return bytes.toByteArray();
+    }
+  }
+
+  /**
+   * Reads back the layout produced by {@link #serialize(MultipleTableCommit)}.
+   *
+   * @return the rebuilt commit, or {@code null} when {@code serialized} is {@code null}.
+   * @throws IOException if the data ends before a declared record length is satisfied.
+   */
+  @Override
+  public MultipleTableCommit<CommitT> deserialize(int version, byte[] serialized) throws IOException {
+    if (serialized == null) {
+      return null;
+    }
+    MultipleTableCommit.MultipleTableCommitBuilder<CommitT> builder = MultipleTableCommit.builder();
+    try (DataInputStream input = new DataInputStream(new ByteArrayInputStream(serialized))) {
+      TableId tableId = SerializationUtils.deserialize(readFrame(input));
+      builder.tableId(tableId);
+
+      List<CommitT> commits = Lists.newArrayList();
+      // The stream is backed by a byte array, so available() is the exact number of unread bytes.
+      while (input.available() > 0) {
+        commits.add(original.deserialize(version, readFrame(input)));
+      }
+
+      builder.commits(commits);
+    }
+    return builder.build();
+  }
+
+  /**
+   * Writes one record. The length is written as a full int: OutputStream#write(int) would keep only
+   * the low-order 8 bits and corrupt every payload of 256 bytes or more.
+   */
+  private static void writeFrame(DataOutputStream output, byte[] payload) throws IOException {
+    output.writeInt(payload.length);
+    output.write(payload);
+  }
+
+  /**
+   * Reads one record; readFully guarantees the whole payload is read or an EOFException is raised.
+   */
+  private static byte[] readFrame(DataInputStream input) throws IOException {
+    int length = input.readInt();
+    if (length < 0) {
+      throw new IOException(String.format("Corrupted record length %s.", length));
+    }
+    byte[] payload = new byte[length];
+    input.readFully(payload);
+    return payload;
+  }
+}
diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableStateSerializer.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableStateSerializer.java new file mode 100644 index 000000000..62c00d64f --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableStateSerializer.java @@ -0,0 +1,96 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+package com.bytedance.bitsail.core.common.serializer.multiple;
+
+import com.bytedance.bitsail.base.serializer.BinarySerializer;
+import com.bytedance.bitsail.common.catalog.table.TableId;
+import com.bytedance.bitsail.core.common.sink.multiple.state.MultipleTableState;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.lang3.SerializationUtils;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Serializer for {@link MultipleTableState} that delegates each writer state to the wrapped
+ * single-table serializer and frames every element with a length prefix.
+ *
+ * <p>Binary layout: {@code [int length][serialized TableId]} followed by zero or more
+ * {@code [int length][serialized state]} records. Lengths are 4-byte big-endian integers as written
+ * by {@link DataOutputStream#writeInt(int)}.
+ */
+public class MultipleTableStateSerializer<WriterStateT> implements BinarySerializer<MultipleTableState<WriterStateT>> {
+
+  private final BinarySerializer<WriterStateT> original;
+
+  /**
+   * @param original serializer used for the individual writer states; it also supplies the version.
+   */
+  public MultipleTableStateSerializer(BinarySerializer<WriterStateT> original) {
+    this.original = original;
+  }
+
+  @Override
+  public int getVersion() {
+    return original.getVersion();
+  }
+
+  /**
+   * Serializes the table id and every writer state as length-prefixed records.
+   *
+   * @return the framed bytes, or {@code null} when {@code multipleTableCommit} is {@code null}.
+   */
+  @Override
+  public byte[] serialize(MultipleTableState<WriterStateT> multipleTableCommit) throws IOException {
+    if (Objects.isNull(multipleTableCommit)) {
+      return null;
+    }
+
+    try (ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+         DataOutputStream output = new DataOutputStream(bytes)) {
+      writeFrame(output, SerializationUtils.serialize(multipleTableCommit.getTableId()));
+      for (WriterStateT state : multipleTableCommit.getState()) {
+        writeFrame(output, original.serialize(state));
+      }
+      output.flush();
+      return bytes.toByteArray();
+    }
+  }
+
+  /**
+   * Reads back the layout produced by {@link #serialize(MultipleTableState)}.
+   *
+   * @return the rebuilt state, or {@code null} when {@code serialized} is {@code null}.
+   * @throws IOException if the data ends before a declared record length is satisfied.
+   */
+  @Override
+  public MultipleTableState<WriterStateT> deserialize(int version, byte[] serialized) throws IOException {
+    if (serialized == null) {
+      return null;
+    }
+    MultipleTableState.MultipleTableStateBuilder<WriterStateT> builder = MultipleTableState.builder();
+    try (DataInputStream input = new DataInputStream(new ByteArrayInputStream(serialized))) {
+      TableId tableId = SerializationUtils.deserialize(readFrame(input));
+      builder.tableId(tableId);
+
+      List<WriterStateT> states = Lists.newArrayList();
+      // The stream is backed by a byte array, so available() is the exact number of unread bytes.
+      while (input.available() > 0) {
+        states.add(original.deserialize(version, readFrame(input)));
+      }
+
+      builder.state(states);
+    }
+    return builder.build();
+  }
+
+  /**
+   * Writes one record. The length is written as a full int: OutputStream#write(int) would keep only
+   * the low-order 8 bits and corrupt every payload of 256 bytes or more.
+   */
+  private static void writeFrame(DataOutputStream output, byte[] payload) throws IOException {
+    output.writeInt(payload.length);
+    output.write(payload);
+  }
+
+  /**
+   * Reads one record; readFully guarantees the whole payload is read or an EOFException is raised.
+   */
+  private static byte[] readFrame(DataInputStream input) throws IOException {
+    int length = input.readInt();
+    if (length < 0) {
+      throw new IOException(String.format("Corrupted record length %s.", length));
+    }
+    byte[] payload = new byte[length];
+    input.readFully(payload);
+    return payload;
+  }
+}
diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableCommitter.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableCommitter.java new file mode 100644 index 000000000..f568debb6 --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableCommitter.java @@ -0,0 +1,62 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
+
+package com.bytedance.bitsail.core.common.sink.multiple;
+
+import com.bytedance.bitsail.base.connector.writer.v1.WriterCommitter;
+import com.bytedance.bitsail.base.extension.SupportMultipleSinkTable;
+import com.bytedance.bitsail.common.catalog.table.TableId;
+import com.bytedance.bitsail.common.configuration.BitSailConfiguration;
+import com.bytedance.bitsail.core.common.sink.multiple.comittable.MultipleTableCommit;
+
+import com.google.common.collect.Maps;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * Committer that routes every {@link MultipleTableCommit} to a per-table {@link WriterCommitter}.
+ * Per-table committers are created on first use from the supplier and cached by {@link TableId}.
+ */
+public class MultipleTableCommitter<CommitT extends Serializable> implements WriterCommitter<MultipleTableCommit<CommitT>> {
+
+  private final SupportMultipleSinkTable<?, CommitT, ?> supplier;
+  private final Map<TableId, WriterCommitter<CommitT>> multipleCommitters;
+  private final BitSailConfiguration templateConfiguration;
+
+  /**
+   * @param templateConfiguration configuration handed to {@code supplier.applyTableId} for each table.
+   * @param supplier sink extension that creates the per-table committers.
+   */
+  public MultipleTableCommitter(BitSailConfiguration templateConfiguration,
+                                SupportMultipleSinkTable<?, CommitT, ?> supplier) {
+    this.supplier = supplier;
+    this.multipleCommitters = Maps.newConcurrentMap();
+    this.templateConfiguration = templateConfiguration;
+  }
+
+  /**
+   * Commits every committable through the committer of its table.
+   *
+   * @return always an empty list.
+   * @throws IllegalStateException if the supplier provides no committer for a table.
+   */
+  @Override
+  public List<MultipleTableCommit<CommitT>> commit(List<MultipleTableCommit<CommitT>> committables) throws IOException {
+    for (MultipleTableCommit<CommitT> committable : committables) {
+      TableId tableId = committable.getTableId();
+      WriterCommitter<CommitT> realWriterCommitter = multipleCommitters.get(tableId);
+      if (Objects.isNull(realWriterCommitter)) {
+        BitSailConfiguration configuration = supplier.applyTableId(templateConfiguration, tableId);
+        Optional<WriterCommitter<CommitT>> committer = supplier.createCommitter(configuration);
+        // Use the freshly created committer for this commit as well; previously it was only cached,
+        // so the first commit of every table failed with a NullPointerException.
+        realWriterCommitter = committer.orElseThrow(() -> new IllegalStateException(
+            String.format("No committer is available for table %s.", tableId)));
+        multipleCommitters.put(tableId, realWriterCommitter);
+      }
+      // NOTE(review): the list returned by the per-table commit is discarded - confirm this is intended.
+      realWriterCommitter.commit(committable.getCommits());
+    }
+    return Collections.emptyList();
+  }
+}
diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java
b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java new file mode 100644 index 000000000..cbac3c028 --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java @@ -0,0 +1,136 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.core.common.sink.multiple; + +import com.bytedance.bitsail.base.catalog.CatalogFactory; +import com.bytedance.bitsail.base.connector.BuilderGroup; +import com.bytedance.bitsail.base.connector.writer.v1.Sink; +import com.bytedance.bitsail.base.connector.writer.v1.Writer; +import com.bytedance.bitsail.base.connector.writer.v1.WriterCommitter; +import com.bytedance.bitsail.base.extension.SupportMultipleSinkTable; +import com.bytedance.bitsail.base.serializer.BinarySerializer; +import com.bytedance.bitsail.common.catalog.table.Catalog; +import com.bytedance.bitsail.common.catalog.table.CatalogTable; +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.option.WriterOptions; +import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.common.type.TypeInfoConverter; +import com.bytedance.bitsail.common.typeinfo.TypeInfoValueConverter; +import 
com.bytedance.bitsail.component.format.debezium.DebeziumRowDeserializationSchema;
+import com.bytedance.bitsail.core.common.serializer.multiple.MultipleTableCommitSerializer;
+import com.bytedance.bitsail.core.common.serializer.multiple.MultipleTableStateSerializer;
+import com.bytedance.bitsail.core.common.sink.multiple.comittable.MultipleTableCommit;
+import com.bytedance.bitsail.core.common.sink.multiple.state.MultipleTableState;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.regex.Pattern;
+
+/**
+ * Sink wrapper that writes to several tables through one single-table {@link Sink}. On configure it
+ * lists the catalog's tables and keeps those whose id matches the configured table pattern; writer,
+ * committer and serializers are multi-table wrappers around the ones of the real sink.
+ */
+public class MultipleTableSink<InputT, CommitT extends Serializable, WriterStateT extends Serializable>
+    implements Sink<InputT, MultipleTableCommit<CommitT>, MultipleTableState<WriterStateT>> {
+
+  private static final Logger LOG = LoggerFactory.getLogger(MultipleTableSink.class);
+
+  private final Sink<InputT, CommitT, WriterStateT> realSink;
+  private final CatalogFactory factory;
+
+  private Catalog catalog;
+  private Map<TableId, CatalogTable> catalogTables;
+  private BitSailConfiguration commonConfiguration;
+  private BitSailConfiguration writerConfiguration;
+  private Pattern patternOfTable;
+  private TypeInfoValueConverter valueConverter;
+
+  /**
+   * @param realSink the single-table sink being wrapped.
+   * @param factory catalog factory of the sink; must not be {@code null}.
+   */
+  public MultipleTableSink(Sink<InputT, CommitT, WriterStateT> realSink,
+                           CatalogFactory factory) {
+    this.realSink = realSink;
+    this.factory = Preconditions.checkNotNull(factory,
+        String.format("The sink %s must implement catalog when support multiple table write.",
+            realSink.getWriterName()));
+  }
+
+  /**
+   * Stores both configurations, compiles the table pattern, creates the writer-side catalog and
+   * registers every listed table whose id matches the pattern.
+   */
+  @Override
+  public void configure(BitSailConfiguration commonConfiguration, BitSailConfiguration writerConfiguration) throws Exception {
+    this.commonConfiguration = commonConfiguration;
+    this.writerConfiguration = writerConfiguration;
+    this.valueConverter = new TypeInfoValueConverter(commonConfiguration);
+    this.patternOfTable = Pattern.compile(writerConfiguration.get(WriterOptions.BaseWriterOptions.TABLE_PATTERN));
+    this.catalog = factory.createTableCatalog(BuilderGroup.WRITER, writerConfiguration);
+    this.catalogTables = Maps.newHashMap();
+
+    List<TableId> tableIds = catalog.listTables();
+    // Some Catalog implementations return null from listTables(); treat that as "nothing to register".
+    if (tableIds == null) {
+      LOG.warn("Catalog of sink {} returned no table list, no table is registered up front.", realSink.getWriterName());
+      return;
+    }
+    for (TableId tableId : tableIds) {
+      // find() accepts a match anywhere inside the table id, not only a full match.
+      if (patternOfTable.matcher(tableId.toString()).find()) {
+        LOG.info("Match table {} of the pattern {}.", tableId, patternOfTable.pattern());
+        CatalogTable catalogTable = catalog.getCatalogTable(tableId);
+        catalogTables.put(tableId, catalogTable);
+      }
+    }
+  }
+
+  @Override
+  public Writer<InputT, MultipleTableCommit<CommitT>, MultipleTableState<WriterStateT>> createWriter(Writer.Context<MultipleTableState<WriterStateT>> context)
+      throws IOException {
+    return new MultipleTableWriter<>(
+        writerConfiguration,
+        context,
+        valueConverter,
+        (SupportMultipleSinkTable<InputT, CommitT, WriterStateT>) realSink,
+        catalogTables,
+        patternOfTable,
+        new DebeziumRowDeserializationSchema(writerConfiguration));
+  }
+
+  @Override
+  public Optional<WriterCommitter<MultipleTableCommit<CommitT>>> createCommitter() {
+    return Optional.of(
+        new MultipleTableCommitter<>(
+            writerConfiguration,
+            (SupportMultipleSinkTable<InputT, CommitT, WriterStateT>) realSink
+        )
+    );
+  }
+
+  @Override
+  public String getWriterName() {
+    return realSink.getWriterName();
+  }
+
+  @Override
+  public TypeInfoConverter createTypeInfoConverter() {
+    return realSink.createTypeInfoConverter();
+  }
+
+  @Override
+  public BinarySerializer<MultipleTableCommit<CommitT>> getCommittableSerializer() {
+    return new MultipleTableCommitSerializer<>(realSink.getCommittableSerializer());
+  }
+
+  @Override
+  public BinarySerializer<MultipleTableState<WriterStateT>> getWriteStateSerializer() {
+    return new MultipleTableStateSerializer<>(realSink.getWriteStateSerializer());
+  }
+}
diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java new file mode 100644 index 000000000..8508a2754 --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java @@ -0,0 +1,303 @@ +/* + * Copyright 2022-2023 Bytedance
Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.core.common.sink.multiple; + +import com.bytedance.bitsail.base.connector.writer.v1.Writer; +import com.bytedance.bitsail.base.extension.SupportMultipleSinkTable; +import com.bytedance.bitsail.common.BitSailException; +import com.bytedance.bitsail.common.catalog.table.CatalogTable; +import com.bytedance.bitsail.common.catalog.table.CatalogTableColumn; +import com.bytedance.bitsail.common.catalog.table.CatalogTableSchema; +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.exception.CommonErrorCode; +import com.bytedance.bitsail.common.row.MultipleTableRow; +import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfoValueConverter; +import com.bytedance.bitsail.component.format.debezium.DebeziumRowDeserializationSchema; +import com.bytedance.bitsail.core.common.sink.multiple.comittable.MultipleTableCommit; +import com.bytedance.bitsail.core.common.sink.multiple.state.MultipleTableState; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.commons.collections.MapUtils; 
+import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +public class MultipleTableWriter + implements Writer, MultipleTableState> { + + private static final Logger LOG = LoggerFactory.getLogger(MultipleTableWriter.class); + + private final SupportMultipleSinkTable supplier; + private final Context> context; + private final BitSailConfiguration templateConfiguration; + private final Map catalogTables; + private final DebeziumRowDeserializationSchema deserializationSchema; + private final TypeInfoValueConverter valueConverter; + private final Pattern patternOfTable; + + private transient Map> restoredMultiTableWriters; + private transient Map> processedMultiTableWriters; + private transient Map tableIdRowTypeInfos; + + public MultipleTableWriter(BitSailConfiguration templateConfiguration, + Context> context, + TypeInfoValueConverter typeInfoValueConverter, + SupportMultipleSinkTable supplier, + Map catalogTables, + Pattern patternOfTable, + DebeziumRowDeserializationSchema deserializationSchema) { + this.templateConfiguration = templateConfiguration; + this.context = context; + this.supplier = supplier; + this.catalogTables = catalogTables; + this.deserializationSchema = deserializationSchema; + this.valueConverter = typeInfoValueConverter; + this.processedMultiTableWriters = Maps.newConcurrentMap(); + this.restoredMultiTableWriters = Maps.newConcurrentMap(); + this.tableIdRowTypeInfos = Maps.newConcurrentMap(); + this.patternOfTable = patternOfTable; + + restore(); + } + + private void restore() { + if (context.isRestored()) { + for (MultipleTableState tableState : context.getRestoreStates()) { + TableId tableId = tableState.getTableId(); + CatalogTable catalogTable = catalogTables.get(tableId); + if 
(Objects.isNull(catalogTable)) { + LOG.warn("Subtask {} table {} already be deleted, skip restore it from state.", + context.getIndexOfSubTaskId(), tableId); + continue; + } + Context clone = cloneWriterContext(catalogTable, context); + BitSailConfiguration configuration = supplier.applyTableId(templateConfiguration.clone(), tableId); //clone first: applyTableId may mutate its argument, so the shared template must stay untouched (same as in write). + Writer writer = supplier.createWriter(clone, configuration); + restoredMultiTableWriters.put(tableId, writer); + } + } + } + + @Override + public void write(Row element) throws IOException { + MultipleTableRow multipleTableRow = MultipleTableRow.of(element); + TableId tableId = TableId.of(multipleTableRow.getTableId()); + + if (!(patternOfTable.matcher(tableId.getTable()).find())) { + LOG.warn("Table {} not match with pattern: {}.", tableId.getTable(), patternOfTable.pattern()); + return; + } + + Writer realWriter; + if (processedMultiTableWriters.containsKey(tableId)) { + realWriter = processedMultiTableWriters.get(tableId); + } else if (restoredMultiTableWriters.containsKey(tableId)) { + realWriter = restoredMultiTableWriters.get(tableId); + } else { + BitSailConfiguration current = supplier + .applyTableId(templateConfiguration.clone(), tableId); + + CatalogTable catalogTable = catalogTables.get(tableId); + if (Objects.isNull(catalogTable)) { + //Not support new table in runtime just now.
+ LOG.warn("Subtask {} discovered new table: {} from input.", context.getIndexOfSubTaskId(), + tableId); + LOG.warn("Subtask {} not support create new table: {} in runtime right now, skip it.", context.getIndexOfSubTaskId(), + tableId); + return; + } + LOG.info("Subtask {} create real writer for the table: {}.", context.getIndexOfSubTaskId(), tableId); + + Context clone = cloneWriterContext(catalogTable, context); + + realWriter = supplier.createWriter(clone, current); + processedMultiTableWriters.put(tableId, realWriter); + } + + RowTypeInfo rowTypeInfo = tableIdRowTypeInfos.get(tableId); + Row deserialize = deserializationSchema.deserialize( + multipleTableRow.getValue(), + rowTypeInfo.getFieldNames()); + + for (int index = 0; index < rowTypeInfo.getTypeInfos().length; index++) { + + try { + //convert field to real writer type info. + deserialize.setField(index, + valueConverter.convertObject( + deserialize.getField(index), + rowTypeInfo.getTypeInfos()[index]) + ); + } catch (Exception e) { + LOG.error("Subtask {} failed to convert field name {}'s value {} to dest type info {}.", + context.getIndexOfSubTaskId(), + rowTypeInfo.getFieldNames()[index], + deserialize.getField(index), + rowTypeInfo.getTypeInfos()[index]); + //handled as dirty record. 
+ throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Subtask %s failed to convert field name %s to dest type info %s.", + context.getIndexOfSubTaskId(), + rowTypeInfo.getFieldNames()[index], + rowTypeInfo.getTypeInfos()[index])); + } + } + realWriter.write((InputT) deserialize); //hand the converted row to the writer selected for this table. + } + + private Context cloneWriterContext(CatalogTable catalogTable, + Context> context) { + return new Context() { + + @Override + public RowTypeInfo getRowTypeInfo() { + if (tableIdRowTypeInfos.containsKey(catalogTable.getTableId())) { + return tableIdRowTypeInfos.get(catalogTable.getTableId()); + } + CatalogTableSchema catalogTableSchema = catalogTable.getCatalogTableSchema(); + List columns = catalogTableSchema.getColumns(); + String[] fieldNames = columns.stream() + .map(CatalogTableColumn::getName) + .collect(Collectors.toList()) + .toArray(new String[] {}); + + TypeInfo[] fieldTypes = columns.stream() + .map(CatalogTableColumn::getType) + .collect(Collectors.toList()) + .toArray(new TypeInfo[] {}); + + RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldNames, fieldTypes); + tableIdRowTypeInfos.put(catalogTable.getTableId(), rowTypeInfo); + return rowTypeInfo; + } + + @Override + public int getIndexOfSubTaskId() { + return context.getIndexOfSubTaskId(); + } + + @Override + public boolean isRestored() { + for (MultipleTableState tableState : context.getRestoreStates()) { + if (Objects.equals(tableState.getTableId(), catalogTable.getTableId())) { + return true; + } + } + return false; + } + + @Override + public List getRestoreStates() { + for (MultipleTableState tableState : context.getRestoreStates()) { + if (Objects.equals(tableState.getTableId(), catalogTable.getTableId())) { + return tableState.getState(); + } + } + return Collections.emptyList(); + } + }; + } + + @Override + public void flush(boolean endOfInput) throws IOException { + for (Writer writer : processedMultiTableWriters.values()) { + writer.flush(endOfInput); + } + } + +
@Override + public List> prepareCommit() throws IOException { + List> prepared = Lists.newArrayList(); + for (TableId tableId : processedMultiTableWriters.keySet()) { + List commit = processedMultiTableWriters.get(tableId).prepareCommit(); + MultipleTableCommit tableCommit = MultipleTableCommit + .builder() + .commits(commit) + .tableId(tableId) + .build(); + prepared.add(tableCommit); + } + if (MapUtils.isNotEmpty(restoredMultiTableWriters)) { + for (TableId tableId : restoredMultiTableWriters.keySet()) { + if (processedMultiTableWriters.containsKey(tableId)) { + continue; + } + Writer writer = restoredMultiTableWriters.get(tableId); + MultipleTableCommit tableCommit = MultipleTableCommit + .builder() + .commits(writer.prepareCommit()) + .tableId(tableId) + .build(); + prepared.add(tableCommit); + //close it. + writer.close(); + } + //clear restored multi writer. + restoredMultiTableWriters.clear(); + } + return prepared; + } + + @Override + public List> snapshotState(long checkpointId) throws IOException { + List> states = Lists.newArrayList(); + for (TableId tableId : processedMultiTableWriters.keySet()) { + List state = processedMultiTableWriters.get(tableId).snapshotState(checkpointId); + + MultipleTableState tableState = MultipleTableState + .builder() + .tableId(tableId) + .state(state) + .build(); + + states.add(tableState); + } + + return states; + } + + @Override + public void close() throws IOException { + for (Writer writer : processedMultiTableWriters.values()) { + writer.close(); + } + } + + @VisibleForTesting + public Map> getProcessedMultiTableWriters() { + return processedMultiTableWriters; + } + + @VisibleForTesting + public Map getTableIdRowTypeInfos() { + return tableIdRowTypeInfos; + } +} diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/comittable/MultipleTableCommit.java 
b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/comittable/MultipleTableCommit.java new file mode 100644 index 000000000..1277f8a69 --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/comittable/MultipleTableCommit.java @@ -0,0 +1,38 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.core.common.sink.multiple.comittable; + +import com.bytedance.bitsail.common.catalog.table.TableId; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serializable; +import java.util.List; + +@AllArgsConstructor +@NoArgsConstructor +@Data +@Builder +public class MultipleTableCommit implements Serializable { + + private TableId tableId; + + private List commits; +} diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/state/MultipleTableState.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/state/MultipleTableState.java new file mode 100644 index 000000000..9ac960c2c --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/state/MultipleTableState.java @@ -0,0 +1,38 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. 
and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.core.common.sink.multiple.state; + +import com.bytedance.bitsail.common.catalog.table.TableId; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serializable; +import java.util.List; + +@AllArgsConstructor +@NoArgsConstructor +@Data +@Builder +public class MultipleTableState implements Serializable { + + private TableId tableId; + + private List state; +} diff --git a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/catalog/FakeCatalog.java b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/catalog/FakeCatalog.java new file mode 100644 index 000000000..b11a97c2f --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/catalog/FakeCatalog.java @@ -0,0 +1,89 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.core.common.catalog; + +import com.bytedance.bitsail.common.catalog.table.Catalog; +import com.bytedance.bitsail.common.catalog.table.CatalogTable; +import com.bytedance.bitsail.common.catalog.table.CatalogTableColumn; +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.common.catalog.table.TableOperation; +import com.bytedance.bitsail.common.type.TypeInfoConverter; +import com.bytedance.bitsail.common.typeinfo.TypeInfo; + +import java.util.List; + +public class FakeCatalog implements Catalog { + + private final List<TableId> tableIds; + + private final List<CatalogTable> catalogTables; + + public FakeCatalog(List<TableId> tableIds, List<CatalogTable> catalogTables) { + this.tableIds = tableIds; + this.catalogTables = catalogTables; + } + + @Override + public void open(TypeInfoConverter typeInfoConverter) { + + } + + @Override + public void close() { + + } + + @Override + public TableId createCatalogTableDefinition() { + return null; + } + + @Override + public List<TableId> listTables() { + return tableIds; + } + + @Override + public boolean tableExists(TableId catalogTableDefinition) { + return false; + } + + @Override + public CatalogTable getCatalogTable(TableId tableId) { + return catalogTables.stream().filter(table -> tableId.equals(table.getTableId())).findFirst().orElseThrow(() -> new IllegalArgumentException("Unknown table: " + tableId)); //match on each table's own id; indexOf(tableId) on a list of CatalogTable never matches and produced get(-1). + } + + @Override + public void createTable(TableId catalogTableDefinition, CatalogTable catalogTable) { + + } + + @Override + public void alterTable(TableOperation tableOperation, CatalogTable table) { + + } + + @Override + public void alterTableColumns(TableOperation tableOperation, List<CatalogTableColumn>
catalogTableColumns) { + + } + + @Override + public boolean compareTypeCompatible(TypeInfo original, TypeInfo compared) { + return false; + } +} diff --git a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableCommitSerializerTest.java b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableCommitSerializerTest.java new file mode 100644 index 000000000..c5ad9f47a --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableCommitSerializerTest.java @@ -0,0 +1,55 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.core.common.serializer.multiple; + +import com.bytedance.bitsail.base.serializer.SimpleVersionedBinarySerializer; +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.core.common.sink.multiple.comittable.MultipleTableCommit; + +import com.google.common.collect.Lists; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; + +public class MultipleTableCommitSerializerTest { + + private MultipleTableCommitSerializer multipleTableCommitSerializer; + + @Before + public void before() { + multipleTableCommitSerializer = new MultipleTableCommitSerializer<>(new SimpleVersionedBinarySerializer()); + } + + @Test + public void testSerializer() throws IOException { + MultipleTableCommit multipleTableCommit = new MultipleTableCommit(); + + multipleTableCommit.setTableId(TableId.of("test", "name")); + multipleTableCommit.setCommits(Lists.newArrayList("a", "b", "c")); + + byte[] serialize = multipleTableCommitSerializer.serialize(multipleTableCommit); + + MultipleTableCommit deserialize = multipleTableCommitSerializer + .deserialize(multipleTableCommitSerializer.getVersion(), serialize); + + Assert.assertEquals(multipleTableCommit.getTableId(), deserialize.getTableId()); + Assert.assertEquals(multipleTableCommit.getCommits(), deserialize.getCommits()); + } + +} \ No newline at end of file diff --git a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableStateSerializerTest.java b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableStateSerializerTest.java new file mode 100644 index 000000000..3e18261c8 --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/serializer/multiple/MultipleTableStateSerializerTest.java @@ -0,0 +1,55 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. 
and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.core.common.serializer.multiple; + +import com.bytedance.bitsail.base.serializer.SimpleVersionedBinarySerializer; +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.core.common.sink.multiple.state.MultipleTableState; + +import com.google.common.collect.Lists; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; + +public class MultipleTableStateSerializerTest { + + private MultipleTableStateSerializer multipleTableCommitSerializer; + + @Before + public void before() { + multipleTableCommitSerializer = new MultipleTableStateSerializer<>(new SimpleVersionedBinarySerializer()); + } + + @Test + public void testSerializer() throws IOException { + MultipleTableState multipleTableState = new MultipleTableState(); + + multipleTableState.setTableId(TableId.of("test", "name")); + multipleTableState.setState(Lists.newArrayList("a", "b", "c")); + + byte[] serialize = multipleTableCommitSerializer.serialize(multipleTableState); + + MultipleTableState deserialize = multipleTableCommitSerializer + .deserialize(multipleTableCommitSerializer.getVersion(), serialize); + + Assert.assertEquals(multipleTableState.getTableId(), deserialize.getTableId()); + Assert.assertEquals(multipleTableState.getState(), deserialize.getState()); + } + +} \ No newline at end of file diff --git 
a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/MultiTablePrintSink.java b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/MultiTablePrintSink.java new file mode 100644 index 000000000..0b5ac1f69 --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/MultiTablePrintSink.java @@ -0,0 +1,47 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.core.common.sink; + +import com.bytedance.bitsail.base.connector.writer.v1.Writer; +import com.bytedance.bitsail.base.connector.writer.v1.WriterCommitter; +import com.bytedance.bitsail.base.extension.SupportMultipleSinkTable; +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.option.WriterOptions; +import com.bytedance.bitsail.connector.print.sink.PrintSink; +import com.bytedance.bitsail.connector.print.sink.PrintWriter; + +import java.util.Optional; + +public class MultiTablePrintSink extends PrintSink + implements SupportMultipleSinkTable { + @Override + public Writer createWriter(Writer.Context context, + BitSailConfiguration templateConfiguration) { + return new PrintWriter(templateConfiguration, context); + } + + @Override + public Optional> createCommitter(BitSailConfiguration templateConfiguration) { + return Optional.empty(); + } + + @Override + public BitSailConfiguration applyTableId(BitSailConfiguration template, TableId tableId) { + return template.set(WriterOptions.BaseWriterOptions.TABLE_NAME, tableId.getTable()); + } +} diff --git a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java new file mode 100644 index 000000000..6e0bb93ed --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java @@ -0,0 +1,182 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.core.common.sink.multiple; + +import com.bytedance.bitsail.base.connector.writer.v1.Writer; +import com.bytedance.bitsail.base.extension.SupportMultipleSinkTable; +import com.bytedance.bitsail.common.catalog.table.CatalogTable; +import com.bytedance.bitsail.common.catalog.table.CatalogTableColumn; +import com.bytedance.bitsail.common.catalog.table.CatalogTableSchema; +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.row.MultipleTableRow; +import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfoValueConverter; +import com.bytedance.bitsail.common.typeinfo.TypeInfos; +import com.bytedance.bitsail.component.format.debezium.DebeziumRowDeserializationSchema; +import com.bytedance.bitsail.core.common.sink.MultiTablePrintSink; +import com.bytedance.bitsail.core.common.sink.multiple.state.MultipleTableState; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.commons.lang3.StringUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentMap; +import 
java.util.regex.Pattern; +import java.util.stream.Collectors; + +public class MultipleTableWriterTest { + + private MultiTablePrintSink multiTablePrintSink; + private BitSailConfiguration jobConf; + private Writer.Context> context; + private Map catalogTables; + private TypeInfoValueConverter typeInfoValueConverter; + private MultipleTableWriter multipleTableWriter; + + private CatalogTable catalogTable1; + private CatalogTable catalogTable2; + + @Before + public void before() { + this.multiTablePrintSink = new MultiTablePrintSink(); + this.jobConf = BitSailConfiguration.newDefault(); + this.catalogTables = mockCatalogTables(); + this.typeInfoValueConverter = new TypeInfoValueConverter(BitSailConfiguration.newDefault()); + this.context = new Writer.Context>() { + @Override + public RowTypeInfo getRowTypeInfo() { + return null; + } + + @Override + public int getIndexOfSubTaskId() { + return 0; + } + + @Override + public boolean isRestored() { + return false; + } + + @Override + public List> getRestoreStates() { + return null; + } + }; + multipleTableWriter = new MultipleTableWriter<>( + jobConf, + context, + typeInfoValueConverter, + (SupportMultipleSinkTable) multiTablePrintSink, + catalogTables, + Pattern.compile("\\.*"), + new DebeziumRowDeserializationSchema(jobConf)); + } + + private Map mockCatalogTables() { + ConcurrentMap catalogTables = Maps.newConcurrentMap(); + + catalogTable1 = CatalogTable.builder() + .tableId(TableId.of("default", "test1")) + .catalogTableSchema(CatalogTableSchema.builder() + .columns(Lists.newArrayList( + CatalogTableColumn.builder() + .type(TypeInfos.INT_TYPE_INFO) + .name("int_type") + .build() + )).build() + ).build(); + catalogTable2 = CatalogTable.builder() + .tableId(TableId.of("default", "test2")) + .catalogTableSchema(CatalogTableSchema.builder() + .columns(Lists.newArrayList( + CatalogTableColumn.builder() + .type(TypeInfos.DOUBLE_TYPE_INFO) + .name("double_type") + .build() + )).build() + ).build(); + + 
catalogTables.put(catalogTable1.getTableId(), catalogTable1); + catalogTables.put(catalogTable2.getTableId(), catalogTable2); + + return catalogTables; + } + + @Test + public void testMultiTableWriter() throws IOException, URISyntaxException { + for (CatalogTable catalogTable : catalogTables.values()) { + multipleTableWriter.write(mockMultiTableRow(catalogTable)); + } + Map> processedMultiTableWriters = multipleTableWriter.getProcessedMultiTableWriters(); + Assert.assertEquals(processedMultiTableWriters.values().size(), 2); + + validateRowTypeInfo(catalogTable1, multipleTableWriter.getTableIdRowTypeInfos().get(catalogTable1.getTableId())); + validateRowTypeInfo(catalogTable2, multipleTableWriter.getTableIdRowTypeInfos().get(catalogTable2.getTableId())); + } + + public static void validateRowTypeInfo(CatalogTable catalogTable, RowTypeInfo rowTypeInfo) { + Assert.assertNotNull(catalogTable); + Assert.assertNotNull(rowTypeInfo); + + TypeInfo[] fieldTypes = catalogTable.getCatalogTableSchema().getColumns() + .stream() + .map(CatalogTableColumn::getType) + .collect(Collectors.toList()) + .toArray(new TypeInfo[] {}); + + String[] fieldNames = catalogTable.getCatalogTableSchema().getColumns() + .stream() + .map(CatalogTableColumn::getName) + .collect(Collectors.toList()) + .toArray(new String[] {}); + + Assert.assertArrayEquals(fieldTypes, rowTypeInfo.getTypeInfos()); + Assert.assertArrayEquals(fieldNames, rowTypeInfo.getFieldNames()); + } + + private static Row mockMultiTableRow(CatalogTable catalogTable) throws URISyntaxException, IOException { + MultipleTableRow multipleTableRow = MultipleTableRow.of( + catalogTable.getTableId().toString(), + StringUtils.EMPTY, + mockValueJson(), + String.valueOf(Long.MAX_VALUE), + StringUtils.EMPTY + ); + return multipleTableRow.asRow(); + } + + private static String mockValueJson() throws URISyntaxException, IOException { + return new String(Files.readAllBytes(Paths.get(MultipleTableWriterTest.class + .getClassLoader() + 
.getResource("file/debezium_table1.json") + .toURI() + .getPath()))); + } +} \ No newline at end of file diff --git a/bitsail-cores/bitsail-core-common/src/test/resources/file/debezium_table1.json b/bitsail-cores/bitsail-core-common/src/test/resources/file/debezium_table1.json new file mode 100644 index 000000000..5699f29b4 --- /dev/null +++ b/bitsail-cores/bitsail-core-common/src/test/resources/file/debezium_table1.json @@ -0,0 +1,257 @@ +{ + "schema":{ + "type":"struct", + "fields":[ + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"int_type" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"4", + "connect.decimal.precision":"20" + }, + "field":"double_type" + }, + { + "type":"string", + "optional":true, + "field":"date_type" + }, + { + "type":"string", + "optional":true, + "default":"", + "field":"varchar_type" + }, + { + "type":"int32", + "optional":false, + "name":"io.debezium.time.Date", + "version":1, + "field":"datetime" + } + ], + "optional":true, + "name":"localhost.test.jdbc_source_test.Value", + "field":"before" + }, + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"int_type" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"4", + "connect.decimal.precision":"20" + }, + "field":"double_type" + }, + { + "type":"string", + "optional":true, + "field":"date_type" + }, + { + "type":"string", + "optional":true, + "default":"", + "field":"varchar_type" + }, + { + "type":"int32", + "optional":false, + "name":"io.debezium.time.Date", + "version":1, + "field":"datetime" + } + ], + "optional":true, + "name":"localhost.test.jdbc_source_test.Value", + 
"field":"after" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"version" + }, + { + "type":"string", + "optional":false, + "field":"connector" + }, + { + "type":"string", + "optional":false, + "field":"name" + }, + { + "type":"int64", + "optional":false, + "field":"ts_ms" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Enum", + "version":1, + "parameters":{ + "allowed":"true,last,false" + }, + "default":"false", + "field":"snapshot" + }, + { + "type":"string", + "optional":false, + "field":"db" + }, + { + "type":"string", + "optional":true, + "field":"sequence" + }, + { + "type":"string", + "optional":true, + "field":"table" + }, + { + "type":"int64", + "optional":false, + "field":"server_id" + }, + { + "type":"string", + "optional":true, + "field":"gtid" + }, + { + "type":"string", + "optional":false, + "field":"file" + }, + { + "type":"int64", + "optional":false, + "field":"pos" + }, + { + "type":"int32", + "optional":false, + "field":"row" + }, + { + "type":"int64", + "optional":true, + "field":"thread" + }, + { + "type":"string", + "optional":true, + "field":"query" + } + ], + "optional":false, + "name":"io.debezium.connector.mysql.Source", + "field":"source" + }, + { + "type":"string", + "optional":false, + "field":"op" + }, + { + "type":"int64", + "optional":true, + "field":"ts_ms" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"id" + }, + { + "type":"int64", + "optional":false, + "field":"total_order" + }, + { + "type":"int64", + "optional":false, + "field":"data_collection_order" + } + ], + "optional":true, + "field":"transaction" + } + ], + "optional":false, + "name":"localhost.test.jdbc_source_test.Envelope" + }, + "payload":{ + "before":null, + "after":{ + "id":1, + "int_type":1001, + "double_type":"AbH8", + "date_type":"2022-10-01", + "varchar_type":"varchar_01", + "datetime":19297 + }, + "source":{ + "version":"1.6.4.Final", + 
"connector":"mysql", + "name":"localhost", + "ts_ms":1682237044000, + "snapshot":"false", + "db":"test", + "sequence":null, + "table":"jdbc_source_test", + "server_id":1, + "gtid":null, + "file":"binlog.000002", + "pos":986, + "row":0, + "thread":null, + "query":null + }, + "op":"c", + "ts_ms":1682237048134, + "transaction":null + } +} \ No newline at end of file diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/builder/FlinkWriterBuilder.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/builder/FlinkWriterBuilder.java index 48ec8cdc8..becf8555e 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/builder/FlinkWriterBuilder.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/builder/FlinkWriterBuilder.java @@ -16,8 +16,8 @@ package com.bytedance.bitsail.core.flink.bridge.writer.builder; +import com.bytedance.bitsail.base.catalog.CatalogFactoryHelper; import com.bytedance.bitsail.base.connector.writer.v1.Sink; -import com.bytedance.bitsail.base.connector.writer.v1.WriterCommitter; import com.bytedance.bitsail.base.connector.writer.v1.comittable.CommittableMessage; import com.bytedance.bitsail.base.dirty.AbstractDirtyCollector; import com.bytedance.bitsail.base.dirty.DirtyCollectorFactory; @@ -25,6 +25,7 @@ import com.bytedance.bitsail.base.execution.Mode; import com.bytedance.bitsail.base.execution.ProcessResult; import com.bytedance.bitsail.base.extension.GlobalCommittable; +import com.bytedance.bitsail.base.extension.SupportMultipleSinkTable; import com.bytedance.bitsail.base.extension.TypeInfoConverterFactory; import com.bytedance.bitsail.base.messenger.Messenger; import 
com.bytedance.bitsail.base.messenger.checker.DirtyRecordChecker; @@ -34,7 +35,9 @@ import com.bytedance.bitsail.base.ratelimit.Channel; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.option.CommonOptions; +import com.bytedance.bitsail.common.option.WriterOptions; import com.bytedance.bitsail.common.type.TypeInfoConverter; +import com.bytedance.bitsail.core.common.sink.multiple.MultipleTableSink; import com.bytedance.bitsail.core.flink.bridge.writer.delegate.DelegateFlinkCommitter; import com.bytedance.bitsail.core.flink.bridge.writer.delegate.DelegateFlinkWriter; import com.bytedance.bitsail.flink.core.execution.FlinkExecutionEnviron; @@ -48,6 +51,7 @@ import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,7 +66,7 @@ public class FlinkWriterBuilder sink; + private Sink sink; private boolean isBatchMode; @@ -89,6 +93,16 @@ public void configure(ExecutionEnviron execution, BitSailConfiguration writerCon this.commonConfiguration = execution.getCommonConfiguration(); this.writerConfiguration = writerConfiguration; + if (writerConfiguration.get(WriterOptions.BaseWriterOptions.MULTIPLE_TABLE_ENABLED)) { + if (sink instanceof SupportMultipleSinkTable) { + sink = new MultipleTableSink<>(sink, CatalogFactoryHelper + .getTableCatalogFactory(sink.getWriterName())); + } else { + LOG.info("Sink {} must implement interface SupportMultipleSinkTable when enabled option {}.", + sink.getWriterName(), + WriterOptions.BaseWriterOptions.MULTIPLE_TABLE_ENABLED.key()); + } + } sink.configure(execution.getCommonConfiguration(), writerConfiguration); this.messengerContext = SimpleMessengerContext.builder() @@ -111,7 +125,7 @@ public void addWriter(DataStream source, int 
writerParallelism) { .getCheckpointConfig() .isCheckpointingEnabled(); - DelegateFlinkWriter flinkWriter = new DelegateFlinkWriter<>( + DelegateFlinkWriter flinkWriter = new DelegateFlinkWriter<>( commonConfiguration, writerConfiguration, sink, @@ -122,22 +136,22 @@ public void addWriter(DataStream source, int writerParallelism) { flinkWriter.setDirtyCollector(dirtyCollector); flinkWriter.setChannel(channel); - DataStream> writeStream = source.transform(getWriterOperatorName(), - TypeInformation.of(new TypeHint>() { - }), flinkWriter) + DataStream> writeStream = source.transform(getWriterOperatorName(), + (TypeInformation) TypeInformation.of(new TypeHint>() { + }), (OneInputStreamOperator) flinkWriter) .setParallelism(writerParallelism) .name(getWriterOperatorName()) .uid(getWriterOperatorName()); - Optional> committer = sink.createCommitter(); + Optional committer = sink.createCommitter(); if (committer.isPresent()) { LOG.info("Writer enabled committer."); - DataStream> commitStream = writeStream + DataStream> commitStream = writeStream .transform( getWriterCommitterOperatorName(), - TypeInformation.of(new TypeHint>() { + (TypeInformation) TypeInformation.of(new TypeHint>() { }), - new DelegateFlinkCommitter<>(sink, isBatchMode, isCheckpointingEnabled)) + (OneInputStreamOperator) new DelegateFlinkCommitter<>(sink, isBatchMode, isCheckpointingEnabled)) .uid(getWriterCommitterOperatorName()) .name(getWriterCommitterOperatorName()) .setParallelism(writerParallelism); diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java index 9682c5b65..9dc456fd5 100644 --- 
a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java @@ -81,7 +81,7 @@ public class DelegateFlinkWriter sink; private final BitSailConfiguration writerConfiguration; private final BitSailConfiguration commonConfiguration; - private final FlinkRowConverter flinkRowConvertSerializer; + private final FlinkRowConverter flinkRowConverter; private final RowTypeInfo rowTypeInfo; private transient Writer writer; private transient ListState writeState; @@ -117,7 +117,7 @@ public DelegateFlinkWriter(BitSailConfiguration commonConfiguration, .getRowTypeInfo(sink.createTypeInfoConverter(), columnInfos); } - this.flinkRowConvertSerializer = new FlinkRowConverter( + this.flinkRowConverter = new FlinkRowConverter( this.rowTypeInfo, this.commonConfiguration); } @@ -191,7 +191,7 @@ public void processElement(StreamRecord element) throws Exception { try { if (value instanceof Row) { // convert flink row to BitSail row. 
- com.bytedance.bitsail.common.row.Row deserializer = flinkRowConvertSerializer.to((Row) value); + com.bytedance.bitsail.common.row.Row deserializer = flinkRowConverter.from((Row) value); writer.write((InputT) deserializer); } else { writer.write(element.getValue()); diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/pom.xml b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/pom.xml index 9b67f455d..d29ddc0c7 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/pom.xml +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/pom.xml @@ -123,6 +123,27 @@ + + org.apache.flink + flink-runtime-web + ${flink.version} + provided + + + org.yaml + snakeyaml + + + org.apache.commons + commons-pool2 + + + org.eclipse.jetty + jetty-http + + + + org.apache.flink flink-table-api-scala-bridge_${scala.binary.version} diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/execution/configurer/FlinkDAGBuilderInterceptor.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/execution/configurer/FlinkDAGBuilderInterceptor.java index b7bbceaeb..c625d6e13 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/execution/configurer/FlinkDAGBuilderInterceptor.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-base/src/main/java/com/bytedance/bitsail/flink/core/execution/configurer/FlinkDAGBuilderInterceptor.java @@ -16,8 +16,8 @@ package com.bytedance.bitsail.flink.core.execution.configurer; -import com.bytedance.bitsail.base.catalog.TableCatalogFactory; -import com.bytedance.bitsail.base.catalog.TableCatalogFactoryHelper; +import com.bytedance.bitsail.base.catalog.CatalogFactory; +import com.bytedance.bitsail.base.catalog.CatalogFactoryHelper; import com.bytedance.bitsail.base.connector.BuilderGroup; import 
com.bytedance.bitsail.base.connector.reader.DataReaderDAGBuilder; import com.bytedance.bitsail.base.connector.transformer.DataTransformDAGBuilder; @@ -69,10 +69,10 @@ private void alignTableCatalog(List readerBuilders, DataReaderDAGBuilder dataReaderDAGBuilder = readerBuilders.get(0); DataWriterDAGBuilder dataWriterDAGBuilder = writerBuilders.get(0); - TableCatalogFactory readerCatalogFactory = TableCatalogFactoryHelper + CatalogFactory readerCatalogFactory = CatalogFactoryHelper .getTableCatalogFactory(dataReaderDAGBuilder.getReaderName()); - TableCatalogFactory writerCatalogFactory = TableCatalogFactoryHelper + CatalogFactory writerCatalogFactory = CatalogFactoryHelper .getTableCatalogFactory(dataWriterDAGBuilder.getWriterName()); if (Objects.isNull(readerCatalogFactory) || Objects.isNull(writerCatalogFactory)) { @@ -100,8 +100,8 @@ private void alignTableCatalog(List readerBuilders, BitSailConfiguration writerConfiguration = executionEnviron.getWriterConfigurations().get(0); TableCatalogManager catalogManager = TableCatalogManager.builder() - .readerTableCatalog(readerCatalogFactory.createTableCatalog(BuilderGroup.READER, executionEnviron, readerConfiguration)) - .writerTableCatalog(writerCatalogFactory.createTableCatalog(BuilderGroup.WRITER, executionEnviron, writerConfiguration)) + .readerCatalog(readerCatalogFactory.createTableCatalog(BuilderGroup.READER, readerConfiguration)) + .writerCatalog(writerCatalogFactory.createTableCatalog(BuilderGroup.WRITER, writerConfiguration)) .readerTypeInfoConverter(readerTypeInfoConverter) .writerTypeInfoConverter(writerTypeInfoConverter) .commonConfiguration(executionEnviron.getCommonConfiguration()) diff --git a/bitsail-cores/bitsail-core-flink/pom.xml b/bitsail-cores/bitsail-core-flink/pom.xml index bc8ab00a6..7a562acf6 100644 --- a/bitsail-cores/bitsail-core-flink/pom.xml +++ b/bitsail-cores/bitsail-core-flink/pom.xml @@ -226,6 +226,14 @@ + + + com.bytedance.bitsail + bitsail-core-common + ${revision} + + + diff --git 
a/bitsail-cores/pom.xml b/bitsail-cores/pom.xml index 04e53e04a..86c245e29 100644 --- a/bitsail-cores/pom.xml +++ b/bitsail-cores/pom.xml @@ -34,6 +34,7 @@ bitsail-core-api bitsail-core-entry + bitsail-core-common bitsail-core-flink \ No newline at end of file diff --git a/bitsail-test/bitsail-test-integration/bitsail-test-integration-connector-legacy/bitsail-test-integration-jdbc-legacy/src/test/java/com/bytedance/bitsail/test/integration/legacy/mysql/MysqlConnectorITCase.java b/bitsail-test/bitsail-test-integration/bitsail-test-integration-connector-legacy/bitsail-test-integration-jdbc-legacy/src/test/java/com/bytedance/bitsail/test/integration/legacy/mysql/MysqlConnectorITCase.java index 4e99daa79..a558f3864 100644 --- a/bitsail-test/bitsail-test-integration/bitsail-test-integration-connector-legacy/bitsail-test-integration-jdbc-legacy/src/test/java/com/bytedance/bitsail/test/integration/legacy/mysql/MysqlConnectorITCase.java +++ b/bitsail-test/bitsail-test-integration/bitsail-test-integration-connector-legacy/bitsail-test-integration-jdbc-legacy/src/test/java/com/bytedance/bitsail/test/integration/legacy/mysql/MysqlConnectorITCase.java @@ -17,9 +17,9 @@ package com.bytedance.bitsail.test.integration.legacy.mysql; import com.bytedance.bitsail.common.catalog.table.CatalogTable; -import com.bytedance.bitsail.common.catalog.table.CatalogTableDefinition; +import com.bytedance.bitsail.common.catalog.table.TableId; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; -import com.bytedance.bitsail.connector.legacy.jdbc.catalog.MySQLTableCatalog; +import com.bytedance.bitsail.connector.legacy.jdbc.catalog.MySQLCatalog; import com.bytedance.bitsail.connector.legacy.jdbc.converter.JdbcTypeInfoConverter; import com.bytedance.bitsail.connector.legacy.jdbc.model.ClusterInfo; import com.bytedance.bitsail.connector.legacy.jdbc.model.ConnectionInfo; @@ -110,7 +110,7 @@ private void mysqlReader(String filePath) throws Exception { @Test public void 
testGetCatalogTable() { - MySQLTableCatalog catalog = MySQLTableCatalog + MySQLCatalog catalog = MySQLCatalog .builder() .url(mySQLContainer.getJdbcUrl()) .table(TABLE) @@ -121,7 +121,7 @@ public void testGetCatalogTable() { catalog.open(new JdbcTypeInfoConverter("mysql")); - CatalogTableDefinition catalogTableDefinition = catalog.createCatalogTableDefinition(); + TableId catalogTableDefinition = catalog.createCatalogTableDefinition(); CatalogTable catalogTable = catalog.getCatalogTable(catalogTableDefinition); Assert.assertNotNull(catalogTable.getCatalogTableSchema()); From 47fd10238d97cf63772f59c74e8295ac92db6bf9 Mon Sep 17 00:00:00 2001 From: haoke Date: Tue, 25 Apr 2023 16:14:16 +0800 Subject: [PATCH 06/14] [BitSail][Multi-Sink]support type info converter. --- .../common/typeinfo/BasicArrayTypeInfo.java | 9 + .../bitsail/common/typeinfo/ListTypeInfo.java | 9 + .../bitsail/common/typeinfo/MapTypeInfo.java | 9 + .../bitsail/common/typeinfo/TypeInfo.java | 8 + .../common/typeinfo/TypeInfoBridge.java | 26 + .../common/typeinfo/TypeInfoCompatibles.java | 855 ++++++++++++++++++ .../typeinfo/TypeInfoValueConverter.java | 209 +---- .../typeinfo/TypeInfoCompatiblesTest.java | 239 +++++ .../typeinfo/TypeInfoValueConverterTest.java | 79 ++ .../DebeziumRowDeserializationSchemaTest.java | 58 +- .../test/resources/file/debezium_delete.json | 54 ++ .../test/resources/file/debezium_insert.json | 257 ++++++ .../test/resources/file/debezium_upsert.json | 207 +++++ .../file/postgres/debezium_pg_delete.json | 635 +++++++++++++ .../file/postgres/debezium_pg_upsert.json | 680 ++++++++++++++ .../multiple/MultipleTableWriterTest.java | 4 +- 16 files changed, 3134 insertions(+), 204 deletions(-) create mode 100644 bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatibles.java create mode 100644 bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatiblesTest.java create mode 100644 
bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_delete.json create mode 100644 bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_insert.json create mode 100644 bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_upsert.json create mode 100644 bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/postgres/debezium_pg_delete.json create mode 100644 bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/postgres/debezium_pg_upsert.json diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/BasicArrayTypeInfo.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/BasicArrayTypeInfo.java index 03994c954..f273d14f1 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/BasicArrayTypeInfo.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/BasicArrayTypeInfo.java @@ -18,6 +18,7 @@ import com.bytedance.bitsail.common.util.Preconditions; +import java.nio.charset.Charset; import java.util.Objects; public class BasicArrayTypeInfo extends TypeInfo { @@ -52,6 +53,14 @@ public boolean equals(Object obj) { } } + @Override + public Object compatibleTo(TypeInfo target, Object value) { + if (TypeInfos.STRING_TYPE_INFO.getTypeClass() == target.getTypeClass()) { + return new String((byte[]) value, Charset.defaultCharset()); + } + return super.compatibleTo(target, value); + } + @Override public int hashCode() { return Objects.hash(arrayClass); diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/ListTypeInfo.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/ListTypeInfo.java index 582e50742..e69474ab8 100644 --- 
a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/ListTypeInfo.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/ListTypeInfo.java @@ -16,6 +16,7 @@ package com.bytedance.bitsail.common.typeinfo; +import com.bytedance.bitsail.common.util.JsonSerializer; import com.bytedance.bitsail.common.util.Preconditions; import java.util.List; @@ -52,6 +53,14 @@ public boolean equals(Object obj) { } } + @Override + public Object compatibleTo(TypeInfo target, Object value) { + if (TypeInfos.STRING_TYPE_INFO.getTypeClass() == target.getTypeClass()) { + return JsonSerializer.serialize(value); + } + return super.compatibleTo(target, value); + } + public TypeInfo getElementTypeInfo() { return this.elementTypeInfo; } diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/MapTypeInfo.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/MapTypeInfo.java index 1d9e86594..e489affdb 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/MapTypeInfo.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/MapTypeInfo.java @@ -16,6 +16,7 @@ package com.bytedance.bitsail.common.typeinfo; +import com.bytedance.bitsail.common.util.JsonSerializer; import com.bytedance.bitsail.common.util.Preconditions; import java.util.Map; @@ -69,6 +70,14 @@ public TypeInfo getValueTypeInfo() { return this.valueTypeInfo; } + @Override + public Object compatibleTo(TypeInfo target, Object value) { + if (TypeInfos.STRING_TYPE_INFO.getTypeClass() == target.getTypeClass()) { + return JsonSerializer.serialize(value); + } + return super.compatibleTo(target, value); + } + @Override public int hashCode() { return 31 * keyTypeInfo.hashCode() + valueTypeInfo.hashCode(); diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfo.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfo.java index c541b9971..d007fbdef 100644 --- 
a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfo.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfo.java @@ -16,6 +16,9 @@ package com.bytedance.bitsail.common.typeinfo; +import com.bytedance.bitsail.common.BitSailException; +import com.bytedance.bitsail.common.exception.CommonErrorCode; + import java.io.Serializable; import java.util.List; @@ -42,4 +45,9 @@ public List getTypeProperties() { public void setTypeProperties(List typeProperties) { } + + public Object compatibleTo(TypeInfo target, Object value) { + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Type %s can't bridged to target type info %s.", this, target)); + } } diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoBridge.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoBridge.java index 883f74895..64870b207 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoBridge.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoBridge.java @@ -22,6 +22,7 @@ import com.google.common.collect.Maps; import org.apache.commons.lang3.StringUtils; +import java.util.List; import java.util.Map; import java.util.Objects; @@ -36,6 +37,9 @@ public class TypeInfoBridge { public static final Map, Types> TYPE_INFO_TYPES_MAPPING = Maps.newHashMap(); + public static final Map, TypeInfo> TYPE_INFO_CLASS_MAPPING = + Maps.newHashMap(); + static { TYPE_INFO_MAPPING.put(Types.VOID, TypeInfos.VOID_TYPE_INFO); TYPE_INFO_MAPPING.put(Types.SHORT, TypeInfos.SHORT_TYPE_INFO); @@ -65,7 +69,11 @@ public class TypeInfoBridge { TYPE_INFO_MAPPING.get(type)); } TYPE_INFO_TYPES_MAPPING.put(TYPE_INFO_MAPPING.get(type).getTypeClass(), type); + TYPE_INFO_CLASS_MAPPING.put(TYPE_INFO_MAPPING.get(type).getTypeClass(), TYPE_INFO_MAPPING.get(type)); } + + //Add extra java.util.date. 
+ TYPE_INFO_CLASS_MAPPING.put(java.util.Date.class, TypeInfos.SQL_TIMESTAMP_TYPE_INFO); } public static TypeInfo bridgeTypeInfo(String typeString) { @@ -85,4 +93,22 @@ public static String bridgeTypes(TypeInfo typeInfo) { String.format("Not support bridge complex type info %s.", typeInfo)); } + public static TypeInfo bridgeTypeClass(Class clazz) { + if (Objects.isNull(clazz)) { + return TypeInfos.VOID_TYPE_INFO; + } + TypeInfo typeInfo = TYPE_INFO_CLASS_MAPPING.get(clazz); + if (Objects.nonNull(typeInfo)) { + return typeInfo; + } + if (Map.class.isAssignableFrom(clazz)) { + return new MapTypeInfo<>(new GenericTypeInfo<>(Object.class), new GenericTypeInfo<>(Object.class)); + } + if (List.class.isAssignableFrom(clazz)) { + return new ListTypeInfo<>(new GenericTypeInfo<>(Object.class)); + } + throw BitSailException.asBitSailException(CommonErrorCode.INTERNAL_ERROR, + String.format("Not support bridge type info from class %s", clazz)); + } + } diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatibles.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatibles.java new file mode 100644 index 000000000..69c4dd8e9 --- /dev/null +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatibles.java @@ -0,0 +1,855 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.common.typeinfo; + +import com.bytedance.bitsail.common.BitSailException; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.exception.CommonErrorCode; +import com.bytedance.bitsail.common.option.CommonOptions; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.HashBasedTable; +import org.apache.commons.lang3.math.NumberUtils; + +import java.io.Serializable; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.sql.Date; +import java.sql.Time; +import java.sql.Timestamp; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.Objects; +import java.util.function.Function; + +public class TypeInfoCompatibles implements Serializable { + + private static final Long MAX_INTEGER_LONG_VALUE = (long) Integer.MAX_VALUE; + private static final Long MIN_INTEGER_LONG_VALUE = (long) Integer.MIN_VALUE; + + private static final Long MAX_SHORT_LONG_VALUE = (long) Short.MAX_VALUE; + private static final Long MIN_SHORT_LONG_VALUE = (long) Short.MIN_VALUE; + + private static final Integer MAX_SHORT_INTEGER_VALUE = (int) Short.MAX_VALUE; + private static final Integer MIN_SHORT_INTEGER_VALUE = (int) Short.MIN_VALUE; + + private final BitSailConfiguration commonConfiguration; + + private final DateTimeFormatter dateFormatter; + private final DateTimeFormatter timeFormatter; + private final DateTimeFormatter dateTimeFormatter; + private final ZoneId dateTimeZone; + + private final HashBasedTable, TypeInfo, Function> compatibles; + + public TypeInfoCompatibles(BitSailConfiguration commonConfiguration) { + this.commonConfiguration = commonConfiguration; + this.compatibles = HashBasedTable.create(); + + 
this.dateFormatter = DateTimeFormatter.ofPattern(commonConfiguration.get(CommonOptions + .DateFormatOptions.DATE_PATTERN)); + this.timeFormatter = DateTimeFormatter.ofPattern(commonConfiguration.get(CommonOptions + .DateFormatOptions.TIME_PATTERN)); + this.dateTimeFormatter = DateTimeFormatter.ofPattern(commonConfiguration.get(CommonOptions + .DateFormatOptions.DATE_TIME_PATTERN)); + + this.dateTimeZone = commonConfiguration.fieldExists(CommonOptions.DateFormatOptions.TIME_ZONE) ? + ZoneId.of(commonConfiguration.get(CommonOptions.DateFormatOptions.TIME_ZONE)) : + ZoneId.systemDefault(); + + addByteArrayTypeInfoCompatibles(); + addBooleanTypeInfoCompatibles(); + addStringTypeInfoCompatibles(); + addNumberTypeInfoCompatibles(); + addSqlDateTypeInfoCompatibles(); + addSqlTimeTypeInfoCompatibles(); + addSqlTimestampTypeInfoCompatibles(); + addLocalDateTypeInfoCompatibles(); + addLocalTimeTypeInfoCompatibles(); + addLocalDateTimeTypeInfoCompatibles(); + } + + private void addByteArrayTypeInfoCompatibles() { + compatibles.put(BasicArrayTypeInfo.BINARY_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> new String((byte[]) value, Charset.defaultCharset()) + ); + } + + private void addLocalDateTimeTypeInfoCompatibles() { + compatibles.put(TypeInfos.LOCAL_DATE_TIME_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((LocalDateTime) value).format(dateTimeFormatter) + ); + + compatibles.put(TypeInfos.LOCAL_DATE_TIME_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + //TODO check time zone. 
+ (value) -> ((LocalDateTime) value).atZone(dateTimeZone) + .toInstant().toEpochMilli() + ); + } + + private void addLocalTimeTypeInfoCompatibles() { + compatibles.put(TypeInfos.LOCAL_TIME_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((LocalTime) value).format(timeFormatter) + ); + } + + private void addLocalDateTypeInfoCompatibles() { + compatibles.put(TypeInfos.LOCAL_DATE_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((LocalDate) value).format(dateFormatter) + ); + } + + private void addSqlTimestampTypeInfoCompatibles() { + + compatibles.put(TypeInfos.SQL_TIMESTAMP_TYPE_INFO, + TypeInfos.LOCAL_DATE_TIME_TYPE_INFO, + (value) -> ((Timestamp) value).toLocalDateTime() + ); + + compatibles.put(TypeInfos.SQL_TIMESTAMP_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + return dateTimeFormatter.format(((LocalDateTime) compatibles.get(TypeInfos.SQL_TIMESTAMP_TYPE_INFO, TypeInfos.LOCAL_DATE_TIME_TYPE_INFO) + .apply(value))); + } + } + ); + compatibles.put(TypeInfos.SQL_TIMESTAMP_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + (value) -> ((Timestamp) value).getTime() + ); + + } + + private void addSqlTimeTypeInfoCompatibles() { + compatibles.put(TypeInfos.SQL_TIME_TYPE_INFO, + TypeInfos.LOCAL_TIME_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + java.util.Date date = (java.util.Date) value; + return Instant.ofEpochMilli(date.getTime()) + .atZone(dateTimeZone) + .toLocalTime(); + } + } + ); + + compatibles.put(TypeInfos.SQL_TIME_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + return timeFormatter.format(((LocalTime) compatibles.get(TypeInfos.SQL_TIME_TYPE_INFO, TypeInfos.LOCAL_TIME_TYPE_INFO) + .apply(value))); + } + } + ); + compatibles.put(TypeInfos.SQL_TIME_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + (value) -> ((java.util.Date) value).getTime() + ); + } + + private void addSqlDateTypeInfoCompatibles() { + + 
compatibles.put(TypeInfos.SQL_DATE_TYPE_INFO, + TypeInfos.LOCAL_DATE_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + java.util.Date date = (java.util.Date) value; + return Instant.ofEpochMilli(date.getTime()) + .atZone(dateTimeZone) + .toLocalDate(); + } + } + ); + + compatibles.put(TypeInfos.SQL_DATE_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + return dateFormatter.format(((LocalDate) compatibles.get(TypeInfos.SQL_DATE_TYPE_INFO, TypeInfos.LOCAL_DATE_TYPE_INFO) + .apply(value))); + } + } + ); + + compatibles.put(TypeInfos.SQL_DATE_TYPE_INFO, + TypeInfos.LOCAL_DATE_TIME_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + java.util.Date date = (java.util.Date) value; + return Instant.ofEpochMilli(date.getTime()) + .atZone(dateTimeZone) + .toLocalDateTime(); + } + } + ); + + compatibles.put(TypeInfos.SQL_DATE_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + java.util.Date date = (java.util.Date) value; + return date.getTime(); + } + } + ); + } + + private void addNumberTypeInfoCompatibles() { + compatibles.put(TypeInfos.SHORT_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((Short) value).toString() + ); + + compatibles.put(TypeInfos.SHORT_TYPE_INFO, + TypeInfos.BYTE_TYPE_INFO, + (value) -> ((Short) value).byteValue() + ); + + compatibles.put(TypeInfos.SHORT_TYPE_INFO, + TypeInfos.BOOLEAN_TYPE_INFO, + (value) -> ((Short) value) == 1 + ); + compatibles.put(TypeInfos.SHORT_TYPE_INFO, + TypeInfos.INT_TYPE_INFO, + (value) -> ((Short) value).intValue() + ); + + compatibles.put(TypeInfos.SHORT_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + (value) -> ((Short) value).longValue() + ); + + compatibles.put(TypeInfos.SHORT_TYPE_INFO, + TypeInfos.FLOAT_TYPE_INFO, + (value) -> ((Short) value).floatValue() + ); + + compatibles.put(TypeInfos.SHORT_TYPE_INFO, + TypeInfos.DOUBLE_TYPE_INFO, + 
(value) -> ((Short) value).doubleValue() + ); + + //TODO in future, will be removed. short -> big integer + compatibles.put(TypeInfos.SHORT_TYPE_INFO, + TypeInfos.BIG_INTEGER_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return new BigInteger(((Short) value).toString()); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("short [%s] can't convert to big integer.", value)); + } + } + } + ); + + //TODO in future, will be removed. short -> big decimal + compatibles.put(TypeInfos.SHORT_TYPE_INFO, + TypeInfos.BIG_DECIMAL_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return new BigDecimal(((Short) value).toString()); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("short [%s] can't convert to big decimal.", value)); + } + } + } + ); + + compatibles.put(TypeInfos.INT_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((Integer) value).toString() + ); + + compatibles.put(TypeInfos.INT_TYPE_INFO, + TypeInfos.SHORT_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + Integer integer = (Integer) value; + if (shortOverflow(integer)) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_OVER_FLOW, + String.format("int [%s] can't convert to short.", value)); + } + return integer.shortValue(); + } + } + ); + + compatibles.put(TypeInfos.INT_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + (value) -> ((Integer) value).longValue() + ); + + compatibles.put(TypeInfos.INT_TYPE_INFO, + TypeInfos.FLOAT_TYPE_INFO, + (value) -> ((Integer) value).floatValue() + ); + + compatibles.put(TypeInfos.INT_TYPE_INFO, + TypeInfos.DOUBLE_TYPE_INFO, + (value) -> ((Integer) value).doubleValue() + ); + + compatibles.put(TypeInfos.INT_TYPE_INFO, + TypeInfos.BOOLEAN_TYPE_INFO, + (value) -> ((Integer) 
value) == 1 + ); + + //TODO in future, will be removed. int -> big integer + compatibles.put(TypeInfos.INT_TYPE_INFO, + TypeInfos.BIG_INTEGER_TYPE_INFO, + (value) -> BigInteger.valueOf(((Integer) value).longValue()) + ); + + //TODO in future, will be removed. int -> big decimal + compatibles.put(TypeInfos.INT_TYPE_INFO, + TypeInfos.BIG_DECIMAL_TYPE_INFO, + value -> BigDecimal.valueOf((Long) compatibles.get(TypeInfos.INT_TYPE_INFO, TypeInfos.LONG_TYPE_INFO) + .apply(value)) + ); + + compatibles.put(TypeInfos.LONG_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((Long) value).toString() + ); + + compatibles.put(TypeInfos.LONG_TYPE_INFO, + TypeInfos.DOUBLE_TYPE_INFO, + (value) -> ((Long) value).doubleValue() + ); + + compatibles.put(TypeInfos.LONG_TYPE_INFO, + TypeInfos.BOOLEAN_TYPE_INFO, + (value) -> ((Long) value) == 1L + ); + + compatibles.put(TypeInfos.LONG_TYPE_INFO, + TypeInfos.INT_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + Long longValue = (Long) value; + if (integerOverflow(longValue)) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_OVER_FLOW, + String.format("long [%s] can't convert to int.", value)); + } + return longValue.intValue(); + } + } + ); + + compatibles.put(TypeInfos.LONG_TYPE_INFO, + TypeInfos.SHORT_TYPE_INFO, + //TODO overflow check + new Function() { + @Override + public Object apply(Object value) { + Long longValue = (Long) value; + if (shortOverflow(longValue)) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_OVER_FLOW, + String.format("long [%s] can't convert to short.", value)); + } + return longValue.shortValue(); + } + } + ); + + //only support milliseconds; resolved in the configured dateTimeZone so it mirrors local date time -> long + compatibles.put(TypeInfos.LONG_TYPE_INFO, + TypeInfos.LOCAL_DATE_TIME_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return Instant.ofEpochMilli((Long) value).atZone(dateTimeZone) + .toLocalDateTime(); + } catch (Exception e) { + throw 
BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("long [%s] can't convert to local date time.", value)); + } + } + } + ); + + compatibles.put(TypeInfos.LONG_TYPE_INFO, + TypeInfos.SQL_TIMESTAMP_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + try { + return new Timestamp((Long) value); + } catch (Exception e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("long [%s] can't convert to timestamp", value)); + } + } + } + ); + + //compatibles.put(TypeInfos.LONG_TYPE_INFO, + // TypeInfos.BIG_INTEGER_TYPE_INFO, + // null + //); + + //compatibles.put(TypeInfos.LONG_TYPE_INFO, + // TypeInfos.BIG_DECIMAL_TYPE_INFO, + // null + //); + // + + compatibles.put(TypeInfos.BIG_DECIMAL_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((BigDecimal) value).toString() + ); + + compatibles.put(TypeInfos.BIG_INTEGER_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((BigInteger) value).toString() + ); + + compatibles.put(TypeInfos.FLOAT_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((Float) value).toString() + ); + + //TODO in future, will be removed. float -> big decimal + compatibles.put(TypeInfos.FLOAT_TYPE_INFO, + TypeInfos.BIG_DECIMAL_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return BigDecimal.valueOf(((Float) value).doubleValue()); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("float [%s] can't convert to big decimal.", value)); + } + } + } + ); + + compatibles.put(TypeInfos.FLOAT_TYPE_INFO, + TypeInfos.DOUBLE_TYPE_INFO, + (value) -> ((Float) value).doubleValue() + ); + + //TODO in future, will be removed. 
float -> long + compatibles.put(TypeInfos.FLOAT_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + (value) -> ((Float) value).longValue() + ); + + compatibles.put(TypeInfos.DOUBLE_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((Double) value).toString() //double source values arrive boxed as Double, not Float + ); + + compatibles.put(TypeInfos.DOUBLE_TYPE_INFO, + TypeInfos.BIG_DECIMAL_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return BigDecimal.valueOf(((Double) value)); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("double [%s] can't convert to big decimal.", value)); + } + } + } + ); + + //TODO in future, will be removed. double -> big integer + compatibles.put(TypeInfos.DOUBLE_TYPE_INFO, + TypeInfos.BIG_INTEGER_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return BigDecimal.valueOf(((Double) value)).toBigInteger(); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("double [%s] can't convert to big integer.", value)); + } + } + } + ); + + //TODO in future, will be removed. 
double -> long + compatibles.put(TypeInfos.DOUBLE_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + (value) -> ((Double) value).longValue() + ); + } + + private void addStringTypeInfoCompatibles() { + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.SHORT_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return NumberUtils.toShort(value.toString()); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("string [%s] can't convert to short.", value)); + } + } + } + ); + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.INT_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return NumberUtils.toInt(value.toString()); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("string [%s] can't convert to int.", value)); + } + } + } + ); + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return NumberUtils.toLong(value.toString()); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("string [%s] can't convert to long.", value)); + } + } + } + ); + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.FLOAT_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return NumberUtils.toFloat(value.toString()); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("string [%s] can't convert to float.", value)); + } + } + } + ); + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.DOUBLE_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return 
NumberUtils.toDouble(value.toString()); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("string [%s] can't convert to double.", value)); + } + } + } + ); + + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.SQL_DATE_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + try { + LocalDate localDate = (LocalDate) (compatibles.get(TypeInfos.STRING_TYPE_INFO, TypeInfos.LOCAL_DATE_TYPE_INFO).apply(value)); //string -> local date converter, so the cast below matches + long timestamp = localDate.atStartOfDay() + .atZone(dateTimeZone) + .toInstant() + .toEpochMilli(); + return new Date(timestamp); + } catch (Exception e) { + try { + LocalDateTime localDateTime = (LocalDateTime) (compatibles.get(TypeInfos.STRING_TYPE_INFO, TypeInfos.LOCAL_DATE_TIME_TYPE_INFO).apply(value)); + long timestamp = localDateTime + .atZone(dateTimeZone) + .toInstant() + .toEpochMilli(); + return new Date(timestamp); + } catch (Exception e1) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("string [%s] can't convert to date.", value), e1); + } + } + } + } + ); + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.SQL_TIME_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + try { + LocalTime localTime = (LocalTime) (compatibles.get(TypeInfos.STRING_TYPE_INFO, TypeInfos.LOCAL_TIME_TYPE_INFO).apply(value)); + return new Time(localTime.getHour(), localTime.getMinute(), localTime.getSecond()); + } catch (Exception e) { + try { + LocalDateTime localDateTime = (LocalDateTime) (compatibles.get(TypeInfos.STRING_TYPE_INFO, TypeInfos.LOCAL_DATE_TIME_TYPE_INFO).apply(value)); + return new Time(localDateTime.getHour(), localDateTime.getMinute(), localDateTime.getSecond()); + } catch (Exception e1) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("string [%s] can't convert to time.", value), e1); + } + } + } + } + ); +
+ compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.SQL_TIMESTAMP_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + LocalDateTime localDateTime = (LocalDateTime) (compatibles.get(TypeInfos.STRING_TYPE_INFO, TypeInfos.LOCAL_DATE_TIME_TYPE_INFO).apply(value)); + return new Timestamp(localDateTime.atZone(dateTimeZone) + .toInstant() + .toEpochMilli()); + } + } + ); + + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.LOCAL_DATE_TIME_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + return LocalDateTime.parse((String) value, dateTimeFormatter); + } + } + ); + + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.LOCAL_DATE_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + try { + return LocalDate.parse((String) value, dateFormatter); + } catch (Exception e) { + return ((LocalDateTime) compatibles.get(TypeInfos.STRING_TYPE_INFO, TypeInfos.LOCAL_DATE_TIME_TYPE_INFO) + .apply(value)).toLocalDate(); + } + } + } + ); + + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.LOCAL_TIME_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + try { + return LocalTime.parse((String) value, timeFormatter); + } catch (Exception e) { + return ((LocalDateTime) compatibles.get(TypeInfos.STRING_TYPE_INFO, TypeInfos.LOCAL_DATE_TIME_TYPE_INFO) + .apply(value)).toLocalTime(); + } + } + } + ); + + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.BIG_DECIMAL_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + + try { + return new BigDecimal((String) value); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("String[%s] can't convert to big decimal.", value)); + } + } + } + ); + + compatibles.put(TypeInfos.STRING_TYPE_INFO, + TypeInfos.BIG_INTEGER_TYPE_INFO, + new Function() { + @Override + public Object apply(Object value) { + 
try { + return ((BigDecimal) compatibles.get(TypeInfos.STRING_TYPE_INFO, TypeInfos.BIG_DECIMAL_TYPE_INFO) + .apply(value)).toBigInteger(); + } catch (NumberFormatException e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("String[%s] can't convert to big integer.", value)); + } + } + } + ); + + compatibles.put(TypeInfos.STRING_TYPE_INFO, + BasicArrayTypeInfo.BINARY_TYPE_INFO, + (value) -> ((String) value).getBytes(StandardCharsets.UTF_8) + ); + } + + private void addBooleanTypeInfoCompatibles() { + compatibles.put(TypeInfos.BOOLEAN_TYPE_INFO, + TypeInfos.STRING_TYPE_INFO, + (value) -> ((Boolean) value).toString() + ); + compatibles.put(TypeInfos.BOOLEAN_TYPE_INFO, + TypeInfos.INT_TYPE_INFO, + (value) -> ((Boolean) value) ? 1 : 0 + ); + compatibles.put(TypeInfos.BOOLEAN_TYPE_INFO, + TypeInfos.SHORT_TYPE_INFO, + (value) -> ((Boolean) value) ? (short) 1 : (short) 0 //casts make the boxed result a Short instead of an Integer + ); + compatibles.put(TypeInfos.BOOLEAN_TYPE_INFO, + TypeInfos.LONG_TYPE_INFO, + (value) -> ((Boolean) value) ? 1L : 0L + ); + compatibles.put(TypeInfos.BOOLEAN_TYPE_INFO, + TypeInfos.FLOAT_TYPE_INFO, + (value) -> ((Boolean) value) ? 1f : 0f + ); + compatibles.put(TypeInfos.BOOLEAN_TYPE_INFO, + TypeInfos.DOUBLE_TYPE_INFO, + (value) -> ((Boolean) value) ? 1d : 0d + ); + + //TODO in future, will be removed. boolean -> big integer + compatibles.put(TypeInfos.BOOLEAN_TYPE_INFO, + TypeInfos.BIG_INTEGER_TYPE_INFO, + new Function() { + @Override + public Object apply(Object o) { + return BigInteger.valueOf((long) compatibles.get(TypeInfos.BOOLEAN_TYPE_INFO, TypeInfos.LONG_TYPE_INFO) + .apply(o)); + } + } + ); + + //TODO in future, will be removed. 
boolean -> big decimal + compatibles.put(TypeInfos.BOOLEAN_TYPE_INFO, + TypeInfos.BIG_DECIMAL_TYPE_INFO, + new Function() { + @Override + public Object apply(Object o) { + return BigDecimal.valueOf((long) compatibles.get(TypeInfos.BOOLEAN_TYPE_INFO, TypeInfos.LONG_TYPE_INFO) + .apply(o)); + } + } + ); + } + + public Object compatibleTo(TypeInfo from, + TypeInfo to, + Object value) { + Function function = compatibles.get(from, to); + if (Objects.isNull(function)) { + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Type %s not compatible with another type %s.", from, to)); + } + try { + return function.apply(value); + } catch (BitSailException e) { + throw e; + } catch (Exception e) { + throw BitSailException.asBitSailException( + CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Type %s's value [%s] can't convert to another type %s.", from, value, to), e); + } + } + + private static boolean integerOverflow(long longValue) { + return longValue > MAX_INTEGER_LONG_VALUE || longValue < MIN_INTEGER_LONG_VALUE; + } + + private static boolean shortOverflow(long longValue) { + return longValue > MAX_SHORT_LONG_VALUE || longValue < MIN_SHORT_LONG_VALUE; + } + + private static boolean shortOverflow(int integerValue) { + return integerValue > MAX_SHORT_INTEGER_VALUE || integerValue < MIN_SHORT_INTEGER_VALUE; + } + + @VisibleForTesting + public HashBasedTable, TypeInfo, Function> getCompatibles() { + return compatibles; + } + +} diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java index 7851bd6ba..388f44e3f 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverter.java @@ -22,24 +22,15 @@ import 
com.bytedance.bitsail.common.column.MapColumn; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.exception.CommonErrorCode; -import com.bytedance.bitsail.common.option.CommonOptions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.MapUtils; -import org.apache.commons.lang3.math.NumberUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.Serializable; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.charset.Charset; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.LocalTime; -import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -52,19 +43,10 @@ public class TypeInfoValueConverter implements Serializable { private static final Logger LOG = LoggerFactory.getLogger(TypeInfoValueConverter.class); - private final BitSailConfiguration commonConfiguration; - private final DateTimeFormatter dateFormatter; - private final DateTimeFormatter timeFormatter; - private final DateTimeFormatter dateTimeFormatter; + private TypeInfoCompatibles typeInfoCompatibles; public TypeInfoValueConverter(BitSailConfiguration commonConfiguration) { - this.commonConfiguration = commonConfiguration; - this.dateFormatter = DateTimeFormatter.ofPattern(commonConfiguration.get(CommonOptions - .DateFormatOptions.DATE_PATTERN)); - this.timeFormatter = DateTimeFormatter.ofPattern(commonConfiguration.get(CommonOptions - .DateFormatOptions.TIME_PATTERN)); - this.dateTimeFormatter = DateTimeFormatter.ofPattern(commonConfiguration.get(CommonOptions - .DateFormatOptions.DATE_TIME_PATTERN)); + this.typeInfoCompatibles = new TypeInfoCompatibles(commonConfiguration); } /** @@ -244,7 +226,7 @@ private Object convertJavaObject(Object value, TypeInfo typeInfo) { if (typeInfo 
instanceof MapTypeInfo) { if (!(value instanceof Map)) { throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - "Object can't convert to map type."); + String.format("Type %s can't convert to map type.", value.getClass())); } MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; Map origin = (Map) value; @@ -257,7 +239,7 @@ private Object convertJavaObject(Object value, TypeInfo typeInfo) { } else if (typeInfo instanceof ListTypeInfo) { if (!(value instanceof List)) { throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - "Object can't convert to list type."); + String.format("Type %s can't convert to list type.", value.getClass())); } ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; List origin = (List) value; @@ -281,185 +263,14 @@ private Object convertPrimitiveObject(Object value, TypeInfo typeInfo) { return null; } - Class typeInfoTypeClass = typeInfo.getTypeClass(); - - if (STRING_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - if (value instanceof byte[]) { - return new String((byte[]) value, Charset.defaultCharset()); - } - return value.toString(); - } - - if (TypeInfos.SHORT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - if (value instanceof Number) { - return ((Number) value).shortValue(); - } - return NumberUtils.createNumber(value.toString()).shortValue(); - } - - if (TypeInfos.INT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - if (value instanceof Integer) { - return (Integer) value; - } - if (value instanceof Number) { - return ((Number) value).intValue(); - } - return NumberUtils.createNumber(value.toString()).intValue(); - } - - if (TypeInfos.LONG_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - if (value instanceof Long) { - return (Long) value; - } - if (value instanceof Number) { - return ((Number) value).longValue(); - } - return NumberUtils.createNumber(value.toString()).longValue(); - } - - if (TypeInfos.FLOAT_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - if (value 
instanceof Number) { - return ((Number) value).floatValue(); - } - return NumberUtils.createNumber(value.toString()).floatValue(); - } - - if (TypeInfos.DOUBLE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - if (value instanceof Number) { - return ((Number) value).doubleValue(); - } - return NumberUtils.createNumber(value.toString()).doubleValue(); - } - - if (TypeInfos.BIG_INTEGER_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - if (value instanceof Number) { - return ((Number) value).intValue(); - } - return new BigInteger(value.toString()); - } - - if (TypeInfos.BIG_DECIMAL_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return new BigDecimal(value.toString()); - } - - if (TypeInfos.BOOLEAN_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - if (value instanceof Integer) { - return (Integer) value != 0; - } - if (value instanceof Long) { - return (Long) value != 0; - } - String str = value.toString(); - return Boolean.parseBoolean(str); - } - - if (TypeInfos.LOCAL_DATE_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return convertLocalDateTime(value, typeInfo); - } - - if (TypeInfos.LOCAL_DATE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return convertLocalDate(value, typeInfo); - } - - if (TypeInfos.LOCAL_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return convertLocalTime(value, typeInfo); - } - - if (TypeInfos.SQL_TIMESTAMP_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return convertSqlTimestamp(value, typeInfo); - } - - if (TypeInfos.SQL_DATE_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return convertSqlDate(value, typeInfo); - } - - if (TypeInfos.SQL_TIME_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - return convertSqlTime(value, typeInfo); - } - - if (BasicArrayTypeInfo.BINARY_TYPE_INFO.getTypeClass() == typeInfoTypeClass) { - if (value instanceof byte[]) { - return (byte[]) value; - } - } - - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Value %s can't convert 
into type info %s.", value, typeInfo)); - } - - private Object convertSqlTime(Object value, TypeInfo typeInfo) { - return null; - } - - private Object convertSqlDate(Object value, TypeInfo typeInfo) { - return null; - } - - private Object convertSqlTimestamp(Object value, TypeInfo typeInfo) { - return null; - } - - private Object convertLocalTime(Object value, TypeInfo typeInfo) { - if (value instanceof LocalTime) { - return (LocalTime) value; - } - if (value instanceof LocalDateTime) { - return ((LocalDateTime) value).toLocalTime(); - } - if (value instanceof String) { - //convert string to local date time. - try { - return LocalTime.parse(value.toString(), timeFormatter); - } catch (Exception e) { - LOG.debug("Value {} can't convert to local time.", value); - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Value %s can't convert into type info %s.", value, typeInfo)); - } - } - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Value %s can't convert into type info %s.", value, typeInfo)); - } - - private Object convertLocalDate(Object value, TypeInfo typeInfo) { - if (value instanceof LocalDateTime) { - return ((LocalDateTime) value).toLocalDate(); - } - if (value instanceof LocalDate) { - return (LocalDate) value; - } - if (value instanceof String) { - //convert string to local date time. 
- try { - return LocalDate.parse(value.toString(), dateFormatter); - } catch (Exception e) { - LOG.debug("Value {} can't convert to local date time.", value); - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Value %s can't convert into type info %s.", value, typeInfo)); - } + TypeInfo valueTypeInfo = TypeInfoBridge.bridgeTypeClass(value.getClass()); + if (value.getClass() == typeInfo.getTypeClass()) { + return value; } - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Value %s can't convert into type info %s.", value, typeInfo)); - } - private Object convertLocalDateTime(Object value, TypeInfo typeInfo) { - if (value instanceof LocalDateTime) { - return (LocalDateTime) value; - } - if (value instanceof LocalDate) { - return ((LocalDate) value).atStartOfDay(); + if (!(valueTypeInfo instanceof BasicTypeInfo)) { + return valueTypeInfo.compatibleTo(typeInfo, value); } - if (value instanceof String) { - //convert string to local date time. 
- try { - return LocalDateTime.parse(value.toString(), dateTimeFormatter); - } catch (Exception e) { - LOG.debug("Value {} can't convert to local date time.", value); - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Value %s can't convert into type info %s.", value, typeInfo)); - } - } - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Value %s can't convert into type info %s.", value, typeInfo)); + return typeInfoCompatibles.compatibleTo(valueTypeInfo, typeInfo, value); } - } diff --git a/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatiblesTest.java b/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatiblesTest.java new file mode 100644 index 000000000..3487b52c8 --- /dev/null +++ b/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatiblesTest.java @@ -0,0 +1,239 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.common.typeinfo; + +import com.bytedance.bitsail.common.BitSailException; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; + +import org.apache.commons.collections.MapUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.sql.Date; +import java.sql.Time; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.util.Map; +import java.util.function.Function; + +public class TypeInfoCompatiblesTest { + + private TypeInfoCompatibles typeInfoCompatibles; + + @Before + public void before() { + typeInfoCompatibles = new TypeInfoCompatibles(BitSailConfiguration.newDefault()); + } + + @Test + public void testIntTypeInfoCompatibles() { + int value = 100; + TypeInfo source = TypeInfos.INT_TYPE_INFO; + assertTypeInfo(source, value); + } + + @Test + public void testIntTypeInfoCompatiblesOverflow() { + int value = Integer.MAX_VALUE; + TypeInfo source = TypeInfos.INT_TYPE_INFO; //int source, so the int -> short overflow check is what throws + + Assert.assertThrows(BitSailException.class, + () -> typeInfoCompatibles.compatibleTo(source, TypeInfos.SHORT_TYPE_INFO, value)); + } + + @Test + public void testLongTypeInfoCompatibles() { + long value = 1000L; + TypeInfo source = TypeInfos.LONG_TYPE_INFO; + assertTypeInfo(source, value); + } + + @Test + public void testLongTypeInfoCompatiblesOverflow() { + long value = Long.MAX_VALUE; + TypeInfo source = TypeInfos.LONG_TYPE_INFO; + + Assert.assertThrows(BitSailException.class, + () -> typeInfoCompatibles.compatibleTo(source, TypeInfos.SHORT_TYPE_INFO, value)); + Assert.assertThrows(BitSailException.class, + () -> typeInfoCompatibles.compatibleTo(source, TypeInfos.INT_TYPE_INFO, value)); + } + + @Test + public void testStringTypeInfoCompatibles() { + String numberStr = "2012"; + TypeInfo source = TypeInfos.STRING_TYPE_INFO; + + Object result; + TypeInfo target; + target = TypeInfos.SHORT_TYPE_INFO; + result = 
typeInfoCompatibles.compatibleTo(source, target, numberStr); + assertTypeInfo(result, target); + + target = TypeInfos.INT_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, numberStr); + assertTypeInfo(result, target); + + target = TypeInfos.LONG_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, numberStr); + assertTypeInfo(result, target); + + target = BasicArrayTypeInfo.BINARY_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, numberStr); + assertTypeInfo(result, target); + + target = TypeInfos.BIG_INTEGER_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, numberStr); + assertTypeInfo(result, target); + + target = TypeInfos.BIG_DECIMAL_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, numberStr); + assertTypeInfo(result, target); + + String timestampStr = "2021-01-01 10:01:23"; + target = TypeInfos.SQL_TIMESTAMP_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, timestampStr); + assertTypeInfo(result, target); + + target = TypeInfos.SQL_DATE_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, timestampStr); + assertTypeInfo(result, target); + + target = TypeInfos.SQL_TIME_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, timestampStr); + assertTypeInfo(result, target); + } + + @Test + public void testSqlDateTypeInfoCompatibles() { + Date date = new Date(System.currentTimeMillis()); + TypeInfo source = TypeInfos.SQL_DATE_TYPE_INFO; + + Object result; + TypeInfo target; + target = TypeInfos.STRING_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, date); + assertTypeInfo(result, target); + } + + @Test + public void testSqlTimeTypeInfoCompatibles() { + Time time = new Time(System.currentTimeMillis()); + TypeInfo source = TypeInfos.SQL_TIME_TYPE_INFO; + + Object result; + TypeInfo target; + target = TypeInfos.STRING_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, 
time); + assertTypeInfo(result, target); + } + + @Test + public void testSqlTimestampTypeInfoCompatibles() { + long timestamp = System.currentTimeMillis(); + TypeInfo source = TypeInfos.SQL_TIME_TYPE_INFO; + + Object result; + TypeInfo target; + target = TypeInfos.STRING_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, new Timestamp(timestamp)); + assertTypeInfo(result, target); + + target = TypeInfos.LONG_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, new Timestamp(timestamp)); + assertTypeInfo(result, target); + Assert.assertEquals((long) result, timestamp); + } + + @Test + public void testLocalDateTimeTypeInfoCompatibles() { + LocalDateTime localDateTime = LocalDateTime.now(); + TypeInfo source = TypeInfos.LOCAL_DATE_TIME_TYPE_INFO; + + Object result; + TypeInfo target; + target = TypeInfos.STRING_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, localDateTime); + assertTypeInfo(result, target); + + target = TypeInfos.LONG_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, localDateTime); + assertTypeInfo(result, target); + } + + @Test + public void testLocalTimeTypeInfoCompatibles() { + LocalTime localTime = LocalTime.now(); + TypeInfo source = TypeInfos.LOCAL_TIME_TYPE_INFO; + + Object result; + TypeInfo target; + target = TypeInfos.STRING_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, localTime); + assertTypeInfo(result, target); + } + + @Test + public void testLocalDateTypeInfoCompatibles() { + LocalDate localDate = LocalDate.now(); + TypeInfo source = TypeInfos.LOCAL_DATE_TYPE_INFO; + + Object result; + TypeInfo target; + target = TypeInfos.STRING_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, localDate); + assertTypeInfo(result, target); + } + + @Test + public void testByteArrayInfoCompatibles() { + String str = "bit-sail"; + TypeInfo source = TypeInfos.STRING_TYPE_INFO; + + Object result; + TypeInfo target; + target = 
BasicArrayTypeInfo.BINARY_TYPE_INFO; + result = typeInfoCompatibles.compatibleTo(source, target, str); + assertTypeInfo(result, target); + + result = typeInfoCompatibles.compatibleTo(target, source, result); + assertTypeInfo(result, source); + + Assert.assertEquals(str, result); + } + + private void assertTypeInfo(TypeInfo source, Object value) { + Map, Function> targets = typeInfoCompatibles.getCompatibles() + .row(source); + + if (MapUtils.isEmpty(targets)) { + return; + } + + for (Map.Entry, Function> entry : targets.entrySet()) { + Object result = typeInfoCompatibles.compatibleTo(source, entry.getKey(), value); + assertTypeInfo(result, entry.getKey()); + } + } + + private static void assertTypeInfo(Object value, TypeInfo typeInfo) { + Assert.assertTrue(value.getClass().isAssignableFrom(typeInfo.getTypeClass())); + } +} \ No newline at end of file diff --git a/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverterTest.java b/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverterTest.java index f5d027d01..d8c5283dd 100644 --- a/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverterTest.java +++ b/bitsail-common/src/test/java/com/bytedance/bitsail/common/typeinfo/TypeInfoValueConverterTest.java @@ -16,6 +16,85 @@ package com.bytedance.bitsail.common.typeinfo; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.util.JsonSerializer; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.List; +import java.util.Map; + public class TypeInfoValueConverterTest { + private TypeInfoValueConverter typeInfoValueConverter; + + @Before + public void before() { + typeInfoValueConverter = new TypeInfoValueConverter(BitSailConfiguration.newDefault()); + } + + @Test + public void testMapValue() { + 
Map maps = Maps.newHashMap(); + + maps.put("key1", "value1"); + maps.put("key2", "value2"); + maps.put("key3", "value3"); + Object result; + result = typeInfoValueConverter.convertObject(maps, TypeInfos.STRING_TYPE_INFO); + Assert.assertTrue(result instanceof String); + Map resultMap = JsonSerializer.parseToMap((String) result); + Assert.assertEquals(maps.size(), resultMap.size()); + + result = typeInfoValueConverter.convertObject(maps, new MapTypeInfo<>(TypeInfos.STRING_TYPE_INFO, TypeInfos.STRING_TYPE_INFO)); + Assert.assertEquals(maps, result); + + maps.clear(); + maps.put("key1", "1"); + maps.put("key2", "2"); + maps.put("key3", "3"); + + result = typeInfoValueConverter.convertObject(maps, new MapTypeInfo<>(TypeInfos.STRING_TYPE_INFO, TypeInfos.LONG_TYPE_INFO)); + Assert.assertNotEquals(maps, result); + List list = Lists.newArrayList((((Map) result).values())); + Assert.assertTrue(list.get(0) instanceof Long); + } + + @Test + public void testListValue() { + List list = Lists.newArrayList(); + + list.add("value1"); + list.add("value2"); + list.add("value3"); + Object result; + result = typeInfoValueConverter.convertObject(list, TypeInfos.STRING_TYPE_INFO); + Assert.assertTrue(result instanceof String); + List converted = JsonSerializer.parseToList((String) result, String.class); + Assert.assertEquals(list.size(), converted.size()); + + result = typeInfoValueConverter.convertObject(list, new ListTypeInfo<>(TypeInfos.STRING_TYPE_INFO)); + Assert.assertEquals(list, result); + + list.clear(); + list.add("1"); + list.add("2"); + list.add("3"); + + result = typeInfoValueConverter.convertObject(list, new ListTypeInfo<>(TypeInfos.LONG_TYPE_INFO)); + Assert.assertNotEquals(list, result); + Assert.assertTrue(((List) result).get(0) instanceof Long); + } + + @Test + public void testPrimitive() { + String key = new String("KEY"); + Object result; + result = typeInfoValueConverter.convertObject(key, TypeInfos.STRING_TYPE_INFO); + Assert.assertEquals(result, key); + } } \ No 
newline at end of file diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java index a592dd0ac..19a345d84 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java @@ -18,8 +18,10 @@ import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.common.row.RowKind; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import java.io.IOException; @@ -27,22 +29,72 @@ import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.ZoneOffset; +import java.util.Date; public class DebeziumRowDeserializationSchemaTest { + private DebeziumRowDeserializationSchema deserializationSchema; + + @Before + public void before() { + deserializationSchema = new DebeziumRowDeserializationSchema(BitSailConfiguration.newDefault()); + } + @Test public void test() throws URISyntaxException, IOException { byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowDeserializationSchemaTest .class.getClassLoader().getResource("file/debezium.json") .toURI().getPath())); - DebeziumRowDeserializationSchema debeziumRowDeserializationSchema = - new DebeziumRowDeserializationSchema(BitSailConfiguration.newDefault()); - Row deserialize = debeziumRowDeserializationSchema.deserialize(new 
String(bytes), + Row deserialize = deserializationSchema.deserialize(new String(bytes), new String[] {"double_type"}); Assert.assertNotNull(deserialize); Assert.assertTrue(deserialize.getField(0) instanceof BigDecimal); } + @Test + public void testInsert() throws URISyntaxException, IOException { + byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowDeserializationSchemaTest + .class.getClassLoader().getResource("file/debezium_insert.json") + .toURI().getPath())); + + Row deserialize = deserializationSchema.deserialize(new String(bytes), + new String[] {"double_type"}); + + Assert.assertNotNull(deserialize); + Assert.assertEquals(deserialize.getKind(), RowKind.INSERT); + } + + @Test + public void testUpsert() throws URISyntaxException, IOException { + byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowDeserializationSchemaTest + .class.getClassLoader().getResource("file/debezium_upsert.json") + .toURI().getPath())); + + Row deserialize = deserializationSchema.deserialize(new String(bytes), + new String[] {"order_date"}); + + Assert.assertNotNull(deserialize); + Assert.assertEquals(LocalDate.ofEpochDay(16816).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli(), + ((Date) deserialize.getField(0)).toInstant().toEpochMilli()); + Assert.assertEquals(deserialize.getKind(), RowKind.UPDATE_AFTER); + } + + @Test + public void testDelete() throws URISyntaxException, IOException { + byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowDeserializationSchemaTest + .class.getClassLoader().getResource("file/postgres/debezium_pg_delete.json") + .toURI().getPath())); + + Row deserialize = deserializationSchema.deserialize(new String(bytes), + new String[] {"id"}); + + Assert.assertNotNull(deserialize); + Assert.assertEquals(deserialize.getField(0), 10049L); + Assert.assertEquals(deserialize.getKind(), RowKind.DELETE); + } + } \ No newline at end of file diff --git 
a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_delete.json b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_delete.json new file mode 100644 index 000000000..2d7c78abf --- /dev/null +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_delete.json @@ -0,0 +1,54 @@ +{ + "before":{ + "id":10047, + "bigint_info":null, + "bigserial_info":0, + "bit_info":null, + "bit_varying_info":null, + "boolean_info":null, + "bytea_info":null, + "character_varying_info":null, + "character_info":null, + "cidr_info":null, + "date":null, + "double_precision_info":null, + "inet_info":null, + "integer_info":null, + "interval_info":null, + "macaddr_info":null, + "money_info":null, + "numeric_info":null, + "decimal_info":null, + "real_info":null, + "point_info":null, + "smallint_info":null, + "smallserial_info":0, + "serial_info":0, + "text_info":null, + "time_info":null, + "timestamp_info":null, + "uuid_info":null, + "xml_info":null, + "json_info":null, + "int_list":null, + "txt_list":null + }, + "after":null, + "source":{ + "version":"1.6.4.Final", + "connector":"postgresql", + "name":"abc", + "ts_ms":1682325644419, + "snapshot":"false", + "db":"test_cdc", + "sequence":"[\"434291792\",\"434291792\"]", + "schema":"public", + "table":"dts_source_all111", + "txId":201659, + "lsn":434293384, + "xmin":null + }, + "op":"d", + "ts_ms":1682325644348, + "transaction":null +} \ No newline at end of file diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_insert.json b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_insert.json new file mode 100644 index 000000000..5699f29b4 --- /dev/null +++ 
b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_insert.json @@ -0,0 +1,257 @@ +{ + "schema":{ + "type":"struct", + "fields":[ + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"int_type" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"4", + "connect.decimal.precision":"20" + }, + "field":"double_type" + }, + { + "type":"string", + "optional":true, + "field":"date_type" + }, + { + "type":"string", + "optional":true, + "default":"", + "field":"varchar_type" + }, + { + "type":"int32", + "optional":false, + "name":"io.debezium.time.Date", + "version":1, + "field":"datetime" + } + ], + "optional":true, + "name":"localhost.test.jdbc_source_test.Value", + "field":"before" + }, + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"int_type" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"4", + "connect.decimal.precision":"20" + }, + "field":"double_type" + }, + { + "type":"string", + "optional":true, + "field":"date_type" + }, + { + "type":"string", + "optional":true, + "default":"", + "field":"varchar_type" + }, + { + "type":"int32", + "optional":false, + "name":"io.debezium.time.Date", + "version":1, + "field":"datetime" + } + ], + "optional":true, + "name":"localhost.test.jdbc_source_test.Value", + "field":"after" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"version" + }, + { + "type":"string", + "optional":false, + "field":"connector" + }, + { + "type":"string", + "optional":false, + "field":"name" + }, + { + "type":"int64", + "optional":false, + 
"field":"ts_ms" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Enum", + "version":1, + "parameters":{ + "allowed":"true,last,false" + }, + "default":"false", + "field":"snapshot" + }, + { + "type":"string", + "optional":false, + "field":"db" + }, + { + "type":"string", + "optional":true, + "field":"sequence" + }, + { + "type":"string", + "optional":true, + "field":"table" + }, + { + "type":"int64", + "optional":false, + "field":"server_id" + }, + { + "type":"string", + "optional":true, + "field":"gtid" + }, + { + "type":"string", + "optional":false, + "field":"file" + }, + { + "type":"int64", + "optional":false, + "field":"pos" + }, + { + "type":"int32", + "optional":false, + "field":"row" + }, + { + "type":"int64", + "optional":true, + "field":"thread" + }, + { + "type":"string", + "optional":true, + "field":"query" + } + ], + "optional":false, + "name":"io.debezium.connector.mysql.Source", + "field":"source" + }, + { + "type":"string", + "optional":false, + "field":"op" + }, + { + "type":"int64", + "optional":true, + "field":"ts_ms" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"id" + }, + { + "type":"int64", + "optional":false, + "field":"total_order" + }, + { + "type":"int64", + "optional":false, + "field":"data_collection_order" + } + ], + "optional":true, + "field":"transaction" + } + ], + "optional":false, + "name":"localhost.test.jdbc_source_test.Envelope" + }, + "payload":{ + "before":null, + "after":{ + "id":1, + "int_type":1001, + "double_type":"AbH8", + "date_type":"2022-10-01", + "varchar_type":"varchar_01", + "datetime":19297 + }, + "source":{ + "version":"1.6.4.Final", + "connector":"mysql", + "name":"localhost", + "ts_ms":1682237044000, + "snapshot":"false", + "db":"test", + "sequence":null, + "table":"jdbc_source_test", + "server_id":1, + "gtid":null, + "file":"binlog.000002", + "pos":986, + "row":0, + "thread":null, + "query":null + }, + "op":"c", + "ts_ms":1682237048134, + 
"transaction":null + } +} \ No newline at end of file diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_upsert.json b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_upsert.json new file mode 100644 index 000000000..6b5a456ab --- /dev/null +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/debezium_upsert.json @@ -0,0 +1,207 @@ +{ + "schema":{ + "name":"dbserver1.inventory.orders.Envelope", + "optional":false, + "type":"struct", + "fields":[ + { + "field":"before", + "name":"dbserver1.inventory.orders.Value", + "optional":true, + "type":"struct", + "fields":[ + { + "field":"order_number", + "optional":false, + "type":"int32" + }, + { + "field":"order_date", + "name":"io.debezium.time.Date", + "optional":false, + "type":"int32", + "version":1 + }, + { + "field":"purchaser", + "optional":false, + "type":"int32" + }, + { + "field":"quantity", + "optional":false, + "type":"int32" + }, + { + "field":"product_id", + "optional":false, + "type":"int32" + } + ] + }, + { + "field":"after", + "name":"dbserver1.inventory.orders.Value", + "optional":true, + "type":"struct", + "fields":[ + { + "field":"order_number", + "optional":false, + "type":"int32" + }, + { + "field":"order_date", + "name":"io.debezium.time.Date", + "optional":false, + "type":"int32", + "version":1 + }, + { + "field":"purchaser", + "optional":false, + "type":"int32" + }, + { + "field":"quantity", + "optional":false, + "type":"int32" + }, + { + "field":"product_id", + "optional":false, + "type":"int32" + } + ] + }, + { + "field":"source", + "name":"io.debezium.connector.mysql.Source", + "optional":false, + "type":"struct", + "fields":[ + { + "field":"version", + "optional":false, + "type":"string" + }, + { + "field":"connector", + "optional":false, + "type":"string" + }, + { + "field":"name", + "optional":false, + 
"type":"string" + }, + { + "field":"ts_ms", + "optional":false, + "type":"int64" + }, + { + "default":"false", + "field":"snapshot", + "name":"io.debezium.data.Enum", + "optional":true, + "type":"string", + "version":1, + "parameters":{ + "allowed":"true,last,false" + } + }, + { + "field":"db", + "optional":false, + "type":"string" + }, + { + "field":"table", + "optional":true, + "type":"string" + }, + { + "field":"server_id", + "optional":false, + "type":"int64" + }, + { + "field":"gtid", + "optional":true, + "type":"string" + }, + { + "field":"file", + "optional":false, + "type":"string" + }, + { + "field":"pos", + "optional":false, + "type":"int64" + }, + { + "field":"row", + "optional":false, + "type":"int32" + }, + { + "field":"thread", + "optional":true, + "type":"int64" + }, + { + "field":"query", + "optional":true, + "type":"string" + } + ] + }, + { + "field":"op", + "optional":false, + "type":"string" + }, + { + "field":"ts_ms", + "optional":true, + "type":"int64" + } + ] + }, + "payload":{ + "op":"u", + "before":{ + "order_date":16816, + "quantity":1, + "purchaser":1001, + "order_number":10001, + "product_id":102 + }, + "after":{ + "order_date":16816, + "quantity":6, + "purchaser":1001, + "order_number":10001, + "product_id":102 + }, + "source":{ + "query":null, + "thread":4, + "server_id":223344, + "version":"1.0.3.Final", + "file":"mysql-bin.000007", + "connector":"mysql", + "pos":354, + "name":"dbserver1", + "gtid":null, + "row":0, + "ts_ms":1591620600000, + "snapshot":"false", + "db":"inventory", + "table":"orders" + }, + "ts_ms":1591620602204 + } +} \ No newline at end of file diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/postgres/debezium_pg_delete.json b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/postgres/debezium_pg_delete.json new file mode 100644 index 000000000..a8d8d962b --- /dev/null +++ 
b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/postgres/debezium_pg_delete.json @@ -0,0 +1,635 @@ +{ + "schema":{ + "type":"struct", + "fields":[ + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int64", + "optional":true, + "field":"bigint_info" + }, + { + "type":"int64", + "optional":false, + "default":0, + "field":"bigserial_info" + }, + { + "type":"bytes", + "optional":true, + "name":"io.debezium.data.Bits", + "version":1, + "parameters":{ + "length":"3" + }, + "field":"bit_info" + }, + { + "type":"bytes", + "optional":true, + "name":"io.debezium.data.Bits", + "version":1, + "parameters":{ + "length":"1000" + }, + "field":"bit_varying_info" + }, + { + "type":"boolean", + "optional":true, + "field":"boolean_info" + }, + { + "type":"bytes", + "optional":true, + "field":"bytea_info" + }, + { + "type":"string", + "optional":true, + "field":"character_varying_info" + }, + { + "type":"string", + "optional":true, + "field":"character_info" + }, + { + "type":"string", + "optional":true, + "field":"cidr_info" + }, + { + "type":"int32", + "optional":true, + "name":"io.debezium.time.Date", + "version":1, + "field":"date" + }, + { + "type":"double", + "optional":true, + "field":"double_precision_info" + }, + { + "type":"string", + "optional":true, + "field":"inet_info" + }, + { + "type":"int32", + "optional":true, + "field":"integer_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroDuration", + "version":1, + "field":"interval_info" + }, + { + "type":"string", + "optional":true, + "field":"macaddr_info" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"2" + }, + "field":"money_info" + }, + { + "type":"double", + "optional":true, + "field":"numeric_info" + }, + { + "type":"double", + "optional":true, + "field":"decimal_info" + }, + { + 
"type":"float", + "optional":true, + "field":"real_info" + }, + { + "type":"struct", + "fields":[ + { + "type":"double", + "optional":false, + "field":"x" + }, + { + "type":"double", + "optional":false, + "field":"y" + }, + { + "type":"bytes", + "optional":true, + "field":"wkb" + }, + { + "type":"int32", + "optional":true, + "field":"srid" + } + ], + "optional":true, + "name":"io.debezium.data.geometry.Point", + "version":1, + "doc":"Geometry (POINT)", + "field":"point_info" + }, + { + "type":"int16", + "optional":true, + "field":"smallint_info" + }, + { + "type":"int16", + "optional":false, + "default":0, + "field":"smallserial_info" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"serial_info" + }, + { + "type":"string", + "optional":true, + "field":"text_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroTime", + "version":1, + "field":"time_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroTimestamp", + "version":1, + "field":"timestamp_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Uuid", + "version":1, + "field":"uuid_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Xml", + "version":1, + "field":"xml_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Json", + "version":1, + "field":"json_info" + }, + { + "type":"array", + "items":{ + "type":"int32", + "optional":true + }, + "optional":true, + "field":"int_list" + }, + { + "type":"array", + "items":{ + "type":"string", + "optional":true + }, + "optional":true, + "field":"txt_list" + } + ], + "optional":true, + "name":"abc.public.dts_source_all111.Value", + "field":"before" + }, + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int64", + "optional":true, + "field":"bigint_info" + }, + { + "type":"int64", + "optional":false, + "default":0, + "field":"bigserial_info" + 
}, + { + "type":"bytes", + "optional":true, + "name":"io.debezium.data.Bits", + "version":1, + "parameters":{ + "length":"3" + }, + "field":"bit_info" + }, + { + "type":"bytes", + "optional":true, + "name":"io.debezium.data.Bits", + "version":1, + "parameters":{ + "length":"1000" + }, + "field":"bit_varying_info" + }, + { + "type":"boolean", + "optional":true, + "field":"boolean_info" + }, + { + "type":"bytes", + "optional":true, + "field":"bytea_info" + }, + { + "type":"string", + "optional":true, + "field":"character_varying_info" + }, + { + "type":"string", + "optional":true, + "field":"character_info" + }, + { + "type":"string", + "optional":true, + "field":"cidr_info" + }, + { + "type":"int32", + "optional":true, + "name":"io.debezium.time.Date", + "version":1, + "field":"date" + }, + { + "type":"double", + "optional":true, + "field":"double_precision_info" + }, + { + "type":"string", + "optional":true, + "field":"inet_info" + }, + { + "type":"int32", + "optional":true, + "field":"integer_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroDuration", + "version":1, + "field":"interval_info" + }, + { + "type":"string", + "optional":true, + "field":"macaddr_info" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"2" + }, + "field":"money_info" + }, + { + "type":"double", + "optional":true, + "field":"numeric_info" + }, + { + "type":"double", + "optional":true, + "field":"decimal_info" + }, + { + "type":"float", + "optional":true, + "field":"real_info" + }, + { + "type":"struct", + "fields":[ + { + "type":"double", + "optional":false, + "field":"x" + }, + { + "type":"double", + "optional":false, + "field":"y" + }, + { + "type":"bytes", + "optional":true, + "field":"wkb" + }, + { + "type":"int32", + "optional":true, + "field":"srid" + } + ], + "optional":true, + "name":"io.debezium.data.geometry.Point", + "version":1, + "doc":"Geometry (POINT)", 
+ "field":"point_info" + }, + { + "type":"int16", + "optional":true, + "field":"smallint_info" + }, + { + "type":"int16", + "optional":false, + "default":0, + "field":"smallserial_info" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"serial_info" + }, + { + "type":"string", + "optional":true, + "field":"text_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroTime", + "version":1, + "field":"time_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroTimestamp", + "version":1, + "field":"timestamp_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Uuid", + "version":1, + "field":"uuid_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Xml", + "version":1, + "field":"xml_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Json", + "version":1, + "field":"json_info" + }, + { + "type":"array", + "items":{ + "type":"int32", + "optional":true + }, + "optional":true, + "field":"int_list" + }, + { + "type":"array", + "items":{ + "type":"string", + "optional":true + }, + "optional":true, + "field":"txt_list" + } + ], + "optional":true, + "name":"abc.public.dts_source_all111.Value", + "field":"after" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"version" + }, + { + "type":"string", + "optional":false, + "field":"connector" + }, + { + "type":"string", + "optional":false, + "field":"name" + }, + { + "type":"int64", + "optional":false, + "field":"ts_ms" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Enum", + "version":1, + "parameters":{ + "allowed":"true,last,false" + }, + "default":"false", + "field":"snapshot" + }, + { + "type":"string", + "optional":false, + "field":"db" + }, + { + "type":"string", + "optional":true, + "field":"sequence" + }, + { + "type":"string", + "optional":false, + "field":"schema" + }, + { + 
"type":"string", + "optional":false, + "field":"table" + }, + { + "type":"int64", + "optional":true, + "field":"txId" + }, + { + "type":"int64", + "optional":true, + "field":"lsn" + }, + { + "type":"int64", + "optional":true, + "field":"xmin" + } + ], + "optional":false, + "name":"io.debezium.connector.postgresql.Source", + "field":"source" + }, + { + "type":"string", + "optional":false, + "field":"op" + }, + { + "type":"int64", + "optional":true, + "field":"ts_ms" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"id" + }, + { + "type":"int64", + "optional":false, + "field":"total_order" + }, + { + "type":"int64", + "optional":false, + "field":"data_collection_order" + } + ], + "optional":true, + "field":"transaction" + } + ], + "optional":false, + "name":"abc.public.dts_source_all111.Envelope" + }, + "payload":{ + "before":{ + "id":10049, + "bigint_info":null, + "bigserial_info":0, + "bit_info":null, + "bit_varying_info":null, + "boolean_info":null, + "bytea_info":null, + "character_varying_info":null, + "character_info":null, + "cidr_info":null, + "date":null, + "double_precision_info":null, + "inet_info":null, + "integer_info":null, + "interval_info":null, + "macaddr_info":null, + "money_info":null, + "numeric_info":null, + "decimal_info":null, + "real_info":null, + "point_info":null, + "smallint_info":null, + "smallserial_info":0, + "serial_info":0, + "text_info":null, + "time_info":null, + "timestamp_info":null, + "uuid_info":null, + "xml_info":null, + "json_info":null, + "int_list":null, + "txt_list":null + }, + "after":null, + "source":{ + "version":"1.6.4.Final", + "connector":"postgresql", + "name":"abc", + "ts_ms":1682326458976, + "snapshot":"false", + "db":"test_cdc", + "sequence":"[\"434325360\",\"434325360\"]", + "schema":"public", + "table":"dts_source_all111", + "txId":201667, + "lsn":434326896, + "xmin":null + }, + "op":"d", + "ts_ms":1682326459323, + "transaction":null + } +} \ No newline at end of file 
diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/postgres/debezium_pg_upsert.json b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/postgres/debezium_pg_upsert.json new file mode 100644 index 000000000..379e15e8e --- /dev/null +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/resources/file/postgres/debezium_pg_upsert.json @@ -0,0 +1,680 @@ +{ + "schema":{ + "type":"struct", + "fields":[ + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int64", + "optional":true, + "field":"bigint_info" + }, + { + "type":"int64", + "optional":false, + "default":0, + "field":"bigserial_info" + }, + { + "type":"bytes", + "optional":true, + "name":"io.debezium.data.Bits", + "version":1, + "parameters":{ + "length":"3" + }, + "field":"bit_info" + }, + { + "type":"bytes", + "optional":true, + "name":"io.debezium.data.Bits", + "version":1, + "parameters":{ + "length":"1000" + }, + "field":"bit_varying_info" + }, + { + "type":"boolean", + "optional":true, + "field":"boolean_info" + }, + { + "type":"bytes", + "optional":true, + "field":"bytea_info" + }, + { + "type":"string", + "optional":true, + "field":"character_varying_info" + }, + { + "type":"string", + "optional":true, + "field":"character_info" + }, + { + "type":"string", + "optional":true, + "field":"cidr_info" + }, + { + "type":"int32", + "optional":true, + "name":"io.debezium.time.Date", + "version":1, + "field":"date" + }, + { + "type":"double", + "optional":true, + "field":"double_precision_info" + }, + { + "type":"string", + "optional":true, + "field":"inet_info" + }, + { + "type":"int32", + "optional":true, + "field":"integer_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroDuration", + "version":1, + "field":"interval_info" + }, + { + "type":"string", + 
"optional":true, + "field":"macaddr_info" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"2" + }, + "field":"money_info" + }, + { + "type":"double", + "optional":true, + "field":"numeric_info" + }, + { + "type":"double", + "optional":true, + "field":"decimal_info" + }, + { + "type":"float", + "optional":true, + "field":"real_info" + }, + { + "type":"struct", + "fields":[ + { + "type":"double", + "optional":false, + "field":"x" + }, + { + "type":"double", + "optional":false, + "field":"y" + }, + { + "type":"bytes", + "optional":true, + "field":"wkb" + }, + { + "type":"int32", + "optional":true, + "field":"srid" + } + ], + "optional":true, + "name":"io.debezium.data.geometry.Point", + "version":1, + "doc":"Geometry (POINT)", + "field":"point_info" + }, + { + "type":"int16", + "optional":true, + "field":"smallint_info" + }, + { + "type":"int16", + "optional":false, + "default":0, + "field":"smallserial_info" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"serial_info" + }, + { + "type":"string", + "optional":true, + "field":"text_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroTime", + "version":1, + "field":"time_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroTimestamp", + "version":1, + "field":"timestamp_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Uuid", + "version":1, + "field":"uuid_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Xml", + "version":1, + "field":"xml_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Json", + "version":1, + "field":"json_info" + }, + { + "type":"array", + "items":{ + "type":"int32", + "optional":true + }, + "optional":true, + "field":"int_list" + }, + { + "type":"array", + "items":{ + "type":"string", + "optional":true + }, + "optional":true, + 
"field":"txt_list" + } + ], + "optional":true, + "name":"abc.public.dts_source_all111.Value", + "field":"before" + }, + { + "type":"struct", + "fields":[ + { + "type":"int64", + "optional":false, + "field":"id" + }, + { + "type":"int64", + "optional":true, + "field":"bigint_info" + }, + { + "type":"int64", + "optional":false, + "default":0, + "field":"bigserial_info" + }, + { + "type":"bytes", + "optional":true, + "name":"io.debezium.data.Bits", + "version":1, + "parameters":{ + "length":"3" + }, + "field":"bit_info" + }, + { + "type":"bytes", + "optional":true, + "name":"io.debezium.data.Bits", + "version":1, + "parameters":{ + "length":"1000" + }, + "field":"bit_varying_info" + }, + { + "type":"boolean", + "optional":true, + "field":"boolean_info" + }, + { + "type":"bytes", + "optional":true, + "field":"bytea_info" + }, + { + "type":"string", + "optional":true, + "field":"character_varying_info" + }, + { + "type":"string", + "optional":true, + "field":"character_info" + }, + { + "type":"string", + "optional":true, + "field":"cidr_info" + }, + { + "type":"int32", + "optional":true, + "name":"io.debezium.time.Date", + "version":1, + "field":"date" + }, + { + "type":"double", + "optional":true, + "field":"double_precision_info" + }, + { + "type":"string", + "optional":true, + "field":"inet_info" + }, + { + "type":"int32", + "optional":true, + "field":"integer_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroDuration", + "version":1, + "field":"interval_info" + }, + { + "type":"string", + "optional":true, + "field":"macaddr_info" + }, + { + "type":"bytes", + "optional":true, + "name":"org.apache.kafka.connect.data.Decimal", + "version":1, + "parameters":{ + "scale":"2" + }, + "field":"money_info" + }, + { + "type":"double", + "optional":true, + "field":"numeric_info" + }, + { + "type":"double", + "optional":true, + "field":"decimal_info" + }, + { + "type":"float", + "optional":true, + "field":"real_info" + }, + { + "type":"struct", 
+ "fields":[ + { + "type":"double", + "optional":false, + "field":"x" + }, + { + "type":"double", + "optional":false, + "field":"y" + }, + { + "type":"bytes", + "optional":true, + "field":"wkb" + }, + { + "type":"int32", + "optional":true, + "field":"srid" + } + ], + "optional":true, + "name":"io.debezium.data.geometry.Point", + "version":1, + "doc":"Geometry (POINT)", + "field":"point_info" + }, + { + "type":"int16", + "optional":true, + "field":"smallint_info" + }, + { + "type":"int16", + "optional":false, + "default":0, + "field":"smallserial_info" + }, + { + "type":"int32", + "optional":false, + "default":0, + "field":"serial_info" + }, + { + "type":"string", + "optional":true, + "field":"text_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroTime", + "version":1, + "field":"time_info" + }, + { + "type":"int64", + "optional":true, + "name":"io.debezium.time.MicroTimestamp", + "version":1, + "field":"timestamp_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Uuid", + "version":1, + "field":"uuid_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Xml", + "version":1, + "field":"xml_info" + }, + { + "type":"string", + "optional":true, + "name":"io.debezium.data.Json", + "version":1, + "field":"json_info" + }, + { + "type":"array", + "items":{ + "type":"int32", + "optional":true + }, + "optional":true, + "field":"int_list" + }, + { + "type":"array", + "items":{ + "type":"string", + "optional":true + }, + "optional":true, + "field":"txt_list" + } + ], + "optional":true, + "name":"abc.public.dts_source_all111.Value", + "field":"after" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"version" + }, + { + "type":"string", + "optional":false, + "field":"connector" + }, + { + "type":"string", + "optional":false, + "field":"name" + }, + { + "type":"int64", + "optional":false, + "field":"ts_ms" + }, + { + "type":"string", + 
"optional":true, + "name":"io.debezium.data.Enum", + "version":1, + "parameters":{ + "allowed":"true,last,false" + }, + "default":"false", + "field":"snapshot" + }, + { + "type":"string", + "optional":false, + "field":"db" + }, + { + "type":"string", + "optional":true, + "field":"sequence" + }, + { + "type":"string", + "optional":false, + "field":"schema" + }, + { + "type":"string", + "optional":false, + "field":"table" + }, + { + "type":"int64", + "optional":true, + "field":"txId" + }, + { + "type":"int64", + "optional":true, + "field":"lsn" + }, + { + "type":"int64", + "optional":true, + "field":"xmin" + } + ], + "optional":false, + "name":"io.debezium.connector.postgresql.Source", + "field":"source" + }, + { + "type":"string", + "optional":false, + "field":"op" + }, + { + "type":"int64", + "optional":true, + "field":"ts_ms" + }, + { + "type":"struct", + "fields":[ + { + "type":"string", + "optional":false, + "field":"id" + }, + { + "type":"int64", + "optional":false, + "field":"total_order" + }, + { + "type":"int64", + "optional":false, + "field":"data_collection_order" + } + ], + "optional":true, + "field":"transaction" + } + ], + "optional":false, + "name":"abc.public.dts_source_all111.Envelope" + }, + "payload":{ + "before":{ + "id":10048, + "bigint_info":null, + "bigserial_info":0, + "bit_info":null, + "bit_varying_info":null, + "boolean_info":null, + "bytea_info":null, + "character_varying_info":null, + "character_info":null, + "cidr_info":null, + "date":null, + "double_precision_info":null, + "inet_info":null, + "integer_info":null, + "interval_info":null, + "macaddr_info":null, + "money_info":null, + "numeric_info":null, + "decimal_info":null, + "real_info":null, + "point_info":null, + "smallint_info":null, + "smallserial_info":0, + "serial_info":0, + "text_info":null, + "time_info":null, + "timestamp_info":null, + "uuid_info":null, + "xml_info":null, + "json_info":null, + "int_list":null, + "txt_list":null + }, + "after":{ + "id":10048, + 
"bigint_info":4287444545095, + "bigserial_info":2, + "bit_info":"BQ==", + "bit_varying_info":"BQ==", + "boolean_info":false, + "bytea_info":"XHhERUFEQkVFRg==", + "character_varying_info":"hello world", + "character_info":"hello ", + "cidr_info":"192.168.100.128/32", + "date":19300, + "double_precision_info":12.123123123, + "inet_info":"192.168.100.128", + "integer_info":10000, + "interval_info":31536000000000, + "macaddr_info":"08:00:2b:01:02:03", + "money_info":"BrHASw==", + "numeric_info":1233.12312, + "decimal_info":123123.123123123, + "real_info":123123.125, + "point_info":{ + "x":123, + "y":123, + "wkb":"AQEAAAAAAAAAAMBeQAAAAAAAwF5A", + "srid":null + }, + "smallint_info":10000, + "smallserial_info":0, + "serial_info":0, + "text_info":"我们还是类别品牌重要更新.会员价格原因密码只要.很多最后用户详细结果增加是一.\\n解决还是全部准备产品.数据其实中国文化.\\n电脑那么历史只要功能国家其他.来源任何国际自己人员已经.分析有关我的一下信息应该.\\n学习标题表示网络认为.计划科技得到一样电脑他们搜索一起.对于国际点击搜索研究表示记者.\\n起来教育搜索.自己网站的人免费就是所有更新.", + "time_info":13903000000, + "timestamp_info":1667533903000000, + "uuid_info":"a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11", + "xml_info":"Manual...", + "json_info":"{\"a\": 1, \"b\": 2}", + "int_list":[ + 100, + 200, + 300 + ], + "txt_list":[ + "[Ljava.lang.String;@5f9c8d5d", + "[Ljava.lang.String;@1c2f1cbe" + ] + }, + "source":{ + "version":"1.6.4.Final", + "connector":"postgresql", + "name":"abc", + "ts_ms":1682325982454, + "snapshot":"false", + "db":"test_cdc", + "sequence":"[null,\"434293792\"]", + "schema":"public", + "table":"dts_source_all111", + "txId":201661, + "lsn":434294064, + "xmin":null + }, + "op":"u", + "ts_ms":1682325984233, + "transaction":null + } +} \ No newline at end of file diff --git a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java index 6e0bb93ed..1e902e4ed 100644 --- 
a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java +++ b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java @@ -107,7 +107,7 @@ private Map mockCatalogTables() { .catalogTableSchema(CatalogTableSchema.builder() .columns(Lists.newArrayList( CatalogTableColumn.builder() - .type(TypeInfos.INT_TYPE_INFO) + .type(TypeInfos.LONG_TYPE_INFO) .name("int_type") .build() )).build() @@ -117,7 +117,7 @@ private Map mockCatalogTables() { .catalogTableSchema(CatalogTableSchema.builder() .columns(Lists.newArrayList( CatalogTableColumn.builder() - .type(TypeInfos.DOUBLE_TYPE_INFO) + .type(TypeInfos.BIG_DECIMAL_TYPE_INFO) .name("double_type") .build() )).build() From 5dd60046705fbe022b15a2c7c332e4a366349b45 Mon Sep 17 00:00:00 2001 From: haoke Date: Fri, 28 Apr 2023 10:47:24 +0800 Subject: [PATCH 07/14] [BitSail][Multi-Sink]fixed bugs. --- .../base/extension/SupportProducedType.java | 26 ++++++++++++ bitsail-common/pom.xml | 1 - .../catalog/table/CatalogTableColumn.java | 2 + .../bitsail/common/catalog/table/TableId.java | 22 ++++++++++ .../bitsail/common/row/MultipleTableRow.java | 2 +- .../common/typeinfo/TypeInfoCompatibles.java | 10 ++--- .../bitsail-component-formats-flink/pom.xml | 6 +++ .../DebeziumRowDeserializationSchema.java | 8 ++-- .../bitsail-flink-row-parser/pom.xml | 6 +++ .../bitsail-connector-hive/pom.xml | 14 +++++++ .../bitsail-connectors-legacy/pom.xml | 17 ++++++++ .../connector-cdc/connector-cdc-base/pom.xml | 16 +++++++- .../cdc/util/MultipleTableRowUtils.java | 41 +++++++++++++++++++ .../connector-cdc/connector-cdc-mysql/pom.xml | 2 +- .../cdc/mysql/source/MysqlCDCSource.java | 10 ++++- .../debezium/MysqlBinlogSplitReader.java | 17 ++------ bitsail-connectors/connector-cdc/pom.xml | 16 +++++--- .../main/resources/doris-type-converter.yaml | 3 ++ .../sink/multiple/MultipleTableCommitter.java | 3 +- 
.../sink/multiple/MultipleTableSink.java | 6 +-- .../sink/multiple/MultipleTableWriter.java | 8 +++- .../bitsail-core-flink-1.11-bridge/pom.xml | 10 +++++ .../reader/delegate/DelegateFlinkSource.java | 15 ++++--- 23 files changed, 217 insertions(+), 44 deletions(-) create mode 100644 bitsail-base/src/main/java/com/bytedance/bitsail/base/extension/SupportProducedType.java create mode 100644 bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/util/MultipleTableRowUtils.java diff --git a/bitsail-base/src/main/java/com/bytedance/bitsail/base/extension/SupportProducedType.java b/bitsail-base/src/main/java/com/bytedance/bitsail/base/extension/SupportProducedType.java new file mode 100644 index 000000000..49e3e00ca --- /dev/null +++ b/bitsail-base/src/main/java/com/bytedance/bitsail/base/extension/SupportProducedType.java @@ -0,0 +1,26 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.base.extension; + +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; + +import java.io.Serializable; + +public interface SupportProducedType extends Serializable { + + RowTypeInfo getProducedType(); +} diff --git a/bitsail-common/pom.xml b/bitsail-common/pom.xml index 592da6c14..8acf05e3c 100644 --- a/bitsail-common/pom.xml +++ b/bitsail-common/pom.xml @@ -31,7 +31,6 @@ false - ${project.parent.basedir} diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTableColumn.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTableColumn.java index 3911849ec..4e8a0e5f1 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTableColumn.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/CatalogTableColumn.java @@ -37,6 +37,8 @@ public class CatalogTableColumn implements Serializable { private String comment; + private Object defaultValue; + public CatalogTableColumn(String name, TypeInfo type) { this.name = name; this.type = type; diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableId.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableId.java index 35a08fffe..39000c5c2 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableId.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/catalog/table/TableId.java @@ -60,6 +60,28 @@ public static TableId of(String database, String schema, String table) { return new TableId(database, schema, table); } + public String getQuotedName() { + return getQuotedName("`"); + } + + public String getQuotedName(String quote) { + StringBuilder builder = new StringBuilder(); + builder.append(quote) + .append(database) + .append(quote) + .append("."); + if (schema != null) { + builder.append(quote) + .append(schema) + .append(quote) + 
.append("."); + } + builder.append(quote) + .append(table) + .append(quote); + return builder.toString(); + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/row/MultipleTableRow.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/row/MultipleTableRow.java index c5d549607..709b7899b 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/row/MultipleTableRow.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/row/MultipleTableRow.java @@ -35,7 +35,7 @@ @Builder public class MultipleTableRow implements Serializable { - private static final RowTypeInfo MULTIPLE_TABLE_ROW_TYPE_INFO = + public static final RowTypeInfo MULTIPLE_TABLE_ROW_TYPE_INFO = new RowTypeInfo(Arrays.stream(MultipleTableField.values()) .map(MultipleTableField::getName) .collect(Collectors.toList()) diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatibles.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatibles.java index 69c4dd8e9..c6e6828f7 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatibles.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/typeinfo/TypeInfoCompatibles.java @@ -55,12 +55,12 @@ public class TypeInfoCompatibles implements Serializable { private final BitSailConfiguration commonConfiguration; - private final DateTimeFormatter dateFormatter; - private final DateTimeFormatter timeFormatter; - private final DateTimeFormatter dateTimeFormatter; - private final ZoneId dateTimeZone; + private final transient DateTimeFormatter dateFormatter; + private final transient DateTimeFormatter timeFormatter; + private final transient DateTimeFormatter dateTimeFormatter; + private final transient ZoneId dateTimeZone; - private final HashBasedTable, TypeInfo, Function> compatibles; + private final transient HashBasedTable, 
TypeInfo, Function> compatibles; public TypeInfoCompatibles(BitSailConfiguration commonConfiguration) { this.commonConfiguration = commonConfiguration; diff --git a/bitsail-components/bitsail-component-formats-flink/pom.xml b/bitsail-components/bitsail-component-formats-flink/pom.xml index c73cb22a1..bb0f4e585 100644 --- a/bitsail-components/bitsail-component-formats-flink/pom.xml +++ b/bitsail-components/bitsail-component-formats-flink/pom.xml @@ -64,6 +64,12 @@ bitsail-core-flink-1.11-bridge ${revision} provided + + + com.bytedance.bitsail + bitsail-core-common + + diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java index ae02dd90a..271dff46b 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java @@ -124,13 +124,15 @@ public Row convert(Struct struct, Schema schema, String[] fieldNames, RowKind ro Row row = new Row(fieldNames.length); row.setKind(rowKind); for (int index = 0; index < fieldNames.length; index++) { - Object withoutDefault = struct.getWithoutDefault(fieldNames[index]); Field field = schema.field(fieldNames[index]); - if (Objects.isNull(withoutDefault)) { + if (Objects.isNull(field)) { row.setField(index, null); } else { + Object withoutDefault = struct.getWithoutDefault(fieldNames[index]); try { - row.setField(index, convert(field.schema(), withoutDefault)); + withoutDefault = Objects.isNull(withoutDefault) ? 
null + : convert(field.schema(), withoutDefault); + row.setField(index, withoutDefault); } catch (BitSailException e) { LOG.error("Failed to parse field {} from value {}.", field.name(), withoutDefault); throw e; diff --git a/bitsail-components/bitsail-flink-row-parser/pom.xml b/bitsail-components/bitsail-flink-row-parser/pom.xml index fe413b0ca..f5c5da317 100644 --- a/bitsail-components/bitsail-flink-row-parser/pom.xml +++ b/bitsail-components/bitsail-flink-row-parser/pom.xml @@ -133,6 +133,12 @@ bitsail-core-flink-1.11-bridge ${revision} provided + + + com.bytedance.bitsail + bitsail-core-common + + diff --git a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/pom.xml b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/pom.xml index 50519cdfc..13180d386 100644 --- a/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/pom.xml +++ b/bitsail-connectors/bitsail-connectors-legacy/bitsail-connector-hive/pom.xml @@ -26,6 +26,16 @@ bitsail-connector-hive + + + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + 2.10.5 + + + + @@ -97,6 +107,10 @@ jersey-client com.sun.jersey + + jackson-module-jaxb-annotations + com.fasterxml.jackson.module + diff --git a/bitsail-connectors/bitsail-connectors-legacy/pom.xml b/bitsail-connectors/bitsail-connectors-legacy/pom.xml index c88b9082b..7d8cd45bd 100644 --- a/bitsail-connectors/bitsail-connectors-legacy/pom.xml +++ b/bitsail-connectors/bitsail-connectors-legacy/pom.xml @@ -67,12 +67,29 @@ ${scala-2.11.version} 2.11 + + + + + org.reflections + reflections + 0.9.10 + + + + com.bytedance.bitsail bitsail-core-flink-1.11-bridge ${revision} provided + + + com.bytedance.bitsail + bitsail-core-common + + diff --git a/bitsail-connectors/connector-cdc/connector-cdc-base/pom.xml b/bitsail-connectors/connector-cdc/connector-cdc-base/pom.xml index 13877c03a..80ec0897e 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-base/pom.xml +++ 
b/bitsail-connectors/connector-cdc/connector-cdc-base/pom.xml @@ -33,7 +33,21 @@ - + + io.debezium + debezium-embedded + ${debezium.version} + + + jakarta.activation + jakarta.activation-api + + + org.slf4j + slf4j-log4j12 + + + \ No newline at end of file diff --git a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/util/MultipleTableRowUtils.java b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/util/MultipleTableRowUtils.java new file mode 100644 index 000000000..524aa69dd --- /dev/null +++ b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/util/MultipleTableRowUtils.java @@ -0,0 +1,41 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.connector.cdc.util; + +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.common.row.MultipleTableRow; + +import org.apache.commons.lang3.StringUtils; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.source.SourceRecord; + +import static io.debezium.data.Envelope.FieldName.SOURCE; + +public class MultipleTableRowUtils { + + public static MultipleTableRow fromSourceRecord(SourceRecord record, byte[] serialized) { + Struct value = (Struct) record.value(); + TableId tableId = TableId + .of(value.getStruct(SOURCE).getString("db"), value.getStruct(SOURCE).getString("table")); + return MultipleTableRow + .of(tableId.toString(), + null, + new String(serialized), + StringUtils.EMPTY, + StringUtils.EMPTY); + } +} diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/pom.xml b/bitsail-connectors/connector-cdc/connector-cdc-mysql/pom.xml index 24d5b97c6..57076fec0 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-mysql/pom.xml +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/pom.xml @@ -31,7 +31,7 @@ 8 8 UTF-8 - 1.6.4.Final + 3.0.8 diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/MysqlCDCSource.java b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/MysqlCDCSource.java index 1e99e2daf..32e139770 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/MysqlCDCSource.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/MysqlCDCSource.java @@ -17,7 +17,10 @@ package com.bytedance.bitsail.connector.cdc.mysql.source; import com.bytedance.bitsail.base.connector.reader.v1.SourceReader; +import com.bytedance.bitsail.base.extension.SupportProducedType; +import 
com.bytedance.bitsail.common.row.MultipleTableRow; import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; import com.bytedance.bitsail.connector.cdc.mysql.source.reader.MysqlCDCSourceReader; import com.bytedance.bitsail.connector.cdc.mysql.source.split.MysqlSplitSerializer; import com.bytedance.bitsail.connector.cdc.source.BaseCDCSource; @@ -30,7 +33,7 @@ /** * Source to read mysql binlog. */ -public class MysqlCDCSource extends BaseCDCSource { +public class MysqlCDCSource extends BaseCDCSource implements SupportProducedType { private static final Logger LOG = LoggerFactory.getLogger(MysqlCDCSource.class); @Override @@ -48,4 +51,9 @@ public BaseSplitSerializer createSplitSerializer() { public String getReaderName() { return "mysql_cdc"; } + + @Override + public RowTypeInfo getProducedType() { + return MultipleTableRow.MULTIPLE_TABLE_ROW_TYPE_INFO; + } } diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java index 160cf30b6..62c6a7b88 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java @@ -17,7 +17,6 @@ package com.bytedance.bitsail.connector.cdc.mysql.source.debezium; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; -import com.bytedance.bitsail.common.row.BinlogRow; import com.bytedance.bitsail.common.row.Row; import com.bytedance.bitsail.component.format.debezium.JsonDebeziumSerializationSchema; import 
com.bytedance.bitsail.connector.cdc.mysql.source.config.MysqlConfig; @@ -26,6 +25,7 @@ import com.bytedance.bitsail.connector.cdc.option.BinlogReaderOptions; import com.bytedance.bitsail.connector.cdc.source.reader.BinlogSplitReader; import com.bytedance.bitsail.connector.cdc.source.split.BinlogSplit; +import com.bytedance.bitsail.connector.cdc.util.MultipleTableRowUtils; import com.github.shyiko.mysql.binlog.BinaryLogClient; import com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -52,8 +52,6 @@ import io.debezium.schema.TopicSelector; import io.debezium.util.Clock; import io.debezium.util.SchemaNameAdjuster; -import org.apache.kafka.connect.data.Field; -import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -283,17 +281,8 @@ public Row poll() { SourceRecord record = this.recordIterator.next(); this.offset = record.sourceOffset(); byte[] serialized = this.serializer.serialize(record); - Struct val = (Struct) record.value(); - Field keyField = record.keySchema().fields().get(0); - Row result = new Row(BinlogRow.ROW_SIZE); - result.setField(BinlogRow.DATABASE_INDEX, val.getStruct("source").getString("db")); - result.setField(BinlogRow.TABLE_INDEX, val.getStruct("source").getString("table")); - result.setField(BinlogRow.KEY_INDEX, ((Struct) record.key()).get(keyField).toString()); - result.setField(BinlogRow.TIMESTAMP_INDEX, val.getStruct("source").getInt64("ts_ms").toString()); - result.setField(BinlogRow.DDL_FLAG_INDEX, false); - result.setField(BinlogRow.VERSION_INDEX, 1); - result.setField(BinlogRow.VALUE_INDEX, serialized); - return result; + return MultipleTableRowUtils.fromSourceRecord(record, serialized) + .asRow(); } @Override diff --git a/bitsail-connectors/connector-cdc/pom.xml b/bitsail-connectors/connector-cdc/pom.xml index 49b71610f..593eb0f3c 100644 --- a/bitsail-connectors/connector-cdc/pom.xml +++ b/bitsail-connectors/connector-cdc/pom.xml 
@@ -33,13 +33,17 @@ - 8 - 8 - UTF-8 + 1.6.4.Final - - - + + + + io.debezium + debezium-embedded + ${debezium.version} + + + \ No newline at end of file diff --git a/bitsail-connectors/connector-doris/src/main/resources/doris-type-converter.yaml b/bitsail-connectors/connector-doris/src/main/resources/doris-type-converter.yaml index 93ba709cc..9b7c4cb3f 100644 --- a/bitsail-connectors/connector-doris/src/main/resources/doris-type-converter.yaml +++ b/bitsail-connectors/connector-doris/src/main/resources/doris-type-converter.yaml @@ -59,6 +59,9 @@ engine.type.to.bitsail.type.converter: - source.type: date target.type: date.date + - source.type: bit + target.type: byte + - source.type: datetime target.type: date.datetime diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableCommitter.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableCommitter.java index f568debb6..b7841e03d 100644 --- a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableCommitter.java +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableCommitter.java @@ -53,7 +53,8 @@ public List> commit(List> committer = supplier.createCommitter(configuration); - multipleCommitters.put(tableId, committer.get()); + realWriterCommitter = committer.get(); + multipleCommitters.put(tableId, realWriterCommitter); } realWriterCommitter.commit(committable.getCommits()); } diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java index cbac3c028..79ecadbde 100644 --- a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java 
+++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java @@ -62,7 +62,6 @@ public class MultipleTableSink realSink, CatalogFactory factory) { @@ -76,9 +75,9 @@ public MultipleTableSink(Sink realSink, public void configure(BitSailConfiguration commonConfiguration, BitSailConfiguration writerConfiguration) throws Exception { this.commonConfiguration = commonConfiguration; this.writerConfiguration = writerConfiguration; - this.valueConverter = new TypeInfoValueConverter(commonConfiguration); this.patternOfTable = Pattern.compile(writerConfiguration.get(WriterOptions.BaseWriterOptions.TABLE_PATTERN)); this.catalog = factory.createTableCatalog(BuilderGroup.WRITER, writerConfiguration); + this.catalog.open(realSink.createTypeInfoConverter()); this.catalogTables = Maps.newHashMap(); List tableIds = catalog.listTables(); @@ -89,6 +88,7 @@ public void configure(BitSailConfiguration commonConfiguration, BitSailConfigura catalogTables.put(tableId, catalogTable); } } + this.realSink.configure(commonConfiguration, writerConfiguration); } @Override @@ -97,7 +97,7 @@ public Writer, MultipleTableState( writerConfiguration, context, - valueConverter, + new TypeInfoValueConverter(commonConfiguration), (SupportMultipleSinkTable) realSink, catalogTables, patternOfTable, diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java index 8508a2754..aa9efeb5e 100644 --- a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java @@ -25,6 +25,7 @@ import com.bytedance.bitsail.common.catalog.table.TableId; import 
com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.exception.CommonErrorCode; +import com.bytedance.bitsail.common.option.WriterOptions; import com.bytedance.bitsail.common.row.MultipleTableRow; import com.bytedance.bitsail.common.row.Row; import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; @@ -62,6 +63,7 @@ public class MultipleTableWriter> restoredMultiTableWriters; private transient Map> processedMultiTableWriters; @@ -84,6 +86,7 @@ public MultipleTableWriter(BitSailConfiguration templateConfiguration, this.restoredMultiTableWriters = Maps.newConcurrentMap(); this.tableIdRowTypeInfos = Maps.newConcurrentMap(); this.patternOfTable = patternOfTable; + this.database = templateConfiguration.get(WriterOptions.BaseWriterOptions.DB_NAME); restore(); } @@ -110,9 +113,10 @@ private void restore() { public void write(Row element) throws IOException { MultipleTableRow multipleTableRow = MultipleTableRow.of(element); TableId tableId = TableId.of(multipleTableRow.getTableId()); + tableId = TableId.of(database, tableId.getTable()); if (!(patternOfTable.matcher(tableId.getTable()).find())) { - LOG.warn("Table {} not match with pattern: {}.", tableId.getTable(), patternOfTable.pattern()); + LOG.debug("Table {} not match with pattern: {}.", tableId.getTable(), patternOfTable.pattern()); return; } @@ -161,7 +165,7 @@ public void write(Row element) throws IOException { context.getIndexOfSubTaskId(), rowTypeInfo.getFieldNames()[index], deserialize.getField(index), - rowTypeInfo.getTypeInfos()[index]); + rowTypeInfo.getTypeInfos()[index], e); //handled as dirty record. 
throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, String.format("Subtask %s failed to convert field name %s to dest type info %S.", diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/pom.xml b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/pom.xml index 2c6f1650f..95ca305fd 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/pom.xml +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/pom.xml @@ -33,6 +33,16 @@ 2.11 + + + + org.reflections + reflections + 0.9.10 + + + + com.bytedance.bitsail diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateFlinkSource.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateFlinkSource.java index 070e96838..1e1d47a7f 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateFlinkSource.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/reader/delegate/DelegateFlinkSource.java @@ -18,6 +18,7 @@ import com.bytedance.bitsail.base.connector.reader.v1.SourceSplit; import com.bytedance.bitsail.base.dirty.AbstractDirtyCollector; +import com.bytedance.bitsail.base.extension.SupportProducedType; import com.bytedance.bitsail.base.messenger.Messenger; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.model.ColumnInfo; @@ -62,11 +63,15 @@ public DelegateFlinkSource(com.bytedance.bitsail.base.connector.reader.v1.Source this.source = source; this.commonConfiguration = commonConfiguration; this.readerConfiguration = readerConfiguration; - List columnInfos = readerConfiguration - 
.get(ReaderOptions.BaseReaderOptions.COLUMNS); - - this.rowTypeInfo = TypeInfoUtils - .getRowTypeInfo(source.createTypeInfoConverter(), columnInfos); + if (source instanceof SupportProducedType) { + this.rowTypeInfo = ((SupportProducedType) source).getProducedType(); + } else { + List columnInfos = readerConfiguration + .get(ReaderOptions.BaseReaderOptions.COLUMNS); + + this.rowTypeInfo = TypeInfoUtils + .getRowTypeInfo(source.createTypeInfoConverter(), columnInfos); + } this.dirtyCollector = dirtyCollector; this.messenger = messenger; } From 967e28d80b8857b7803579be302824d3c017e400 Mon Sep 17 00:00:00 2001 From: haoke Date: Fri, 28 Apr 2023 11:30:02 +0800 Subject: [PATCH 08/14] [BitSail][Multi-Sink]fixed ut issue. --- .../core/common/sink/multiple/MultipleTableWriterTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java index 1e902e4ed..9199e8c1b 100644 --- a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java +++ b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java @@ -23,6 +23,7 @@ import com.bytedance.bitsail.common.catalog.table.CatalogTableSchema; import com.bytedance.bitsail.common.catalog.table.TableId; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.option.WriterOptions; import com.bytedance.bitsail.common.row.MultipleTableRow; import com.bytedance.bitsail.common.row.Row; import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; @@ -66,6 +67,7 @@ public class MultipleTableWriterTest { public void before() { this.multiTablePrintSink = new MultiTablePrintSink(); this.jobConf = 
BitSailConfiguration.newDefault(); + this.jobConf.set(WriterOptions.BaseWriterOptions.DB_NAME, "default"); this.catalogTables = mockCatalogTables(); this.typeInfoValueConverter = new TypeInfoValueConverter(BitSailConfiguration.newDefault()); this.context = new Writer.Context>() { From 7a938a60a649b858ce7726c012070a94d9409996 Mon Sep 17 00:00:00 2001 From: haoke Date: Fri, 28 Apr 2023 17:54:05 +0800 Subject: [PATCH 09/14] [BitSail][Multi-Sink]fixed ut issue#1. --- .../bitsail/common/option/ReaderOptions.java | 4 + .../DebeziumDeserializationSchema.java | 9 +- .../DebeziumJsonDeserializationSchema.java | 79 +++++ .../DebeziumRowDeserializationSchema.java | 306 +--------------- ...umRowFilterNamesDeserializationSchema.java | 327 ++++++++++++++++++ ...MultipleDebeziumDeserializationSchema.java | 77 +++++ ...FilterNamesDeserializationSchemaTest.java} | 14 +- .../connector-cdc/connector-cdc-base/pom.xml | 7 + .../cdc/option/BinlogReaderOptions.java | 4 + .../DebeziumDeserializationFactory.java | 49 +++ .../connector/cdc/source/BaseCDCSource.java | 13 +- .../source/reader/BaseCDCSourceReader.java | 9 +- .../cdc/util/MultipleTableRowUtils.java | 41 --- .../connector-cdc/connector-cdc-mysql/pom.xml | 6 - .../cdc/mysql/source/MysqlCDCSource.java | 12 +- .../debezium/MysqlBinlogSplitReader.java | 18 +- .../source/reader/MysqlCDCSourceReader.java | 11 +- .../cdc/mysql/source/MockConnectionsTest.java | 9 +- .../source/context/SourceMockContext.java | 46 +++ .../sink/multiple/MultipleTableSink.java | 4 +- .../sink/multiple/MultipleTableWriter.java | 6 +- .../multiple/MultipleTableWriterTest.java | 4 +- ...MysqlBinlogSplitReaderContainerITCase.java | 10 +- .../cdc/mysql/context/SourceMockContext.java | 46 +++ 24 files changed, 730 insertions(+), 381 deletions(-) create mode 100644 bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumJsonDeserializationSchema.java create mode 100644 
bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowFilterNamesDeserializationSchema.java create mode 100644 bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/MultipleDebeziumDeserializationSchema.java rename bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/{DebeziumRowDeserializationSchemaTest.java => DebeziumRowFilterNamesDeserializationSchemaTest.java} (83%) create mode 100644 bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/schema/DebeziumDeserializationFactory.java delete mode 100644 bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/util/MultipleTableRowUtils.java create mode 100644 bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/context/SourceMockContext.java create mode 100644 bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/java/com/bytedance/bitsail/test/integration/cdc/mysql/context/SourceMockContext.java diff --git a/bitsail-common/src/main/java/com/bytedance/bitsail/common/option/ReaderOptions.java b/bitsail-common/src/main/java/com/bytedance/bitsail/common/option/ReaderOptions.java index f6d6d6306..3b2358262 100644 --- a/bitsail-common/src/main/java/com/bytedance/bitsail/common/option/ReaderOptions.java +++ b/bitsail-common/src/main/java/com/bytedance/bitsail/common/option/ReaderOptions.java @@ -107,5 +107,9 @@ interface BaseReaderOptions { ConfigOption CONTENT_TYPE = key(READER_PREFIX + "content_type") .noDefaultValue(String.class); + + ConfigOption MULTIPLE_READER_ENABLED = + key(READER_PREFIX + "multiple_reader_enabled") + .defaultValue(false); } } diff --git 
a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java index 6fb9628c6..83cdb169c 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java @@ -16,14 +16,17 @@ package com.bytedance.bitsail.component.format.debezium; +import com.bytedance.bitsail.base.extension.SupportProducedType; import com.bytedance.bitsail.base.format.DeserializationSchema; +import com.bytedance.bitsail.common.row.Row; import org.apache.kafka.connect.source.SourceRecord; -public interface DebeziumDeserializationSchema extends DeserializationSchema { +public interface DebeziumDeserializationSchema extends DeserializationSchema, SupportProducedType { + @Override - SourceRecord deserialize(byte[] message); + Row deserialize(SourceRecord sourceRecord); @Override - boolean isEndOfStream(SourceRecord nextElement); + boolean isEndOfStream(Row nextElement); } diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumJsonDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumJsonDeserializationSchema.java new file mode 100644 index 000000000..ffc9faef4 --- /dev/null +++ 
b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumJsonDeserializationSchema.java @@ -0,0 +1,79 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.component.format.debezium; + +import com.bytedance.bitsail.base.extension.SupportProducedType; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfos; +import com.bytedance.bitsail.component.format.debezium.option.DebeziumWriterOptions; + +import org.apache.kafka.connect.json.JsonConverter; +import org.apache.kafka.connect.json.JsonConverterConfig; +import org.apache.kafka.connect.source.SourceRecord; +import org.apache.kafka.connect.storage.ConverterConfig; +import org.apache.kafka.connect.storage.ConverterType; + +import java.util.HashMap; + +public class DebeziumJsonDeserializationSchema implements DebeziumDeserializationSchema, SupportProducedType { + public static final String NAME = "debezium-json"; + + public static final RowTypeInfo DEBEZIUM_JSON_ROW_TYPE = + new RowTypeInfo( + new String[] {"topic", "key", "value", "timestamp"}, + new TypeInfo[] { + 
TypeInfos.STRING_TYPE_INFO, TypeInfos.STRING_TYPE_INFO, TypeInfos.STRING_TYPE_INFO, TypeInfos.LONG_TYPE_INFO + }); + + private transient JsonConverter jsonConverter; + + public DebeziumJsonDeserializationSchema(BitSailConfiguration jobConf) { + this.jsonConverter = new JsonConverter(); + boolean includeSchema = jobConf.get(DebeziumWriterOptions.DEBEZIUM_JSON_INCLUDE_SCHEMA); + final HashMap configs = new HashMap<>(); + configs.put(ConverterConfig.TYPE_CONFIG, ConverterType.VALUE.getName()); + configs.put(JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, includeSchema); + jsonConverter.configure(configs); + } + + @Override + public RowTypeInfo getProducedType() { + return DEBEZIUM_JSON_ROW_TYPE; + } + + @SuppressWarnings("checkstyle:MagicNumber") + @Override + public Row deserialize(SourceRecord sourceRecord) { + byte[] key = jsonConverter.fromConnectData(sourceRecord.topic(), sourceRecord.keySchema(), sourceRecord.key()); + byte[] value = jsonConverter.fromConnectData(sourceRecord.topic(), sourceRecord.valueSchema(), sourceRecord.value()); + + Object[] values = new Object[DEBEZIUM_JSON_ROW_TYPE.getFieldNames().length]; + values[0] = sourceRecord.topic(); + values[1] = new String(key); + values[2] = new String(value); + values[3] = sourceRecord.timestamp(); + return new Row(values); + } + + @Override + public boolean isEndOfStream(Row nextElement) { + return false; + } +} diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java index 271dff46b..9d2889b78 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java +++ 
b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java @@ -16,312 +16,36 @@ package com.bytedance.bitsail.component.format.debezium; -import com.bytedance.bitsail.base.format.DeserializationSchema; -import com.bytedance.bitsail.common.BitSailException; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; -import com.bytedance.bitsail.common.exception.CommonErrorCode; import com.bytedance.bitsail.common.row.Row; -import com.bytedance.bitsail.common.row.RowKind; -import com.bytedance.bitsail.component.format.debezium.option.DebeziumReaderOptions; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; -import io.debezium.data.Enum; -import io.debezium.data.EnumSet; -import io.debezium.data.Envelope; -import io.debezium.data.Json; -import io.debezium.data.SpecialValueDecimal; -import io.debezium.data.VariableScaleDecimal; -import io.debezium.time.Date; -import io.debezium.time.MicroTime; -import io.debezium.time.MicroTimestamp; -import io.debezium.time.NanoTime; -import io.debezium.time.NanoTimestamp; -import io.debezium.time.Time; -import io.debezium.time.Timestamp; -import io.debezium.time.Year; -import io.debezium.time.ZonedTime; -import io.debezium.time.ZonedTimestamp; -import org.apache.commons.lang3.math.NumberUtils; -import org.apache.kafka.connect.data.Decimal; -import org.apache.kafka.connect.data.Field; -import org.apache.kafka.connect.data.Schema; -import org.apache.kafka.connect.data.SchemaAndValue; -import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.json.JsonConverter; -import org.apache.kafka.connect.json.JsonConverterConfig; -import org.apache.kafka.connect.storage.ConverterConfig; -import org.apache.kafka.connect.storage.ConverterType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.kafka.connect.source.SourceRecord; -import java.math.BigDecimal; 
-import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.time.Instant; -import java.time.ZoneId; -import java.util.HashMap; -import java.util.Objects; - -import static io.debezium.data.Envelope.FieldName.AFTER; -import static io.debezium.data.Envelope.FieldName.BEFORE; -import static io.debezium.data.Envelope.FieldName.OPERATION; -import static org.apache.kafka.connect.data.Values.convertToDate; -import static org.apache.kafka.connect.data.Values.convertToTime; -import static org.apache.kafka.connect.data.Values.convertToTimestamp; - -public class DebeziumRowDeserializationSchema implements DeserializationSchema { - private static final Logger LOG = LoggerFactory.getLogger(DebeziumRowDeserializationSchema.class); +public class DebeziumRowDeserializationSchema implements DebeziumDeserializationSchema { private final BitSailConfiguration jobConf; - private final JsonConverter jsonConverter; - public DebeziumRowDeserializationSchema(BitSailConfiguration jobConf) { + private final RowTypeInfo rowTypeInfo; + + public DebeziumRowDeserializationSchema(BitSailConfiguration jobConf, RowTypeInfo rowTypeInfo) { this.jobConf = jobConf; - this.jsonConverter = new JsonConverter(); - boolean includeSchema = jobConf.get(DebeziumReaderOptions.DEBEZIUM_JSON_INCLUDE_SCHEMA); - final HashMap configs = new HashMap<>(); - configs.put(ConverterConfig.TYPE_CONFIG, ConverterType.VALUE.getName()); - configs.put(JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, includeSchema); - jsonConverter.configure(configs); + this.rowTypeInfo = rowTypeInfo; } @Override - public Row deserialize(String message) { - throw new UnsupportedOperationException("Please invoke DeserializationSchema#deserialize(byte[], fieldNames) instead."); - } - - public Row deserialize(String message, String[] fieldNames) { - SchemaAndValue schemaAndValue; - try { - schemaAndValue = jsonConverter.toConnectData(null, message.getBytes(StandardCharsets.UTF_8)); - } catch (Exception e) { - LOG.error("Can't 
parse content from format [debezium], content: {}.", message, e); - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Can't parse debezium json: %s.", message), e); - } - Struct value = (Struct) schemaAndValue.value(); - Envelope.Operation operation = Envelope.Operation.forCode(value.getString(OPERATION)); - - if (operation == Envelope.Operation.CREATE || operation == Envelope.Operation.READ) { - Struct after = value.getStruct(AFTER); - return convert(after, after.schema(), fieldNames, RowKind.INSERT); - } - - if (operation == Envelope.Operation.DELETE) { - Struct before = value.getStruct(BEFORE); - return convert(before, before.schema(), fieldNames, RowKind.DELETE); - } - - if (operation == Envelope.Operation.UPDATE) { - Struct after = value.getStruct(AFTER); - return convert(after, after.schema(), fieldNames, RowKind.UPDATE_AFTER); - } - - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Not support operation: %s right now.", operation)); - } - - public Row convert(Struct struct, Schema schema, String[] fieldNames, RowKind rowKind) { - Row row = new Row(fieldNames.length); - row.setKind(rowKind); - for (int index = 0; index < fieldNames.length; index++) { - Field field = schema.field(fieldNames[index]); - if (Objects.isNull(field)) { - row.setField(index, null); - } else { - Object withoutDefault = struct.getWithoutDefault(fieldNames[index]); - try { - withoutDefault = Objects.isNull(withoutDefault) ? null - : convert(field.schema(), withoutDefault); - row.setField(index, withoutDefault); - } catch (BitSailException e) { - LOG.error("Failed to parse field {} from value {}.", field.name(), withoutDefault); - throw e; - } - } - } - return row; - } - - private Object convert(Schema fieldSchema, Object withoutDefault) { - if (isPrimitiveType(fieldSchema)) { - return convertPrimitiveType(fieldSchema, withoutDefault); - } else { - //todo support local timestamp zone. 
- return convertOtherType(fieldSchema, withoutDefault, null); - } - } - - private static boolean isPrimitiveType(Schema fieldSchema) { - return fieldSchema.name() == null; - } - - private Object convertPrimitiveType(Schema fieldSchema, Object fieldValue) { - switch (fieldSchema.type()) { - case BOOLEAN: - return convertToBoolean(fieldValue); - case INT8: - case INT16: - case INT32: - return convertToInteger(fieldValue); - case INT64: - return convertToLong(fieldValue); - case FLOAT32: - return convertToFloat(fieldValue); - case FLOAT64: - return convertToDouble(fieldValue); - case STRING: - return convertToString(fieldValue); - case BYTES: - return convertToBinary(fieldValue); - default: - throw new UnsupportedOperationException("Not support type: " + fieldSchema.type()); - } - } - - private Object convertOtherType(Schema fieldSchema, Object fieldValue, ZoneId serverTimeZone) { - switch (fieldSchema.name()) { - case Enum.LOGICAL_NAME: - case Json.LOGICAL_NAME: - case EnumSet.LOGICAL_NAME: - return convertToString(fieldValue); - case Time.SCHEMA_NAME: - case MicroTime.SCHEMA_NAME: - case NanoTime.SCHEMA_NAME: - return convertToTime(fieldSchema, fieldValue); - case Timestamp.SCHEMA_NAME: - case MicroTimestamp.SCHEMA_NAME: - case NanoTimestamp.SCHEMA_NAME: - return convertToTimestamp(fieldSchema, fieldValue); - case Decimal.LOGICAL_NAME: - return convertToDecimal(fieldSchema, fieldValue); - case Date.SCHEMA_NAME: - return convertToDate(fieldSchema, fieldValue); - case Year.SCHEMA_NAME: - return convertToInteger(fieldValue); - case ZonedTime.SCHEMA_NAME: - case ZonedTimestamp.SCHEMA_NAME: - return convertToZoneTimeStamp(fieldSchema, fieldValue); - default: - throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, - String.format("Field name %s not support schema %s.", - fieldSchema.name(), - fieldSchema.schema() - ) - ); - } - } - - private byte[] convertToBinary(Object fieldValue) { - if (fieldValue instanceof byte[]) { - return (byte[]) 
fieldValue; - } else if (fieldValue instanceof ByteBuffer) { - ByteBuffer byteBuffer = (ByteBuffer) fieldValue; - byte[] bytes = new byte[byteBuffer.remaining()]; - byteBuffer.get(bytes); - return bytes; - } else { - throw new UnsupportedOperationException( - "Unsupported Binary value type: " + fieldValue.getClass().getSimpleName()); - } - } - - private String convertToString(Object fieldValue) { - return fieldValue.toString(); - } - - private Double convertToDouble(Object fieldValue) { - if (fieldValue instanceof Float) { - return ((Float) fieldValue).doubleValue(); - } else if (fieldValue instanceof Double) { - return (Double) fieldValue; - } else { - return Double.parseDouble(fieldValue.toString()); - } - } - - private Float convertToFloat(Object fieldValue) { - if (fieldValue instanceof Float) { - return (Float) fieldValue; - } else if (fieldValue instanceof Double) { - return ((Double) fieldValue).floatValue(); - } else { - return Float.parseFloat(fieldValue.toString()); - } - } - - private Long convertToLong(Object fieldValue) { - if (fieldValue instanceof Integer) { - return ((Integer) fieldValue).longValue(); - } else if (fieldValue instanceof Long) { - return (Long) fieldValue; - } else { - return Long.parseLong(fieldValue.toString()); - } - } - - private Boolean convertToBoolean(Object fieldValue) { - if (fieldValue instanceof Integer) { - return (Integer) fieldValue != 0; - } else if (fieldValue instanceof Long) { - return (Long) fieldValue != 0; - } else { - String str = fieldValue.toString(); - if (NumberUtils.isNumber(str)) { - return NumberUtils.createNumber(str).intValue() != 0; - } - return Boolean.parseBoolean(fieldValue.toString()); - } - } - - private Integer convertToInteger(Object fieldValue) { - if (fieldValue instanceof Integer) { - return (Integer) (fieldValue); - } else if (fieldValue instanceof Long) { - return ((Long) fieldValue).intValue(); - } else { - return Integer.parseInt(fieldValue.toString()); - } - } - - public Object 
convertToDecimal(Schema schema, Object fieldValue) { - BigDecimal bigDecimal; - if (fieldValue instanceof byte[]) { - // for decimal.handling.mode=precise - bigDecimal = Decimal.toLogical(schema, (byte[]) fieldValue); - } else if (fieldValue instanceof String) { - // for decimal.handling.mode=string - bigDecimal = new BigDecimal((String) fieldValue); - } else if (fieldValue instanceof Double) { - // for decimal.handling.mode=double - bigDecimal = BigDecimal.valueOf((Double) fieldValue); - } else { - if (VariableScaleDecimal.LOGICAL_NAME.equals(schema.name())) { - SpecialValueDecimal decimal = - VariableScaleDecimal.toLogical((Struct) fieldValue); - bigDecimal = decimal.getDecimalValue().orElse(BigDecimal.ZERO); - } else { - // fallback to string - bigDecimal = new BigDecimal(fieldValue.toString()); - } - } - return bigDecimal; - } - - private Object convertToZoneTimeStamp(Schema fieldSchema, Object fieldValue) { - if (fieldValue instanceof String) { - String str = (String) fieldValue; - Instant instant = Instant.parse(str); - //TODO zone timestamp - } - throw BitSailException.asBitSailException(CommonErrorCode.RUNTIME_ERROR, - String.format("Can't parse field [%s] from value %s to zone timestamp.", - fieldSchema.name(), - fieldValue - )); + public Row deserialize(SourceRecord sourceRecord) { + //TODO + return null; } @Override public boolean isEndOfStream(Row nextElement) { return false; } + + @Override + public RowTypeInfo getProducedType() { + return rowTypeInfo; + } } diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowFilterNamesDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowFilterNamesDeserializationSchema.java new file mode 100644 index 000000000..aa700631a --- /dev/null +++ 
b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowFilterNamesDeserializationSchema.java @@ -0,0 +1,327 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.component.format.debezium; + +import com.bytedance.bitsail.base.format.DeserializationSchema; +import com.bytedance.bitsail.common.BitSailException; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.exception.CommonErrorCode; +import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.common.row.RowKind; +import com.bytedance.bitsail.component.format.debezium.option.DebeziumReaderOptions; + +import io.debezium.data.Enum; +import io.debezium.data.EnumSet; +import io.debezium.data.Envelope; +import io.debezium.data.Json; +import io.debezium.data.SpecialValueDecimal; +import io.debezium.data.VariableScaleDecimal; +import io.debezium.time.Date; +import io.debezium.time.MicroTime; +import io.debezium.time.MicroTimestamp; +import io.debezium.time.NanoTime; +import io.debezium.time.NanoTimestamp; +import io.debezium.time.Time; +import io.debezium.time.Timestamp; +import io.debezium.time.Year; +import io.debezium.time.ZonedTime; +import io.debezium.time.ZonedTimestamp; +import org.apache.commons.lang3.math.NumberUtils; +import 
org.apache.kafka.connect.data.Decimal; +import org.apache.kafka.connect.data.Field; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaAndValue; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.json.JsonConverter; +import org.apache.kafka.connect.json.JsonConverterConfig; +import org.apache.kafka.connect.storage.ConverterConfig; +import org.apache.kafka.connect.storage.ConverterType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.ZoneId; +import java.util.HashMap; +import java.util.Objects; + +import static io.debezium.data.Envelope.FieldName.AFTER; +import static io.debezium.data.Envelope.FieldName.BEFORE; +import static io.debezium.data.Envelope.FieldName.OPERATION; +import static org.apache.kafka.connect.data.Values.convertToDate; +import static org.apache.kafka.connect.data.Values.convertToTime; +import static org.apache.kafka.connect.data.Values.convertToTimestamp; + +public class DebeziumRowFilterNamesDeserializationSchema implements DeserializationSchema { + private static final Logger LOG = LoggerFactory.getLogger(DebeziumRowFilterNamesDeserializationSchema.class); + + private final BitSailConfiguration jobConf; + private final JsonConverter jsonConverter; + + public DebeziumRowFilterNamesDeserializationSchema(BitSailConfiguration jobConf) { + this.jobConf = jobConf; + this.jsonConverter = new JsonConverter(); + boolean includeSchema = jobConf.get(DebeziumReaderOptions.DEBEZIUM_JSON_INCLUDE_SCHEMA); + final HashMap configs = new HashMap<>(); + configs.put(ConverterConfig.TYPE_CONFIG, ConverterType.VALUE.getName()); + configs.put(JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, includeSchema); + jsonConverter.configure(configs); + } + + @Override + public Row deserialize(String message) { + throw new UnsupportedOperationException("Please 
invoke DeserializationSchema#deserialize(byte[], fieldNames) instead."); + } + + public Row deserialize(String message, String[] fieldNames) { + SchemaAndValue schemaAndValue; + try { + schemaAndValue = jsonConverter.toConnectData(null, message.getBytes(StandardCharsets.UTF_8)); + } catch (Exception e) { + LOG.error("Can't parse content from format [debezium], content: {}.", message, e); + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Can't parse debezium json: %s.", message), e); + } + Struct value = (Struct) schemaAndValue.value(); + Envelope.Operation operation = Envelope.Operation.forCode(value.getString(OPERATION)); + + if (operation == Envelope.Operation.CREATE || operation == Envelope.Operation.READ) { + Struct after = value.getStruct(AFTER); + return convert(after, after.schema(), fieldNames, RowKind.INSERT); + } + + if (operation == Envelope.Operation.DELETE) { + Struct before = value.getStruct(BEFORE); + return convert(before, before.schema(), fieldNames, RowKind.DELETE); + } + + if (operation == Envelope.Operation.UPDATE) { + Struct after = value.getStruct(AFTER); + return convert(after, after.schema(), fieldNames, RowKind.UPDATE_AFTER); + } + + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Not support operation: %s right now.", operation)); + } + + public Row convert(Struct struct, Schema schema, String[] fieldNames, RowKind rowKind) { + Row row = new Row(fieldNames.length); + row.setKind(rowKind); + for (int index = 0; index < fieldNames.length; index++) { + Field field = schema.field(fieldNames[index]); + if (Objects.isNull(field)) { + row.setField(index, null); + } else { + Object withoutDefault = struct.getWithoutDefault(fieldNames[index]); + try { + withoutDefault = Objects.isNull(withoutDefault) ? 
null + : convert(field.schema(), withoutDefault); + row.setField(index, withoutDefault); + } catch (BitSailException e) { + LOG.error("Failed to parse field {} from value {}.", field.name(), withoutDefault); + throw e; + } + } + } + return row; + } + + private Object convert(Schema fieldSchema, Object withoutDefault) { + if (isPrimitiveType(fieldSchema)) { + return convertPrimitiveType(fieldSchema, withoutDefault); + } else { + //todo support local timestamp zone. + return convertOtherType(fieldSchema, withoutDefault, null); + } + } + + private static boolean isPrimitiveType(Schema fieldSchema) { + return fieldSchema.name() == null; + } + + private Object convertPrimitiveType(Schema fieldSchema, Object fieldValue) { + switch (fieldSchema.type()) { + case BOOLEAN: + return convertToBoolean(fieldValue); + case INT8: + case INT16: + case INT32: + return convertToInteger(fieldValue); + case INT64: + return convertToLong(fieldValue); + case FLOAT32: + return convertToFloat(fieldValue); + case FLOAT64: + return convertToDouble(fieldValue); + case STRING: + return convertToString(fieldValue); + case BYTES: + return convertToBinary(fieldValue); + default: + throw new UnsupportedOperationException("Not support type: " + fieldSchema.type()); + } + } + + private Object convertOtherType(Schema fieldSchema, Object fieldValue, ZoneId serverTimeZone) { + switch (fieldSchema.name()) { + case Enum.LOGICAL_NAME: + case Json.LOGICAL_NAME: + case EnumSet.LOGICAL_NAME: + return convertToString(fieldValue); + case Time.SCHEMA_NAME: + case MicroTime.SCHEMA_NAME: + case NanoTime.SCHEMA_NAME: + return convertToTime(fieldSchema, fieldValue); + case Timestamp.SCHEMA_NAME: + case MicroTimestamp.SCHEMA_NAME: + case NanoTimestamp.SCHEMA_NAME: + return convertToTimestamp(fieldSchema, fieldValue); + case Decimal.LOGICAL_NAME: + return convertToDecimal(fieldSchema, fieldValue); + case Date.SCHEMA_NAME: + return convertToDate(fieldSchema, fieldValue); + case Year.SCHEMA_NAME: + return 
convertToInteger(fieldValue); + case ZonedTime.SCHEMA_NAME: + case ZonedTimestamp.SCHEMA_NAME: + return convertToZoneTimeStamp(fieldSchema, fieldValue); + default: + throw BitSailException.asBitSailException(CommonErrorCode.CONVERT_NOT_SUPPORT, + String.format("Field name %s not support schema %s.", + fieldSchema.name(), + fieldSchema.schema() + ) + ); + } + } + + private byte[] convertToBinary(Object fieldValue) { + if (fieldValue instanceof byte[]) { + return (byte[]) fieldValue; + } else if (fieldValue instanceof ByteBuffer) { + ByteBuffer byteBuffer = (ByteBuffer) fieldValue; + byte[] bytes = new byte[byteBuffer.remaining()]; + byteBuffer.get(bytes); + return bytes; + } else { + throw new UnsupportedOperationException( + "Unsupported Binary value type: " + fieldValue.getClass().getSimpleName()); + } + } + + private String convertToString(Object fieldValue) { + return fieldValue.toString(); + } + + private Double convertToDouble(Object fieldValue) { + if (fieldValue instanceof Float) { + return ((Float) fieldValue).doubleValue(); + } else if (fieldValue instanceof Double) { + return (Double) fieldValue; + } else { + return Double.parseDouble(fieldValue.toString()); + } + } + + private Float convertToFloat(Object fieldValue) { + if (fieldValue instanceof Float) { + return (Float) fieldValue; + } else if (fieldValue instanceof Double) { + return ((Double) fieldValue).floatValue(); + } else { + return Float.parseFloat(fieldValue.toString()); + } + } + + private Long convertToLong(Object fieldValue) { + if (fieldValue instanceof Integer) { + return ((Integer) fieldValue).longValue(); + } else if (fieldValue instanceof Long) { + return (Long) fieldValue; + } else { + return Long.parseLong(fieldValue.toString()); + } + } + + private Boolean convertToBoolean(Object fieldValue) { + if (fieldValue instanceof Integer) { + return (Integer) fieldValue != 0; + } else if (fieldValue instanceof Long) { + return (Long) fieldValue != 0; + } else { + String str = 
fieldValue.toString(); + if (NumberUtils.isNumber(str)) { + return NumberUtils.createNumber(str).intValue() != 0; + } + return Boolean.parseBoolean(fieldValue.toString()); + } + } + + private Integer convertToInteger(Object fieldValue) { + if (fieldValue instanceof Integer) { + return (Integer) (fieldValue); + } else if (fieldValue instanceof Long) { + return ((Long) fieldValue).intValue(); + } else { + return Integer.parseInt(fieldValue.toString()); + } + } + + public Object convertToDecimal(Schema schema, Object fieldValue) { + BigDecimal bigDecimal; + if (fieldValue instanceof byte[]) { + // for decimal.handling.mode=precise + bigDecimal = Decimal.toLogical(schema, (byte[]) fieldValue); + } else if (fieldValue instanceof String) { + // for decimal.handling.mode=string + bigDecimal = new BigDecimal((String) fieldValue); + } else if (fieldValue instanceof Double) { + // for decimal.handling.mode=double + bigDecimal = BigDecimal.valueOf((Double) fieldValue); + } else { + if (VariableScaleDecimal.LOGICAL_NAME.equals(schema.name())) { + SpecialValueDecimal decimal = + VariableScaleDecimal.toLogical((Struct) fieldValue); + bigDecimal = decimal.getDecimalValue().orElse(BigDecimal.ZERO); + } else { + // fallback to string + bigDecimal = new BigDecimal(fieldValue.toString()); + } + } + return bigDecimal; + } + + private Object convertToZoneTimeStamp(Schema fieldSchema, Object fieldValue) { + if (fieldValue instanceof String) { + String str = (String) fieldValue; + Instant instant = Instant.parse(str); + //TODO zone timestamp + } + throw BitSailException.asBitSailException(CommonErrorCode.RUNTIME_ERROR, + String.format("Can't parse field [%s] from value %s to zone timestamp.", + fieldSchema.name(), + fieldValue + )); + } + + @Override + public boolean isEndOfStream(Row nextElement) { + return false; + } +} diff --git 
a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/MultipleDebeziumDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/MultipleDebeziumDeserializationSchema.java new file mode 100644 index 000000000..b8fd170dd --- /dev/null +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/MultipleDebeziumDeserializationSchema.java @@ -0,0 +1,77 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.bytedance.bitsail.component.format.debezium; + +import com.bytedance.bitsail.base.extension.SupportProducedType; +import com.bytedance.bitsail.common.catalog.table.TableId; +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.row.MultipleTableRow; +import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; +import com.bytedance.bitsail.component.format.debezium.option.DebeziumWriterOptions; + +import org.apache.commons.lang3.StringUtils; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.json.JsonConverter; +import org.apache.kafka.connect.json.JsonConverterConfig; +import org.apache.kafka.connect.source.SourceRecord; +import org.apache.kafka.connect.storage.ConverterConfig; +import org.apache.kafka.connect.storage.ConverterType; + +import java.util.HashMap; + +import static io.debezium.data.Envelope.FieldName.SOURCE; + +public class MultipleDebeziumDeserializationSchema implements DebeziumDeserializationSchema, SupportProducedType { + + private transient JsonConverter jsonConverter; + + public MultipleDebeziumDeserializationSchema(BitSailConfiguration jobConf) { + this.jsonConverter = new JsonConverter(); + boolean includeSchema = jobConf.get(DebeziumWriterOptions.DEBEZIUM_JSON_INCLUDE_SCHEMA); + final HashMap configs = new HashMap<>(); + configs.put(ConverterConfig.TYPE_CONFIG, ConverterType.VALUE.getName()); + configs.put(JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, includeSchema); + jsonConverter.configure(configs); + } + + @Override + public RowTypeInfo getProducedType() { + return MultipleTableRow.MULTIPLE_TABLE_ROW_TYPE_INFO; + } + + @Override + public Row deserialize(SourceRecord sourceRecord) { + Struct valueStruct = (Struct) sourceRecord.value(); + TableId tableId = TableId + .of(valueStruct.getStruct(SOURCE).getString("db"), valueStruct.getStruct(SOURCE).getString("table")); + byte[] connectData = 
jsonConverter.fromConnectData(sourceRecord.topic(), sourceRecord.valueSchema(), sourceRecord.value()); + MultipleTableRow multipleTableRow = MultipleTableRow + .of(tableId.toString(), + sourceRecord.topic(), + new String(connectData), + StringUtils.EMPTY, + StringUtils.EMPTY); + + return multipleTableRow.asRow(); + } + + @Override + public boolean isEndOfStream(Row nextElement) { + return false; + } +} diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowFilterNamesDeserializationSchemaTest.java similarity index 83% rename from bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java rename to bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowFilterNamesDeserializationSchemaTest.java index 19a345d84..f9eb29ec7 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchemaTest.java +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/test/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowFilterNamesDeserializationSchemaTest.java @@ -33,18 +33,18 @@ import java.time.ZoneOffset; import java.util.Date; -public class DebeziumRowDeserializationSchemaTest { +public class DebeziumRowFilterNamesDeserializationSchemaTest { - private DebeziumRowDeserializationSchema deserializationSchema; + private DebeziumRowFilterNamesDeserializationSchema deserializationSchema; @Before public void 
before() { - deserializationSchema = new DebeziumRowDeserializationSchema(BitSailConfiguration.newDefault()); + deserializationSchema = new DebeziumRowFilterNamesDeserializationSchema(BitSailConfiguration.newDefault()); } @Test public void test() throws URISyntaxException, IOException { - byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowDeserializationSchemaTest + byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowFilterNamesDeserializationSchemaTest .class.getClassLoader().getResource("file/debezium.json") .toURI().getPath())); @@ -57,7 +57,7 @@ public void test() throws URISyntaxException, IOException { @Test public void testInsert() throws URISyntaxException, IOException { - byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowDeserializationSchemaTest + byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowFilterNamesDeserializationSchemaTest .class.getClassLoader().getResource("file/debezium_insert.json") .toURI().getPath())); @@ -70,7 +70,7 @@ public void testInsert() throws URISyntaxException, IOException { @Test public void testUpsert() throws URISyntaxException, IOException { - byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowDeserializationSchemaTest + byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowFilterNamesDeserializationSchemaTest .class.getClassLoader().getResource("file/debezium_upsert.json") .toURI().getPath())); @@ -85,7 +85,7 @@ public void testUpsert() throws URISyntaxException, IOException { @Test public void testDelete() throws URISyntaxException, IOException { - byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowDeserializationSchemaTest + byte[] bytes = Files.readAllBytes(Paths.get(DebeziumRowFilterNamesDeserializationSchemaTest .class.getClassLoader().getResource("file/postgres/debezium_pg_delete.json") .toURI().getPath())); diff --git a/bitsail-connectors/connector-cdc/connector-cdc-base/pom.xml b/bitsail-connectors/connector-cdc/connector-cdc-base/pom.xml index 80ec0897e..ff5a6f01d 100644 --- 
a/bitsail-connectors/connector-cdc/connector-cdc-base/pom.xml +++ b/bitsail-connectors/connector-cdc/connector-cdc-base/pom.xml @@ -48,6 +48,13 @@ + + + com.bytedance.bitsail + bitsail-component-format-debezium + ${revision} + + \ No newline at end of file diff --git a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/option/BinlogReaderOptions.java b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/option/BinlogReaderOptions.java index 2561b1e28..c59688083 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/option/BinlogReaderOptions.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/option/BinlogReaderOptions.java @@ -81,4 +81,8 @@ public interface BinlogReaderOptions extends ReaderOptions.BaseReaderOptions { ConfigOption MAX_QUEUE_SIZE = key(READER_PREFIX + "max_queue_size") .defaultValue(8192); + + ConfigOption FORMAT = + key(READER_PREFIX + "format") + .defaultValue("debezium-json"); } diff --git a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/schema/DebeziumDeserializationFactory.java b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/schema/DebeziumDeserializationFactory.java new file mode 100644 index 000000000..27609dd3a --- /dev/null +++ b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/schema/DebeziumDeserializationFactory.java @@ -0,0 +1,49 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.connector.cdc.schema; + +import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.common.model.ColumnInfo; +import com.bytedance.bitsail.common.option.ReaderOptions; +import com.bytedance.bitsail.common.type.TypeInfoConverter; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; +import com.bytedance.bitsail.common.typeinfo.TypeInfoUtils; +import com.bytedance.bitsail.component.format.debezium.DebeziumDeserializationSchema; +import com.bytedance.bitsail.component.format.debezium.DebeziumJsonDeserializationSchema; +import com.bytedance.bitsail.component.format.debezium.DebeziumRowDeserializationSchema; +import com.bytedance.bitsail.component.format.debezium.MultipleDebeziumDeserializationSchema; +import com.bytedance.bitsail.connector.cdc.option.BinlogReaderOptions; + +import java.util.List; + +public class DebeziumDeserializationFactory { + + public static DebeziumDeserializationSchema getDebeziumDeserializationSchema(BitSailConfiguration jobConf, TypeInfoConverter typeInfoConverter) { + Boolean multiple = jobConf.get(ReaderOptions.BaseReaderOptions.MULTIPLE_READER_ENABLED); + if (multiple) { + return new MultipleDebeziumDeserializationSchema(jobConf); + } + String format = jobConf.get(BinlogReaderOptions.FORMAT); + if (DebeziumJsonDeserializationSchema.NAME.equalsIgnoreCase(format)) { + return new DebeziumJsonDeserializationSchema(jobConf); + } + List columnInfos = jobConf.get(ReaderOptions.BaseReaderOptions.COLUMNS); + RowTypeInfo rowTypeInfo = 
TypeInfoUtils.getRowTypeInfo(typeInfoConverter, columnInfos); + return new DebeziumRowDeserializationSchema(jobConf, rowTypeInfo); + + } +} diff --git a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/source/BaseCDCSource.java b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/source/BaseCDCSource.java index 1d3c5f979..e5c6a1cde 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/source/BaseCDCSource.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/source/BaseCDCSource.java @@ -22,6 +22,7 @@ import com.bytedance.bitsail.base.connector.reader.v1.SourceSplitCoordinator; import com.bytedance.bitsail.base.execution.ExecutionEnviron; import com.bytedance.bitsail.base.extension.ParallelismComputable; +import com.bytedance.bitsail.base.extension.SupportProducedType; import com.bytedance.bitsail.base.parallelism.ParallelismAdvice; import com.bytedance.bitsail.base.serializer.BinarySerializer; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; @@ -29,6 +30,9 @@ import com.bytedance.bitsail.common.row.Row; import com.bytedance.bitsail.common.type.BitSailTypeInfoConverter; import com.bytedance.bitsail.common.type.TypeInfoConverter; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; +import com.bytedance.bitsail.component.format.debezium.DebeziumDeserializationSchema; +import com.bytedance.bitsail.connector.cdc.schema.DebeziumDeserializationFactory; import com.bytedance.bitsail.connector.cdc.source.coordinator.CDCSourceSplitCoordinator; import com.bytedance.bitsail.connector.cdc.source.coordinator.state.AssignmentStateSerializer; import com.bytedance.bitsail.connector.cdc.source.coordinator.state.BaseAssignmentState; @@ -40,18 +44,20 @@ /** * Source to read mysql binlog. 
*/ -public abstract class BaseCDCSource implements Source, ParallelismComputable { +public abstract class BaseCDCSource implements Source, ParallelismComputable, SupportProducedType { protected BitSailConfiguration commonConf; protected BitSailConfiguration readerConf; protected BaseSplitSerializer splitSerializer; + protected DebeziumDeserializationSchema deserializationSchema; @Override public void configure(ExecutionEnviron execution, BitSailConfiguration readerConfiguration) throws IOException { this.readerConf = readerConfiguration; this.commonConf = execution.getCommonConfiguration(); this.splitSerializer = createSplitSerializer(); + this.deserializationSchema = DebeziumDeserializationFactory.getDebeziumDeserializationSchema(readerConf, createTypeInfoConverter()); } @Override @@ -97,4 +103,9 @@ public ParallelismAdvice getParallelismAdvice(BitSailConfiguration commonConf, B .enforceDownStreamChain(false) .build(); } + + @Override + public RowTypeInfo getProducedType() { + return deserializationSchema.getProducedType(); + } } diff --git a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/source/reader/BaseCDCSourceReader.java b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/source/reader/BaseCDCSourceReader.java index 7b0cb7db5..79733c71e 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/source/reader/BaseCDCSourceReader.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/source/reader/BaseCDCSourceReader.java @@ -22,6 +22,7 @@ import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.option.CommonOptions; import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.component.format.debezium.DebeziumDeserializationSchema; import 
com.bytedance.bitsail.connector.cdc.source.event.BinlogCompleteAckEvent; import com.bytedance.bitsail.connector.cdc.source.split.BaseCDCSplit; import com.bytedance.bitsail.connector.cdc.source.split.BinlogSplit; @@ -51,12 +52,16 @@ public abstract class BaseCDCSourceReader implements SourceReader(); this.reader = getReader(); this.splitSubmitted = false; diff --git a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/util/MultipleTableRowUtils.java b/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/util/MultipleTableRowUtils.java deleted file mode 100644 index 524aa69dd..000000000 --- a/bitsail-connectors/connector-cdc/connector-cdc-base/src/main/java/com/bytedance/bitsail/connector/cdc/util/MultipleTableRowUtils.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.bytedance.bitsail.connector.cdc.util; - -import com.bytedance.bitsail.common.catalog.table.TableId; -import com.bytedance.bitsail.common.row.MultipleTableRow; - -import org.apache.commons.lang3.StringUtils; -import org.apache.kafka.connect.data.Struct; -import org.apache.kafka.connect.source.SourceRecord; - -import static io.debezium.data.Envelope.FieldName.SOURCE; - -public class MultipleTableRowUtils { - - public static MultipleTableRow fromSourceRecord(SourceRecord record, byte[] serialized) { - Struct value = (Struct) record.value(); - TableId tableId = TableId - .of(value.getStruct(SOURCE).getString("db"), value.getStruct(SOURCE).getString("table")); - return MultipleTableRow - .of(tableId.toString(), - null, - new String(serialized), - StringUtils.EMPTY, - StringUtils.EMPTY); - } -} diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/pom.xml b/bitsail-connectors/connector-cdc/connector-cdc-mysql/pom.xml index 57076fec0..669fe2d70 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-mysql/pom.xml +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/pom.xml @@ -42,12 +42,6 @@ ${revision} - - com.bytedance.bitsail - bitsail-component-format-debezium - ${revision} - - io.debezium debezium-connector-mysql diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/MysqlCDCSource.java b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/MysqlCDCSource.java index 32e139770..8ae308368 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/MysqlCDCSource.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/MysqlCDCSource.java @@ -17,10 +17,7 @@ package com.bytedance.bitsail.connector.cdc.mysql.source; import 
com.bytedance.bitsail.base.connector.reader.v1.SourceReader; -import com.bytedance.bitsail.base.extension.SupportProducedType; -import com.bytedance.bitsail.common.row.MultipleTableRow; import com.bytedance.bitsail.common.row.Row; -import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; import com.bytedance.bitsail.connector.cdc.mysql.source.reader.MysqlCDCSourceReader; import com.bytedance.bitsail.connector.cdc.mysql.source.split.MysqlSplitSerializer; import com.bytedance.bitsail.connector.cdc.source.BaseCDCSource; @@ -33,13 +30,14 @@ /** * Source to read mysql binlog. */ -public class MysqlCDCSource extends BaseCDCSource implements SupportProducedType { +public class MysqlCDCSource extends BaseCDCSource { private static final Logger LOG = LoggerFactory.getLogger(MysqlCDCSource.class); + @Override public SourceReader createReader(SourceReader.Context readerContext) { LOG.info("Create Mysql CDC Source"); - return new MysqlCDCSourceReader(readerConf, commonConf, readerContext); + return new MysqlCDCSourceReader(readerConf, commonConf, readerContext, deserializationSchema); } @Override @@ -52,8 +50,4 @@ public String getReaderName() { return "mysql_cdc"; } - @Override - public RowTypeInfo getProducedType() { - return MultipleTableRow.MULTIPLE_TABLE_ROW_TYPE_INFO; - } } diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java index 62c6a7b88..6fc08ccdb 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java @@ -16,16 +16,16 @@ package 
com.bytedance.bitsail.connector.cdc.mysql.source.debezium; +import com.bytedance.bitsail.base.connector.reader.v1.SourceReader; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.row.Row; -import com.bytedance.bitsail.component.format.debezium.JsonDebeziumSerializationSchema; +import com.bytedance.bitsail.component.format.debezium.DebeziumDeserializationSchema; import com.bytedance.bitsail.connector.cdc.mysql.source.config.MysqlConfig; import com.bytedance.bitsail.connector.cdc.mysql.source.schema.SchemaUtils; import com.bytedance.bitsail.connector.cdc.mysql.source.schema.TableChangeConverter; import com.bytedance.bitsail.connector.cdc.option.BinlogReaderOptions; import com.bytedance.bitsail.connector.cdc.source.reader.BinlogSplitReader; import com.bytedance.bitsail.connector.cdc.source.split.BinlogSplit; -import com.bytedance.bitsail.connector.cdc.util.MultipleTableRowUtils; import com.github.shyiko.mysql.binlog.BinaryLogClient; import com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -113,22 +113,24 @@ public class MysqlBinlogSplitReader implements BinlogSplitReader { private final int subtaskId; - private final JsonDebeziumSerializationSchema serializer; + private final DebeziumDeserializationSchema deserializationSchema; private final BitSailConfiguration jobConf; - public MysqlBinlogSplitReader(BitSailConfiguration jobConf, int subtaskId) { + public MysqlBinlogSplitReader(BitSailConfiguration jobConf, + SourceReader.Context context, + DebeziumDeserializationSchema deserializationSchema) { this.jobConf = jobConf; this.mysqlConfig = MysqlConfig.fromBitSailConf(jobConf); this.schemaNameAdjuster = SchemaNameAdjuster.create(); // handle configuration this.connectorConfig = mysqlConfig.getDbzMySqlConnectorConfig(); - this.subtaskId = subtaskId; + this.subtaskId = context.getIndexOfSubtask(); ThreadFactory threadFactory = new ThreadFactoryBuilder().setNameFormat("mysql-binlog-reader-" + 
this.subtaskId).build(); this.executorService = Executors.newSingleThreadExecutor(threadFactory); this.offset = new HashMap<>(); - this.serializer = new JsonDebeziumSerializationSchema(jobConf); this.isRunning = false; + this.deserializationSchema = deserializationSchema; } public void readSplit(BinlogSplit split) { @@ -280,9 +282,7 @@ public boolean isRunning() { public Row poll() { SourceRecord record = this.recordIterator.next(); this.offset = record.sourceOffset(); - byte[] serialized = this.serializer.serialize(record); - return MultipleTableRowUtils.fromSourceRecord(record, serialized) - .asRow(); + return deserializationSchema.deserialize(record); } @Override diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/reader/MysqlCDCSourceReader.java b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/reader/MysqlCDCSourceReader.java index bc2ab9f99..5b7e97cd0 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/reader/MysqlCDCSourceReader.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/reader/MysqlCDCSourceReader.java @@ -18,6 +18,7 @@ import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.component.format.debezium.DebeziumDeserializationSchema; import com.bytedance.bitsail.connector.cdc.mysql.source.debezium.DebeziumHelper; import com.bytedance.bitsail.connector.cdc.mysql.source.debezium.MysqlBinlogSplitReader; import com.bytedance.bitsail.connector.cdc.source.offset.BinlogOffset; @@ -36,8 +37,11 @@ public class MysqlCDCSourceReader extends BaseCDCSourceReader { private static final Logger LOG = LoggerFactory.getLogger(MysqlCDCSourceReader.class); - public 
MysqlCDCSourceReader(BitSailConfiguration readerConf, BitSailConfiguration commonConf, Context readerContext) { - super(readerConf, commonConf, readerContext); + public MysqlCDCSourceReader(BitSailConfiguration readerConf, + BitSailConfiguration commonConf, + Context context, + DebeziumDeserializationSchema deserializationSchema) { + super(readerConf, commonConf, context, deserializationSchema); } @Override @@ -56,6 +60,7 @@ public List snapshotState(long checkpointId) { @Override public BinlogSplitReader getReader() { - return new MysqlBinlogSplitReader(readerConf, readerContext.getIndexOfSubtask()); + return new MysqlBinlogSplitReader(readerConf, readerContext, deserializationSchema); } + } diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/MockConnectionsTest.java b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/MockConnectionsTest.java index 79adb706f..8b4f2bbb0 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/MockConnectionsTest.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/MockConnectionsTest.java @@ -16,11 +16,14 @@ package com.bytedance.bitsail.connector.cdc.mysql.source; +import com.bytedance.bitsail.base.connector.reader.v1.SourceReader; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; import com.bytedance.bitsail.common.row.Row; +import com.bytedance.bitsail.component.format.debezium.DebeziumJsonDeserializationSchema; import com.bytedance.bitsail.connector.cdc.model.ClusterInfo; import com.bytedance.bitsail.connector.cdc.model.ConnectionInfo; import com.bytedance.bitsail.connector.cdc.mysql.source.container.MySQLContainerMariadbAdapter; +import com.bytedance.bitsail.connector.cdc.mysql.source.context.SourceMockContext; import 
com.bytedance.bitsail.connector.cdc.mysql.source.debezium.MysqlBinlogSplitReader; import com.bytedance.bitsail.connector.cdc.mysql.source.schema.SchemaUtils; import com.bytedance.bitsail.connector.cdc.option.BinlogReaderOptions; @@ -64,6 +67,8 @@ public class MockConnectionsTest { private static final String TEST_USERNAME = "user1"; private static final String TEST_PASSWORD = "password1"; private static MySQLContainer container; + private static SourceReader.Context context; + private static DebeziumJsonDeserializationSchema deserializationSchema; @BeforeClass public static void before() { @@ -74,6 +79,8 @@ public static void before() { .withPassword(TEST_PASSWORD) .withLogConsumer(new Slf4jLogConsumer(LOG)); + deserializationSchema = new DebeziumJsonDeserializationSchema(BitSailConfiguration.newDefault()); + context = new SourceMockContext(1, deserializationSchema.getProducedType()); Startables.deepStart(Stream.of(container)).join(); } @@ -129,7 +136,7 @@ public void testBinlogSplitReader() throws InterruptedException { jobConf.set(BinlogReaderOptions.PASSWORD, TEST_PASSWORD); jobConf.set(BinlogReaderOptions.INITIAL_OFFSET_TYPE, "latest"); - MysqlBinlogSplitReader reader = new MysqlBinlogSplitReader(jobConf, 0); + MysqlBinlogSplitReader reader = new MysqlBinlogSplitReader(jobConf, context, deserializationSchema); BinlogSplit split = new BinlogSplit("split-1", BinlogOffset.earliest(), BinlogOffset.boundless()); reader.readSplit(split); int maxPeriod = 0; diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/context/SourceMockContext.java b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/context/SourceMockContext.java new file mode 100644 index 000000000..e25bac10b --- /dev/null +++ 
b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/context/SourceMockContext.java @@ -0,0 +1,46 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.connector.cdc.mysql.source.context; + +import com.bytedance.bitsail.base.connector.reader.v1.SourceReader; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; + +public class SourceMockContext implements SourceReader.Context { + + private final int id; + private final RowTypeInfo rowTypeInfo; + + public SourceMockContext(int id, RowTypeInfo rowTypeInfo) { + this.id = id; + this.rowTypeInfo = rowTypeInfo; + } + + @Override + public RowTypeInfo getRowTypeInfo() { + return rowTypeInfo; + } + + @Override + public int getIndexOfSubtask() { + return id; + } + + @Override + public void sendSplitRequest() { + + } +} diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java index 79ecadbde..f88c67b9f 100644 --- a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableSink.java @@ 
-31,7 +31,7 @@ import com.bytedance.bitsail.common.row.Row; import com.bytedance.bitsail.common.type.TypeInfoConverter; import com.bytedance.bitsail.common.typeinfo.TypeInfoValueConverter; -import com.bytedance.bitsail.component.format.debezium.DebeziumRowDeserializationSchema; +import com.bytedance.bitsail.component.format.debezium.DebeziumRowFilterNamesDeserializationSchema; import com.bytedance.bitsail.core.common.serializer.multiple.MultipleTableCommitSerializer; import com.bytedance.bitsail.core.common.serializer.multiple.MultipleTableStateSerializer; import com.bytedance.bitsail.core.common.sink.multiple.comittable.MultipleTableCommit; @@ -101,7 +101,7 @@ public Writer, MultipleTableState) realSink, catalogTables, patternOfTable, - new DebeziumRowDeserializationSchema(writerConfiguration)); + new DebeziumRowFilterNamesDeserializationSchema(writerConfiguration)); } @Override diff --git a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java index aa9efeb5e..ecfd5dd3f 100644 --- a/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java +++ b/bitsail-cores/bitsail-core-common/src/main/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriter.java @@ -31,7 +31,7 @@ import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; import com.bytedance.bitsail.common.typeinfo.TypeInfo; import com.bytedance.bitsail.common.typeinfo.TypeInfoValueConverter; -import com.bytedance.bitsail.component.format.debezium.DebeziumRowDeserializationSchema; +import com.bytedance.bitsail.component.format.debezium.DebeziumRowFilterNamesDeserializationSchema; import com.bytedance.bitsail.core.common.sink.multiple.comittable.MultipleTableCommit; import com.bytedance.bitsail.core.common.sink.multiple.state.MultipleTableState; @@ 
-60,7 +60,7 @@ public class MultipleTableWriter> context; private final BitSailConfiguration templateConfiguration; private final Map catalogTables; - private final DebeziumRowDeserializationSchema deserializationSchema; + private final DebeziumRowFilterNamesDeserializationSchema deserializationSchema; private final TypeInfoValueConverter valueConverter; private final Pattern patternOfTable; private final String database; @@ -75,7 +75,7 @@ public MultipleTableWriter(BitSailConfiguration templateConfiguration, SupportMultipleSinkTable supplier, Map catalogTables, Pattern patternOfTable, - DebeziumRowDeserializationSchema deserializationSchema) { + DebeziumRowFilterNamesDeserializationSchema deserializationSchema) { this.templateConfiguration = templateConfiguration; this.context = context; this.supplier = supplier; diff --git a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java index 9199e8c1b..32dfa0434 100644 --- a/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java +++ b/bitsail-cores/bitsail-core-common/src/test/java/com/bytedance/bitsail/core/common/sink/multiple/MultipleTableWriterTest.java @@ -30,7 +30,7 @@ import com.bytedance.bitsail.common.typeinfo.TypeInfo; import com.bytedance.bitsail.common.typeinfo.TypeInfoValueConverter; import com.bytedance.bitsail.common.typeinfo.TypeInfos; -import com.bytedance.bitsail.component.format.debezium.DebeziumRowDeserializationSchema; +import com.bytedance.bitsail.component.format.debezium.DebeziumRowFilterNamesDeserializationSchema; import com.bytedance.bitsail.core.common.sink.MultiTablePrintSink; import com.bytedance.bitsail.core.common.sink.multiple.state.MultipleTableState; @@ -98,7 +98,7 @@ public List> getRestoreStates() { 
(SupportMultipleSinkTable) multiTablePrintSink, catalogTables, Pattern.compile("\\.*"), - new DebeziumRowDeserializationSchema(jobConf)); + new DebeziumRowFilterNamesDeserializationSchema(jobConf)); } private Map mockCatalogTables() { diff --git a/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/java/com/bytedance/bitsail/test/integration/cdc/mysql/MysqlBinlogSplitReaderContainerITCase.java b/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/java/com/bytedance/bitsail/test/integration/cdc/mysql/MysqlBinlogSplitReaderContainerITCase.java index 2f5742cb1..8b6a531d9 100644 --- a/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/java/com/bytedance/bitsail/test/integration/cdc/mysql/MysqlBinlogSplitReaderContainerITCase.java +++ b/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/java/com/bytedance/bitsail/test/integration/cdc/mysql/MysqlBinlogSplitReaderContainerITCase.java @@ -16,7 +16,10 @@ package com.bytedance.bitsail.test.integration.cdc.mysql; +import com.bytedance.bitsail.base.connector.reader.v1.SourceReader; import com.bytedance.bitsail.common.configuration.BitSailConfiguration; +import com.bytedance.bitsail.component.format.debezium.DebeziumDeserializationSchema; +import com.bytedance.bitsail.component.format.debezium.DebeziumJsonDeserializationSchema; import com.bytedance.bitsail.connector.cdc.model.ClusterInfo; import com.bytedance.bitsail.connector.cdc.model.ConnectionInfo; import com.bytedance.bitsail.connector.cdc.mysql.source.debezium.MysqlBinlogSplitReader; @@ -25,6 +28,7 @@ import com.bytedance.bitsail.connector.cdc.source.split.BinlogSplit; import com.bytedance.bitsail.test.integration.cdc.mysql.container.MySQLContainerMariadbAdapter; import com.bytedance.bitsail.test.integration.cdc.mysql.container.util.TestDatabase; +import com.bytedance.bitsail.test.integration.cdc.mysql.context.SourceMockContext; import com.google.common.collect.Lists; import 
org.junit.After; @@ -50,6 +54,8 @@ public class MysqlBinlogSplitReaderContainerITCase { private static final String TEST_DATABASE = "test"; private MySQLContainer container; + private DebeziumDeserializationSchema deserializationSchema; + private SourceReader.Context context; @Before public void before() { @@ -63,6 +69,8 @@ public void before() { .withLogConsumer(new Slf4jLogConsumer(LOG)); //container.addParameter("MY_CNF", "container/my.cnf"); + deserializationSchema = new DebeziumJsonDeserializationSchema(BitSailConfiguration.newDefault()); + context = new SourceMockContext(0, deserializationSchema.getProducedType()); Startables.deepStart(Stream.of(container)).join(); } @@ -101,7 +109,7 @@ public void testBinlogReader() throws InterruptedException { jobConf.set("job.reader.debezium.schema.history.internal", "io.debezium.relational.history.MemorySchemaHistory"); jobConf.set("job.reader.debezium.database.history", "io.debezium.relational.history.MemoryDatabaseHistory"); - MysqlBinlogSplitReader reader = new MysqlBinlogSplitReader(jobConf, 0); + MysqlBinlogSplitReader reader = new MysqlBinlogSplitReader(jobConf, context, deserializationSchema); BinlogSplit split = new BinlogSplit("split-1", BinlogOffset.earliest(), BinlogOffset.boundless()); reader.readSplit(split); int maxPeriod = 0; diff --git a/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/java/com/bytedance/bitsail/test/integration/cdc/mysql/context/SourceMockContext.java b/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/java/com/bytedance/bitsail/test/integration/cdc/mysql/context/SourceMockContext.java new file mode 100644 index 000000000..23e87a729 --- /dev/null +++ b/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/java/com/bytedance/bitsail/test/integration/cdc/mysql/context/SourceMockContext.java @@ -0,0 +1,46 @@ +/* + * Copyright 2022-2023 Bytedance Ltd. and/or its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bytedance.bitsail.test.integration.cdc.mysql.context; + +import com.bytedance.bitsail.base.connector.reader.v1.SourceReader; +import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; + +public class SourceMockContext implements SourceReader.Context { + + private final int id; + private final RowTypeInfo rowTypeInfo; + + public SourceMockContext(int id, RowTypeInfo rowTypeInfo) { + this.id = id; + this.rowTypeInfo = rowTypeInfo; + } + + @Override + public RowTypeInfo getRowTypeInfo() { + return rowTypeInfo; + } + + @Override + public int getIndexOfSubtask() { + return id; + } + + @Override + public void sendSplitRequest() { + + } +} From f88495cde2ab56b9d71ea648877afb1696422d58 Mon Sep 17 00:00:00 2001 From: haoke Date: Fri, 28 Apr 2023 18:11:52 +0800 Subject: [PATCH 10/14] [BitSail][Multi-Sink]fixed ut issue#3. 
--- .../debezium/DebeziumDeserializationSchema.java | 2 ++ .../DebeziumJsonDeserializationSchema.java | 16 +++++++++++----- .../DebeziumRowDeserializationSchema.java | 5 +++++ .../MultipleDebeziumDeserializationSchema.java | 5 +++++ .../source/debezium/MysqlBinlogSplitReader.java | 1 + .../connector/kafka/sink/KafkaWriter.java | 10 ++-------- 6 files changed, 26 insertions(+), 13 deletions(-) diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java index 83cdb169c..02d74d72c 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumDeserializationSchema.java @@ -24,6 +24,8 @@ public interface DebeziumDeserializationSchema extends DeserializationSchema, SupportProducedType { + void open(); + @Override Row deserialize(SourceRecord sourceRecord); diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumJsonDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumJsonDeserializationSchema.java index ffc9faef4..73d256551 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumJsonDeserializationSchema.java +++ 
b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumJsonDeserializationSchema.java @@ -42,9 +42,20 @@ public class DebeziumJsonDeserializationSchema implements DebeziumDeserializatio TypeInfos.STRING_TYPE_INFO, TypeInfos.STRING_TYPE_INFO, TypeInfos.STRING_TYPE_INFO, TypeInfos.LONG_TYPE_INFO }); + private BitSailConfiguration jobConf; private transient JsonConverter jsonConverter; public DebeziumJsonDeserializationSchema(BitSailConfiguration jobConf) { + this.jobConf = jobConf; + } + + @Override + public RowTypeInfo getProducedType() { + return DEBEZIUM_JSON_ROW_TYPE; + } + + @Override + public void open() { this.jsonConverter = new JsonConverter(); boolean includeSchema = jobConf.get(DebeziumWriterOptions.DEBEZIUM_JSON_INCLUDE_SCHEMA); final HashMap configs = new HashMap<>(); @@ -53,11 +64,6 @@ public DebeziumJsonDeserializationSchema(BitSailConfiguration jobConf) { jsonConverter.configure(configs); } - @Override - public RowTypeInfo getProducedType() { - return DEBEZIUM_JSON_ROW_TYPE; - } - @SuppressWarnings("checkstyle:MagicNumber") @Override public Row deserialize(SourceRecord sourceRecord) { diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java index 9d2889b78..bdc8440d1 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/DebeziumRowDeserializationSchema.java @@ -33,6 
+33,11 @@ public DebeziumRowDeserializationSchema(BitSailConfiguration jobConf, RowTypeInf this.rowTypeInfo = rowTypeInfo; } + @Override + public void open() { + + } + @Override public Row deserialize(SourceRecord sourceRecord) { //TODO diff --git a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/MultipleDebeziumDeserializationSchema.java b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/MultipleDebeziumDeserializationSchema.java index b8fd170dd..15f4944ef 100644 --- a/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/MultipleDebeziumDeserializationSchema.java +++ b/bitsail-components/bitsail-component-formats/bitsail-component-format-debezium/src/main/java/com/bytedance/bitsail/component/format/debezium/MultipleDebeziumDeserializationSchema.java @@ -39,8 +39,13 @@ public class MultipleDebeziumDeserializationSchema implements DebeziumDeserializationSchema, SupportProducedType { private transient JsonConverter jsonConverter; + private BitSailConfiguration jobConf; public MultipleDebeziumDeserializationSchema(BitSailConfiguration jobConf) { + this.jobConf = jobConf; + } + + public void open() { this.jsonConverter = new JsonConverter(); boolean includeSchema = jobConf.get(DebeziumWriterOptions.DEBEZIUM_JSON_INCLUDE_SCHEMA); final HashMap configs = new HashMap<>(); diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java index 6fc08ccdb..fc1fb08f6 100644 --- 
a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/main/java/com/bytedance/bitsail/connector/cdc/mysql/source/debezium/MysqlBinlogSplitReader.java @@ -131,6 +131,7 @@ public MysqlBinlogSplitReader(BitSailConfiguration jobConf, this.offset = new HashMap<>(); this.isRunning = false; this.deserializationSchema = deserializationSchema; + this.deserializationSchema.open(); } public void readSplit(BinlogSplit split) { diff --git a/bitsail-connectors/connector-kafka/src/main/java/com/bytedance/bitsail/connector/kafka/sink/KafkaWriter.java b/bitsail-connectors/connector-kafka/src/main/java/com/bytedance/bitsail/connector/kafka/sink/KafkaWriter.java index 73a2e6659..52ebe07a8 100644 --- a/bitsail-connectors/connector-kafka/src/main/java/com/bytedance/bitsail/connector/kafka/sink/KafkaWriter.java +++ b/bitsail-connectors/connector-kafka/src/main/java/com/bytedance/bitsail/connector/kafka/sink/KafkaWriter.java @@ -128,7 +128,7 @@ public KafkaWriter(BitSailConfiguration commonConf, BitSailConfiguration writerC public void write(Row record) throws IOException { checkErroneous(); //TODO: refactor this as a format factory - if (format.equals("debezium")) { + if (format.equals("debezium-json")) { writeDebezium(record); } else { String result = jsonConverter.convert(record).toString(); @@ -153,13 +153,7 @@ public void writeDebezium(Row record) { String key = record.getString(BinlogRow.KEY_INDEX); partitionFieldsValues[0] = key; int partitionId = choosePartitionIdByFields(partitionFieldsValues); - Map headers = new HashMap<>(4); - headers.put("db", record.getString(BinlogRow.DATABASE_INDEX)); - headers.put("table", record.getString(BinlogRow.TABLE_INDEX)); - headers.put("ddl_flag", String.valueOf(record.getBoolean(BinlogRow.DDL_FLAG_INDEX))); - headers.put("version", 
String.valueOf(record.getInt(BinlogRow.VERSION_INDEX))); - byte[] value = record.getBinary(BinlogRow.VALUE_INDEX); - sendWithHeaders(key, value, partitionId, headers); + sendWithHeaders(null, record.getBinary(2), partitionId, null); } @Override From 7432bd49a396406afdb33a5ddeec3ae32668b247 Mon Sep 17 00:00:00 2001 From: haoke Date: Fri, 28 Apr 2023 18:30:34 +0800 Subject: [PATCH 11/14] [BitSail][Multi-Sink]fixed ut issue#5. --- .../com/bytedance/bitsail/connector/kafka/sink/KafkaWriter.java | 2 +- .../src/test/resources/bitsail_mysql_cdc_kafka.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bitsail-connectors/connector-kafka/src/main/java/com/bytedance/bitsail/connector/kafka/sink/KafkaWriter.java b/bitsail-connectors/connector-kafka/src/main/java/com/bytedance/bitsail/connector/kafka/sink/KafkaWriter.java index 52ebe07a8..cb0ce4db0 100644 --- a/bitsail-connectors/connector-kafka/src/main/java/com/bytedance/bitsail/connector/kafka/sink/KafkaWriter.java +++ b/bitsail-connectors/connector-kafka/src/main/java/com/bytedance/bitsail/connector/kafka/sink/KafkaWriter.java @@ -153,7 +153,7 @@ public void writeDebezium(Row record) { String key = record.getString(BinlogRow.KEY_INDEX); partitionFieldsValues[0] = key; int partitionId = choosePartitionIdByFields(partitionFieldsValues); - sendWithHeaders(null, record.getBinary(2), partitionId, null); + sendWithHeaders(null, record.getString(2), partitionId, null); } @Override diff --git a/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/resources/bitsail_mysql_cdc_kafka.json b/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/resources/bitsail_mysql_cdc_kafka.json index 73e9e8b28..2965f5629 100644 --- a/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/resources/bitsail_mysql_cdc_kafka.json +++ b/bitsail-test/bitsail-test-integration/bitsail-test-integration-cdc/src/test/resources/bitsail_mysql_cdc_kafka.json @@ -53,7 +53,7 
@@ }, "writer": { "class": "com.bytedance.bitsail.connector.kafka.sink.KafkaSink", - "content_type": "debezium", + "content_type": "debezium-json", "columns": [ { "name": "db", From 61e892f347003a63910a02b8f6b8116563701c96 Mon Sep 17 00:00:00 2001 From: haoke Date: Tue, 9 May 2023 18:29:30 +0800 Subject: [PATCH 12/14] [BitSail][Multi-Sink]fix npe. --- .../connector/cdc/mysql/source/MockConnectionsTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/MockConnectionsTest.java b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/MockConnectionsTest.java index 8b4f2bbb0..30bac45f5 100644 --- a/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/MockConnectionsTest.java +++ b/bitsail-connectors/connector-cdc/connector-cdc-mysql/src/test/java/com/bytedance/bitsail/connector/cdc/mysql/source/MockConnectionsTest.java @@ -50,7 +50,6 @@ import org.testcontainers.utility.DockerImageName; import java.time.ZoneId; -import java.util.Arrays; import java.util.Map; import java.util.Properties; import java.util.concurrent.TimeUnit; @@ -143,7 +142,7 @@ public void testBinlogSplitReader() throws InterruptedException { while (maxPeriod <= 5) { if (reader.hasNext()) { Row row = reader.poll(); - Arrays.stream(row.getFields()).forEach(o -> LOG.info(o.toString())); + LOG.info("row: {}.", row); maxPeriod++; } TimeUnit.SECONDS.sleep(1); From aca10cfd06cca597835b8b6fda6ba8e8fe09d804 Mon Sep 17 00:00:00 2001 From: haoke Date: Wed, 10 May 2023 11:54:21 +0800 Subject: [PATCH 13/14] [BitSail][Multi-Sink]fixed ut issue#2. 
--- .../transform/delegate/DelegateFlinkMapFunction.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/transform/delegate/DelegateFlinkMapFunction.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/transform/delegate/DelegateFlinkMapFunction.java index 997f103e3..1c39ad319 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/transform/delegate/DelegateFlinkMapFunction.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/transform/delegate/DelegateFlinkMapFunction.java @@ -25,7 +25,7 @@ import com.bytedance.bitsail.common.option.TransformOptions; import com.bytedance.bitsail.common.row.Row; import com.bytedance.bitsail.common.typeinfo.RowTypeInfo; -import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConvertSerializer; +import com.bytedance.bitsail.flink.core.delagate.converter.FlinkRowConverter; import com.bytedance.bitsail.flink.core.typeutils.AutoDetectFlinkTypeInfoUtil; import com.bytedance.bitsail.flink.core.typeutils.NativeFlinkTypeInfoUtil; @@ -39,14 +39,14 @@ public class DelegateFlinkMapFunction i private final BitSailMapFunction realMapFunction; - private final FlinkRowConvertSerializer flinkRowConvertSerializer; + private final FlinkRowConverter rowConverter; private final RowTypeInfo inputType; public DelegateFlinkMapFunction(BitSailConfiguration jobConf, TypeInformation flinkTypes) { this.inputType = AutoDetectFlinkTypeInfoUtil.bridgeRowTypeInfo((org.apache.flink.api.java.typeutils.RowTypeInfo) flinkTypes); this.realMapFunction = createMapFunction(jobConf, inputType); - this.flinkRowConvertSerializer = new FlinkRowConvertSerializer( + this.rowConverter = new 
FlinkRowConverter( this.inputType, jobConf); } @@ -54,8 +54,8 @@ public DelegateFlinkMapFunction(BitSailConfiguration jobConf, TypeInformation @Override public O map(I value) throws Exception { org.apache.flink.types.Row outputRow; - Row bitsailRow = flinkRowConvertSerializer.deserialize((org.apache.flink.types.Row) value); - outputRow = flinkRowConvertSerializer.serialize(this.realMapFunction.map(bitsailRow)); + Row bitsailRow = rowConverter.from((org.apache.flink.types.Row) value); + outputRow = rowConverter.to(this.realMapFunction.map(bitsailRow)); return (O) outputRow; } From aba8eff88f68a01a20c834369c17983778084664 Mon Sep 17 00:00:00 2001 From: haoke Date: Wed, 10 May 2023 15:04:36 +0800 Subject: [PATCH 14/14] [BitSail][Multi-Sink]fixed ut issue#3. --- .../delegate/DelegateFlinkMapFunction.java | 24 ++++++++++++------- .../writer/delegate/DelegateFlinkWriter.java | 9 ++++--- .../writer/delegate/DelegateFlinkWriter.java | 12 +++++----- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/transform/delegate/DelegateFlinkMapFunction.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/transform/delegate/DelegateFlinkMapFunction.java index 1c39ad319..c4f544e6f 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/transform/delegate/DelegateFlinkMapFunction.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/transform/delegate/DelegateFlinkMapFunction.java @@ -29,25 +29,33 @@ import com.bytedance.bitsail.flink.core.typeutils.AutoDetectFlinkTypeInfoUtil; import com.bytedance.bitsail.flink.core.typeutils.NativeFlinkTypeInfoUtil; -import org.apache.flink.api.common.functions.MapFunction; +import 
org.apache.flink.api.common.functions.RichMapFunction; import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.configuration.Configuration; import java.util.List; import java.util.Locale; -public class DelegateFlinkMapFunction implements MapFunction { +public class DelegateFlinkMapFunction extends RichMapFunction { private final BitSailMapFunction realMapFunction; - private final FlinkRowConverter rowConverter; + private final RowTypeInfo rowTypeInfo; - private final RowTypeInfo inputType; + private final BitSailConfiguration jobConf; - public DelegateFlinkMapFunction(BitSailConfiguration jobConf, TypeInformation flinkTypes) { - this.inputType = AutoDetectFlinkTypeInfoUtil.bridgeRowTypeInfo((org.apache.flink.api.java.typeutils.RowTypeInfo) flinkTypes); - this.realMapFunction = createMapFunction(jobConf, inputType); + private transient FlinkRowConverter rowConverter; + + public DelegateFlinkMapFunction(BitSailConfiguration jobConf, TypeInformation inputRowTypeInfo) { + this.jobConf = jobConf; + this.rowTypeInfo = AutoDetectFlinkTypeInfoUtil.bridgeRowTypeInfo((org.apache.flink.api.java.typeutils.RowTypeInfo) inputRowTypeInfo); + this.realMapFunction = createMapFunction(jobConf, rowTypeInfo); + } + + @Override + public void open(Configuration parameters) throws Exception { this.rowConverter = new FlinkRowConverter( - this.inputType, + rowTypeInfo, jobConf); } diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java index 9dc456fd5..30b666518 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java +++ 
b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.11-bridge/src/main/java/com/bytedance/bitsail/core/flink/bridge/writer/delegate/DelegateFlinkWriter.java @@ -81,8 +81,8 @@ public class DelegateFlinkWriter sink; private final BitSailConfiguration writerConfiguration; private final BitSailConfiguration commonConfiguration; - private final FlinkRowConverter flinkRowConverter; private final RowTypeInfo rowTypeInfo; + private transient FlinkRowConverter flinkRowConverter; private transient Writer writer; private transient ListState writeState; private boolean endOfInput = false; @@ -116,10 +116,6 @@ public DelegateFlinkWriter(BitSailConfiguration commonConfiguration, this.rowTypeInfo = TypeInfoUtils .getRowTypeInfo(sink.createTypeInfoConverter(), columnInfos); } - - this.flinkRowConverter = new FlinkRowConverter( - this.rowTypeInfo, - this.commonConfiguration); } @Override @@ -130,6 +126,9 @@ public void open() throws Exception { if (dirtyCollector instanceof RuntimeContextInjectable) { ((RuntimeContextInjectable) dirtyCollector).setRuntimeContext(getRuntimeContext()); } + this.flinkRowConverter = new FlinkRowConverter( + this.rowTypeInfo, + this.commonConfiguration); messenger.open(); ColumnCast.initColumnCast(commonConfiguration); diff --git a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/writer/delegate/DelegateFlinkWriter.java b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/writer/delegate/DelegateFlinkWriter.java index 2792a577d..2a5ab9911 100644 --- a/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/writer/delegate/DelegateFlinkWriter.java +++ b/bitsail-cores/bitsail-core-flink/bitsail-core-flink-1.16-bridge/src/main/java/com/bytedance/bitsail/core/flink116/bridge/writer/delegate/DelegateFlinkWriter.java @@ -81,8 +81,8 @@ public class 
DelegateFlinkWriter sink; private final BitSailConfiguration writerConfiguration; private final BitSailConfiguration commonConfiguration; - private final FlinkRowConverter flinkRowConvertSerializer; private final RowTypeInfo rowTypeInfo; + private transient FlinkRowConverter flinkRowConverter; private transient Writer writer; private transient ListState writeState; private boolean endOfInput = false; @@ -116,10 +116,6 @@ public DelegateFlinkWriter(BitSailConfiguration commonConfiguration, this.rowTypeInfo = TypeInfoUtils .getRowTypeInfo(sink.createTypeInfoConverter(), columnInfos); } - - this.flinkRowConvertSerializer = new FlinkRowConverter( - this.rowTypeInfo, - this.commonConfiguration); } @Override @@ -130,6 +126,10 @@ public void open() throws Exception { if (dirtyCollector instanceof RuntimeContextInjectable) { ((RuntimeContextInjectable) dirtyCollector).setRuntimeContext(getRuntimeContext()); } + + flinkRowConverter = new FlinkRowConverter( + this.rowTypeInfo, + this.commonConfiguration); messenger.open(); ColumnCast.initColumnCast(commonConfiguration); @@ -191,7 +191,7 @@ public void processElement(StreamRecord element) throws Exception { try { if (value instanceof Row) { // convert flink row to BitSail row. - com.bytedance.bitsail.common.row.Row deserializer = flinkRowConvertSerializer.from((Row) value); + com.bytedance.bitsail.common.row.Row deserializer = flinkRowConverter.from((Row) value); writer.write((InputT) deserializer); } else { writer.write(element.getValue());