Commit
kafka update mode
todd5167 committed Mar 17, 2020
1 parent 14446f4 commit ec2b84e
Showing 23 changed files with 1,067 additions and 86 deletions.
SerializationMetricWrapper.java
@@ -24,6 +24,7 @@
import org.apache.flink.metrics.Counter;
import org.apache.flink.metrics.Meter;
import org.apache.flink.metrics.MeterView;
import org.apache.flink.table.runtime.types.CRow;
import org.apache.flink.types.Row;


@@ -34,11 +35,11 @@
* author: toutian
* create: 2019/12/24
*/
public class SerializationMetricWrapper implements SerializationSchema<Row> {
public class SerializationMetricWrapper implements SerializationSchema<CRow> {

private static final long serialVersionUID = 1L;

private SerializationSchema<Row> serializationSchema;
private SerializationSchema<CRow> serializationSchema;

private transient RuntimeContext runtimeContext;

@@ -47,7 +48,7 @@ public class SerializationMetricWrapper implements SerializationSchema<Row> {
protected transient Meter dtNumRecordsOutRate;


public SerializationMetricWrapper(SerializationSchema<Row> serializationSchema) {
public SerializationMetricWrapper(SerializationSchema<CRow> serializationSchema) {
this.serializationSchema = serializationSchema;
}

@@ -57,7 +58,7 @@ public void initMetric() {
}

@Override
public byte[] serialize(Row element) {
public byte[] serialize(CRow element) {
beforeSerialize();
byte[] row = serializationSchema.serialize(element);
afterSerialize();
@@ -79,7 +80,7 @@ public void setRuntimeContext(RuntimeContext runtimeContext) {
this.runtimeContext = runtimeContext;
}

public SerializationSchema<Row> getSerializationSchema() {
public SerializationSchema<CRow> getSerializationSchema() {
return serializationSchema;
}

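The wrapper now serializes CRow instead of Row. CRow (from the pre-Blink table runtime) pairs a Row with a change flag, where true marks an insert/accumulate message and false a retraction, and that flag is what lets the sink express an update mode. The JsonCRowSerializationSchema and the other format classes are added elsewhere in this commit and are not shown here; the sketch below uses an invented pass-through schema purely to illustrate the CRow contract.

import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.table.runtime.types.CRow;
import org.apache.flink.types.Row;

import java.nio.charset.StandardCharsets;

public class CRowSchemaSketch {

    /** Illustrative only: prefixes the change flag and serializes the wrapped Row as text. */
    public static class PassThroughCRowSchema implements SerializationSchema<CRow> {

        private static final long serialVersionUID = 1L;

        @Override
        public byte[] serialize(CRow element) {
            String payload = (element.change() ? "insert: " : "retract: ") + element.row().toString();
            return payload.getBytes(StandardCharsets.UTF_8);
        }
    }

    public static void main(String[] args) {
        SerializationSchema<CRow> schema = new PassThroughCRowSchema();

        CRow accumulate = new CRow(Row.of("order-1", 100L), true);   // change = true  -> insert/accumulate
        CRow retract    = new CRow(Row.of("order-1", 100L), false);  // change = false -> retraction

        System.out.println(new String(schema.serialize(accumulate), StandardCharsets.UTF_8));
        System.out.println(new String(schema.serialize(retract), StandardCharsets.UTF_8));
    }
}

In the sink itself the schema is wrapped in SerializationMetricWrapper, which expects setRuntimeContext() and initMetric() to be called (typically in the producer's open()) before the first serialize(), since the metric counters are only created there.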
AbstractKafkaProducerFactory.java
@@ -19,15 +19,18 @@

import com.dtstack.flink.sql.format.FormatType;
import com.dtstack.flink.sql.format.SerializationMetricWrapper;
import com.dtstack.flink.sql.sink.kafka.serialization.AvroCRowSerializationSchema;
import com.dtstack.flink.sql.sink.kafka.serialization.CsvCRowSerializationSchema;
import com.dtstack.flink.sql.sink.kafka.serialization.JsonCRowSerializationSchema;
import com.dtstack.flink.sql.sink.kafka.table.KafkaSinkTableInfo;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.formats.avro.AvroRowSerializationSchema;
import org.apache.flink.formats.csv.CsvRowSerializationSchema;
import org.apache.flink.formats.json.JsonRowSerializationSchema;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;
import org.apache.flink.table.runtime.types.CRow;
import org.apache.flink.types.Row;

import java.util.Optional;
@@ -51,42 +54,36 @@ public abstract class AbstractKafkaProducerFactory {
* @param partitioner
* @return
*/
public abstract RichSinkFunction<Row> createKafkaProducer(KafkaSinkTableInfo kafkaSinkTableInfo, TypeInformation<Row> typeInformation, Properties properties, Optional<FlinkKafkaPartitioner<Row>> partitioner, String[] partitionKeys);
public abstract RichSinkFunction<CRow> createKafkaProducer(KafkaSinkTableInfo kafkaSinkTableInfo, TypeInformation<CRow> typeInformation, Properties properties, Optional<FlinkKafkaPartitioner<CRow>> partitioner, String[] partitionKeys);

protected SerializationMetricWrapper createSerializationMetricWrapper(KafkaSinkTableInfo kafkaSinkTableInfo, TypeInformation<Row> typeInformation) {
return new SerializationMetricWrapper(createSerializationSchema(kafkaSinkTableInfo, typeInformation));
protected SerializationMetricWrapper createSerializationMetricWrapper(KafkaSinkTableInfo kafkaSinkTableInfo, TypeInformation<CRow> typeInformation) {
SerializationSchema<CRow> serializationSchema = createSerializationSchema(kafkaSinkTableInfo, typeInformation);
return new SerializationMetricWrapper(serializationSchema);
}

private SerializationSchema<Row> createSerializationSchema(KafkaSinkTableInfo kafkaSinkTableInfo, TypeInformation<Row> typeInformation) {
SerializationSchema<Row> serializationSchema = null;
private SerializationSchema<CRow> createSerializationSchema(KafkaSinkTableInfo kafkaSinkTableInfo, TypeInformation<CRow> typeInformation) {
SerializationSchema<CRow> serializationSchema = null;
if (FormatType.JSON.name().equalsIgnoreCase(kafkaSinkTableInfo.getSinkDataType())) {

if (StringUtils.isNotBlank(kafkaSinkTableInfo.getSchemaString())) {
serializationSchema = new JsonRowSerializationSchema(kafkaSinkTableInfo.getSchemaString());
serializationSchema = new JsonCRowSerializationSchema(kafkaSinkTableInfo.getSchemaString(), kafkaSinkTableInfo.getUpdateMode());
} else if (typeInformation != null && typeInformation.getArity() != 0) {
serializationSchema = new JsonRowSerializationSchema(typeInformation);
serializationSchema = new JsonCRowSerializationSchema(typeInformation, kafkaSinkTableInfo.getUpdateMode());
} else {
throw new IllegalArgumentException("sinkDataType:" + FormatType.JSON.name() + " must set schemaString(JSON Schema)or TypeInformation<Row>");
}

} else if (FormatType.CSV.name().equalsIgnoreCase(kafkaSinkTableInfo.getSinkDataType())) {

if (StringUtils.isBlank(kafkaSinkTableInfo.getFieldDelimiter())) {
throw new IllegalArgumentException("sinkDataType:" + FormatType.CSV.name() + " must set fieldDelimiter");
}

final CsvRowSerializationSchema.Builder serSchemaBuilder = new CsvRowSerializationSchema.Builder(typeInformation);
final CsvCRowSerializationSchema.Builder serSchemaBuilder = new CsvCRowSerializationSchema.Builder(typeInformation);
serSchemaBuilder.setFieldDelimiter(kafkaSinkTableInfo.getFieldDelimiter().toCharArray()[0]);
serializationSchema = serSchemaBuilder.build();

} else if (FormatType.AVRO.name().equalsIgnoreCase(kafkaSinkTableInfo.getSinkDataType())) {

if (StringUtils.isBlank(kafkaSinkTableInfo.getSchemaString())) {
throw new IllegalArgumentException("sinkDataType:" + FormatType.AVRO.name() + " must set schemaString");
}

serializationSchema = new AvroRowSerializationSchema(kafkaSinkTableInfo.getSchemaString());

serializationSchema = new AvroCRowSerializationSchema(kafkaSinkTableInfo.getSchemaString(),kafkaSinkTableInfo.getUpdateMode());
}

if (null == serializationSchema) {
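createSerializationSchema now threads kafkaSinkTableInfo.getUpdateMode() into the new *CRowSerializationSchema classes. Their implementations are not part of the hunks shown here; the sketch below is a hypothetical illustration (the class name and the empty-payload convention are invented) of how an update-mode-aware schema could drop retract messages for an append-only sink while forwarding everything in upsert mode.

import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.table.runtime.types.CRow;

/**
 * Hypothetical sketch: the formatting itself is delegated, and only the
 * decision whether a retract message is written at all depends on updateMode.
 */
public class UpdateModeAwareSchemaSketch implements SerializationSchema<CRow> {

    private static final long serialVersionUID = 1L;

    private final SerializationSchema<CRow> delegate;
    private final String updateMode;   // e.g. "append" or "upsert", from KafkaSinkTableInfo#getUpdateMode()

    public UpdateModeAwareSchemaSketch(SerializationSchema<CRow> delegate, String updateMode) {
        this.delegate = delegate;
        this.updateMode = updateMode;
    }

    @Override
    public byte[] serialize(CRow element) {
        if (!element.change() && "append".equalsIgnoreCase(updateMode)) {
            // Append-only output: a retraction has no representation downstream, so emit nothing.
            // (The real schemas may instead return null or encode an explicit delete marker.)
            return new byte[0];
        }
        return delegate.serialize(element);
    }
}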
CustomerKeyedSerializationSchema.java
@@ -2,18 +2,20 @@


import com.dtstack.flink.sql.format.SerializationMetricWrapper;
import com.dtstack.flink.sql.sink.kafka.serialization.JsonCRowSerializationSchema;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.formats.json.JsonRowSerializationSchema;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema;
import org.apache.flink.table.runtime.types.CRow;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.atomic.AtomicLong;

public class CustomerKeyedSerializationSchema implements KeyedSerializationSchema<Row> {
public class CustomerKeyedSerializationSchema implements KeyedSerializationSchema<CRow> {

private static final Logger LOG = LoggerFactory.getLogger(CustomerKeyedSerializationSchema.class);

@@ -30,38 +32,41 @@ public CustomerKeyedSerializationSchema(SerializationMetricWrapper serialization
this.mapper = new ObjectMapper();
}

public byte[] serializeKey(Row element) {
if(partitionKeys == null || partitionKeys.length <=0){
@Override
public byte[] serializeKey(CRow element) {
if (partitionKeys == null || partitionKeys.length <= 0) {
return null;
}
SerializationSchema<Row> serializationSchema = serializationMetricWrapper.getSerializationSchema();
if(serializationSchema instanceof JsonRowSerializationSchema){
return serializeJsonKey((JsonRowSerializationSchema) serializationSchema, element);
}
SerializationSchema<CRow> serializationSchema = serializationMetricWrapper.getSerializationSchema();
if (serializationSchema instanceof JsonCRowSerializationSchema) {
return serializeJsonKey((JsonCRowSerializationSchema) serializationSchema, element);
}
return null;
}

public byte[] serializeValue(Row element) {
@Override
public byte[] serializeValue(CRow element) {
return this.serializationMetricWrapper.serialize(element);
}

public String getTargetTopic(Row element) {
@Override
public String getTargetTopic(CRow element) {
return null;
}

private byte[] serializeJsonKey(JsonRowSerializationSchema jsonRowSerializationSchema, Row element) {
private byte[] serializeJsonKey(JsonCRowSerializationSchema jsonCRowSerializationSchema, CRow element) {
try {
byte[] data = jsonRowSerializationSchema.serialize(element);
byte[] data = jsonCRowSerializationSchema.serialize(element);
ObjectNode objectNode = mapper.readValue(data, ObjectNode.class);
StringBuilder sb = new StringBuilder();
for(String key : partitionKeys){
if(objectNode.has(key)){
for (String key : partitionKeys) {
if (objectNode.has(key)) {
sb.append(objectNode.get(key.trim()));
}
}
return sb.toString().getBytes();
} catch (Exception e){
if(COUNTER.getAndIncrement() % 1000 == 0){
} catch (Exception e) {
if (COUNTER.getAndIncrement() % 1000 == 0) {
LOG.error("serializeJsonKey error", e);
}
}
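serializeJsonKey derives the Kafka message key by parsing the already-serialized JSON value and concatenating the configured partition columns, so rows with equal key columns are routed to the same partition. A standalone sketch of just that extraction step, using a hard-coded JSON payload and partition keys in place of a serialized CRow:

import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode;

import java.nio.charset.StandardCharsets;

public class JsonKeyExtractionSketch {

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();

        // In the real code this byte[] comes from JsonCRowSerializationSchema#serialize(CRow).
        byte[] value = "{\"orderId\":\"order-1\",\"userId\":42,\"amount\":100}"
                .getBytes(StandardCharsets.UTF_8);
        String[] partitionKeys = {"orderId", "userId"};

        // Same logic as serializeJsonKey: read the value back as JSON and
        // concatenate the partition key fields into the Kafka message key.
        ObjectNode objectNode = mapper.readValue(value, ObjectNode.class);
        StringBuilder sb = new StringBuilder();
        for (String key : partitionKeys) {
            if (objectNode.has(key)) {
                sb.append(objectNode.get(key.trim()));
            }
        }
        byte[] messageKey = sb.toString().getBytes(StandardCharsets.UTF_8);

        System.out.println(new String(messageKey, StandardCharsets.UTF_8)); // prints "order-1"42
    }
}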