diff --git a/dingo-calcite/src/main/codegen/templates/Parser.jj b/dingo-calcite/src/main/codegen/templates/Parser.jj index c5c5466f64..897fd2fabd 100644 --- a/dingo-calcite/src/main/codegen/templates/Parser.jj +++ b/dingo-calcite/src/main/codegen/templates/Parser.jj @@ -3990,7 +3990,8 @@ List Expression2(ExprContext exprContext) : { if ((op == SqlUserDefinedOperators.L2_DISTANCE || op == SqlUserDefinedOperators.IP_DISTANCE - || op == SqlUserDefinedOperators.COSINE_SIMILARITY) + || op == SqlUserDefinedOperators.COSINE_SIMILARITY + || op == SqlUserDefinedOperators.HAMMING_DISTANCE) && list.size() == 3) { SqlNode call = op.createCall(s.end(this), (SqlNode)list.get(0), (SqlNode)list.get(2)); list.clear(); @@ -7851,6 +7852,7 @@ SqlBinaryOperator BinaryRowOperator() : | { return SqlUserDefinedOperators.L2_DISTANCE; } | { return SqlUserDefinedOperators.COSINE_SIMILARITY; } | { return SqlUserDefinedOperators.IP_DISTANCE; } +| { return SqlUserDefinedOperators.HAMMING_DISTANCE; } } /** @@ -8833,6 +8835,7 @@ void NonReservedKeyWord2of3() : | < L2DISTANCE: "<->" > | < COSINESIMILARITY: "<=>" > | < IPDISTANCE: "<*>" > +| < HAMMINGDISTANCE: ""> | < DOUBLE_PERIOD: ".." 
> | < QUOTE: "'" > | < DOUBLE_QUOTE: "\"" > diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/DingoDdlExecutor.java b/dingo-calcite/src/main/java/io/dingodb/calcite/DingoDdlExecutor.java index 68efdb1112..4a8f6ff4ba 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/DingoDdlExecutor.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/DingoDdlExecutor.java @@ -2369,9 +2369,10 @@ private static IndexDefinition fromSqlIndexDeclaration( throw new RuntimeException("Column must be not null, column name: " + columnName); } } else if (i == 1) { - if (!columnDefinition.getTypeName().equals("ARRAY") + if (!columnDefinition.getTypeName().equals("BINARY") + && (!columnDefinition.getTypeName().equals("ARRAY") || !(columnDefinition.getElementType() != null - && columnDefinition.getElementType().equals("FLOAT"))) { + && columnDefinition.getElementType().equals("FLOAT")))) { throw new RuntimeException("Invalid column type: " + columnName); } if (columnDefinition.isNullable()) { diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/executor/ShowCreateTableExecutor.java b/dingo-calcite/src/main/java/io/dingodb/calcite/executor/ShowCreateTableExecutor.java index c112fa2465..ba2f0f8228 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/executor/ShowCreateTableExecutor.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/executor/ShowCreateTableExecutor.java @@ -217,6 +217,12 @@ private String getCreateTable() { case VECTOR_DISKANN: type = "DISKANN"; break; + case VECTOR_BINARY_FLAT: + type = "BINARY_FLAT"; + break; + case VECTOR_BINARY_IVF_FLAT: + type = "BINARY_IVF_FLAT"; + break; default: type = "HNSW"; break; @@ -234,6 +240,9 @@ private String getCreateTable() { case "METRIC_TYPE_INNER_PRODUCT": val = "INNER_PRODUCT"; break; + case "METRIC_TYPE_HAMMING" : + val = "HAMMING"; + break; } } createTableSqlStr.append(key).append("=").append(val).append(","); diff --git 
a/dingo-calcite/src/main/java/io/dingodb/calcite/fun/DingoOperatorTable.java b/dingo-calcite/src/main/java/io/dingodb/calcite/fun/DingoOperatorTable.java index ea7fa045cb..ff1a4c27a4 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/fun/DingoOperatorTable.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/fun/DingoOperatorTable.java @@ -33,6 +33,7 @@ import io.dingodb.exec.fun.special.ThrowFun; import io.dingodb.exec.fun.vector.VectorCosineDistanceFun; import io.dingodb.exec.fun.vector.VectorDistanceFun; +import io.dingodb.exec.fun.vector.VectorHammingDistanceFun; import io.dingodb.exec.fun.vector.VectorIPDistanceFun; import io.dingodb.exec.fun.vector.VectorImageFun; import io.dingodb.exec.fun.vector.VectorL2DistanceFun; @@ -336,6 +337,13 @@ private void init() { family(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY), SqlFunctionCategory.NUMERIC ); + registerFunction( + VectorHammingDistanceFun.NAME, + FLOAT, + DingoInferTypes.FLOAT, + family(SqlTypeFamily.BINARY, SqlTypeFamily.BINARY), + SqlFunctionCategory.NUMERIC + ); registerFunction( VersionFun.NAME, ReturnTypes.VARCHAR_2000, diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/grammar/SqlUserDefinedOperators.java b/dingo-calcite/src/main/java/io/dingodb/calcite/grammar/SqlUserDefinedOperators.java index 2339a65fdd..b70abce759 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/grammar/SqlUserDefinedOperators.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/grammar/SqlUserDefinedOperators.java @@ -18,6 +18,7 @@ import io.dingodb.common.table.DiskAnnTable; import io.dingodb.exec.fun.vector.VectorCosineDistanceFun; +import io.dingodb.exec.fun.vector.VectorHammingDistanceFun; import io.dingodb.exec.fun.vector.VectorIPDistanceFun; import io.dingodb.exec.fun.vector.VectorL2DistanceFun; import org.apache.calcite.sql.SqlKind; @@ -25,6 +26,7 @@ import org.apache.calcite.sql2rel.SqlDiskAnnOperator; import org.apache.calcite.sql2rel.SqlDocumentOperator; import 
org.apache.calcite.sql2rel.SqlFunctionScanOperator; +import org.apache.calcite.sql2rel.SqlHammingDistanceOperator; import org.apache.calcite.sql2rel.SqlHybridSearchOperator; import org.apache.calcite.sql2rel.SqlIPDistanceOperator; import org.apache.calcite.sql2rel.SqlL2DistanceOperator; @@ -68,6 +70,7 @@ public class SqlUserDefinedOperators { public static SqlL2DistanceOperator L2_DISTANCE = new SqlL2DistanceOperator(VectorL2DistanceFun.NAME, SqlKind.OTHER_FUNCTION); - + public static SqlHammingDistanceOperator HAMMING_DISTANCE + = new SqlHammingDistanceOperator(VectorHammingDistanceFun.NAME, SqlKind.OTHER_FUNCTION); } diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/rel/LogicalDingoTableScan.java b/dingo-calcite/src/main/java/io/dingodb/calcite/rel/LogicalDingoTableScan.java index a1bb2d1325..166c3b6e78 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/rel/LogicalDingoTableScan.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/rel/LogicalDingoTableScan.java @@ -333,6 +333,9 @@ public static SqlOperator findSqlOperator(String metricType) { case "COSINE": metricTypeFullName = "cosineDistance"; break; + case "HAMMING": + metricTypeFullName = "hammingDistance"; + break; default: metricTypeFullName = null; break; diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoScanProjectRule.java b/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoScanProjectRule.java index a115e77a05..19fb2572af 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoScanProjectRule.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoScanProjectRule.java @@ -85,7 +85,8 @@ public void onMatch(@NonNull RelOptRuleCall call) { public Void visitCall(RexCall call) { if (call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.COSINE_SIMILARITY.getName()) || call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.IP_DISTANCE.getName()) - || 
call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.L2_DISTANCE.getName())) { + || call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.L2_DISTANCE.getName()) + || call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.HAMMING_DISTANCE.getName())) { vectorSelected.add(call); } return super.visitCall(call); @@ -168,7 +169,8 @@ private static LogicalProject getPostVectorFiltering(LogicalProject project, String opName = rexCall.op.getName(); if (opName.equalsIgnoreCase(SqlUserDefinedOperators.COSINE_SIMILARITY.getName()) || opName.equalsIgnoreCase(SqlUserDefinedOperators.IP_DISTANCE.getName()) - || opName.equalsIgnoreCase(SqlUserDefinedOperators.L2_DISTANCE.getName())) { + || opName.equalsIgnoreCase(SqlUserDefinedOperators.L2_DISTANCE.getName()) + || opName.equalsIgnoreCase(SqlUserDefinedOperators.HAMMING_DISTANCE.getName())) { rn.set(true); return inputRef; } diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoVectorIndexRule.java b/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoVectorIndexRule.java index bcc10a31fc..77a1a3f0d7 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoVectorIndexRule.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoVectorIndexRule.java @@ -36,6 +36,7 @@ import io.dingodb.common.util.Pair; import io.dingodb.meta.entity.Column; import io.dingodb.meta.entity.IndexTable; +import io.dingodb.meta.entity.IndexType; import io.dingodb.meta.entity.Table; import lombok.extern.slf4j.Slf4j; import org.apache.calcite.plan.RelOptRuleCall; @@ -57,6 +58,7 @@ import static io.dingodb.calcite.rule.DingoGetByIndexRule.filterIndices; import static io.dingodb.calcite.rule.DingoGetByIndexRule.filterScalarIndices; import static io.dingodb.calcite.rule.DingoGetByIndexRule.getScalaIndices; +import static io.dingodb.calcite.utils.VectorUtils.parseBinaryStringToByteArray; import static io.dingodb.calcite.visitor.function.DingoGetVectorByDistanceVisitFun.getTargetVector; @Slf4j @@ -90,10 
+92,18 @@ public static RelNode getDingoGetVectorByDistance(RexNode condition, LogicalDing if (condition != null) { dispatchDistanceCondition(condition, selection, dingoTable); } + int dimension; + if (((IndexTable) vector.getIndexTable()).getIndexType() == IndexType.VECTOR_BINARY_FLAT || + ((IndexTable) vector.getIndexTable()).getIndexType() == IndexType.VECTOR_BINARY_IVF_FLAT) { + byte[] binaryVector = parseBinaryStringToByteArray(vector.getOperands()); + dimension = binaryVector.length; + } else { + List targetVector = getTargetVector(vector.getOperands()); + dimension = targetVector.size(); + } - List targetVector = getTargetVector(vector.getOperands()); // if filter matched point get by primary key, then DingoGetByKeys priority highest - Pair vectorIdPair = getVectorIndex(dingoTable, targetVector.size()); + Pair vectorIdPair = getVectorIndex(dingoTable, dimension); assert vectorIdPair != null; RelTraitSet traitSet = vector.getTraitSet().replace(DingoRelStreaming.of(vector.getTable())); boolean preFilter = vector.getHints() != null diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoVectorProjectRule.java b/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoVectorProjectRule.java index f76e414b76..b58e66f939 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoVectorProjectRule.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/rule/DingoVectorProjectRule.java @@ -67,7 +67,8 @@ public void onMatch(RelOptRuleCall call) { public Void visitCall(RexCall call) { if (call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.COSINE_SIMILARITY.getName()) || call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.IP_DISTANCE.getName()) - || call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.L2_DISTANCE.getName())) { + || call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.L2_DISTANCE.getName()) + || call.op.getName().equalsIgnoreCase(SqlUserDefinedOperators.HAMMING_DISTANCE.getName())) { 
vectorSelected.add(call); } return super.visitCall(call);
diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/utils/VectorUtils.java b/dingo-calcite/src/main/java/io/dingodb/calcite/utils/VectorUtils.java
new file mode 100644
index 0000000000..14ff4c37f6
--- /dev/null
+++ b/dingo-calcite/src/main/java/io/dingodb/calcite/utils/VectorUtils.java
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2021 DataCanvas
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.dingodb.calcite.utils;
+
+import io.dingodb.exec.fun.vector.VectorImageFun;
+import io.dingodb.exec.fun.vector.VectorTextFun;
+import io.dingodb.exec.restful.VectorExtract;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.sql.SqlBasicCall;
+import org.apache.calcite.sql.SqlCharStringLiteral;
+import org.apache.calcite.sql.SqlIdentifier;
+import org.apache.calcite.sql.SqlLiteral;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlNumericLiteral;
+import org.apache.calcite.sql.fun.SqlArrayValueConstructor;
+import org.apache.calcite.util.NlsString;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * Static helpers that decode the operand at index 2 of an operand list into a vector value.
+ *
+ * <p>Every public method reads the operand at index 2 of the list it is given.
+ */
+public final class VectorUtils {
+
+    /** Number of binary digits packed into one byte by {@link #parseBinaryStringToBinary}. */
+    private static final int MULTIPLE = 8;
+
+    /** Index of the vector operand inside the operand list. */
+    private static final int VECTOR_OPERAND_INDEX = 2;
+
+    private VectorUtils() {
+    }
+
+    /**
+     * Packs a string of binary digits into bytes, eight digits per byte.
+     *
+     * <p>Digits are consumed left to right in groups of eight; trailing characters that do not
+     * fill a whole group are ignored.
+     *
+     * @param operandsList operand list whose element at index 2 is the character literal to pack
+     * @return one byte per complete group of eight characters
+     * @throws NumberFormatException if a group is not a valid base-2 number
+     * @throws RuntimeException if the operand is missing or is not a character string literal
+     */
+    public static byte[] parseBinaryStringToBinary(List operandsList) {
+        String binaryString = binaryStringOperand(operandsList);
+        int segmentCount = binaryString.length() / MULTIPLE;
+        byte[] packed = new byte[segmentCount];
+        for (int i = 0; i < segmentCount; i++) {
+            String segment = binaryString.substring(i * MULTIPLE, (i + 1) * MULTIPLE);
+            packed[i] = (byte) Integer.parseInt(segment, 2);
+        }
+        return packed;
+    }
+
+    /**
+     * Copies the characters of the operand string into a byte array, one byte per character.
+     *
+     * <p>Each byte is the character's code narrowed to {@code byte}, so the digit {@code '0'} is
+     * stored as 48 and {@code '1'} as 49; the array length equals the string length.
+     *
+     * @param operandsList operand list whose element at index 2 is the character literal to copy
+     * @return a new array with one entry per character of the literal
+     * @throws RuntimeException if the operand is missing or is not a character string literal
+     */
+    public static byte[] parseBinaryStringToByteArray(List operandsList) {
+        String binaryString = binaryStringOperand(operandsList);
+        byte[] bytes = new byte[binaryString.length()];
+        for (int i = 0; i < bytes.length; i++) {
+            bytes[i] = (byte) binaryString.charAt(i);
+        }
+        return bytes;
+    }
+
+    /**
+     * Resolves the vector operand to an array of floats.
+     *
+     * <p>Three operand shapes are handled: a {@link RexCall} whose operands are literals, an
+     * array value constructor of numeric literals, and a call to the text or image vector
+     * function, which is forwarded to {@link VectorExtract}.
+     *
+     * @param operandsList operand list whose element at index 2 describes the vector
+     * @return the vector components, never {@code null}
+     * @throws RuntimeException if the operand is missing, has an unsupported shape, or no vector
+     *     could be produced from it
+     */
+    public static Float[] getVectorFloats(List operandsList) {
+        if (operandsList == null || operandsList.size() <= VECTOR_OPERAND_INDEX) {
+            throw new RuntimeException("vector load param error");
+        }
+        Object operand = operandsList.get(VECTOR_OPERAND_INDEX);
+        if (operand instanceof RexCall) {
+            RexCall rexCall = (RexCall) operand;
+            int vectorDimension = rexCall.getOperands().size();
+            Float[] values = new Float[vectorDimension];
+            for (int i = 0; i < vectorDimension; i++) {
+                RexLiteral literal = (RexLiteral) rexCall.getOperands().get(i);
+                values[i] = literal.getValueAs(Float.class);
+            }
+            return values;
+        }
+        if (!(operand instanceof SqlBasicCall)) {
+            // Reject other operand kinds with the same error as other bad input instead of
+            // failing on the cast below.
+            throw new RuntimeException("vector load param error");
+        }
+        SqlBasicCall basicCall = (SqlBasicCall) operand;
+        Float[] floatArray = basicCall.getOperator() instanceof SqlArrayValueConstructor
+            ? literalsToFloats(basicCall.getOperandList())
+            : loadFromVectorFunction(basicCall);
+        if (floatArray == null) {
+            throw new RuntimeException("vector load error");
+        }
+        return floatArray;
+    }
+
+    /**
+     * Reads the operand at index 2 as a character string literal and returns its text.
+     *
+     * @throws RuntimeException if the list is too short or the operand is not such a literal
+     */
+    private static String binaryStringOperand(List<?> operandsList) {
+        if (operandsList == null || operandsList.size() <= VECTOR_OPERAND_INDEX) {
+            throw new RuntimeException("vector load binary string param error");
+        }
+        Object operand = operandsList.get(VECTOR_OPERAND_INDEX);
+        if (!(operand instanceof SqlCharStringLiteral)) {
+            throw new RuntimeException("vector load binary string param error");
+        }
+        return ((NlsString) ((SqlCharStringLiteral) operand).getValue()).getValue();
+    }
+
+    /** Converts the numeric literals of an array value constructor to floats. */
+    private static Float[] literalsToFloats(List<SqlNode> literals) {
+        Float[] values = new Float[literals.size()];
+        for (int i = 0; i < values.length; i++) {
+            Object value = Objects.requireNonNull(((SqlNumericLiteral) literals.get(i)).getValue());
+            values[i] = ((Number) value).floatValue();
+        }
+        return values;
+    }
+
+    /**
+     * Obtains a vector through the text or image vector function named by the call.
+     *
+     * @return the array returned by {@link VectorExtract}, or {@code null} when the call names
+     *     neither function
+     * @throws RuntimeException if the call has too few or unusable parameters
+     */
+    private static Float[] loadFromVectorFunction(SqlBasicCall basicCall) {
+        List<SqlNode> sqlNodes = basicCall.getOperandList();
+        if (sqlNodes.size() < 2) {
+            throw new RuntimeException("vector load param error");
+        }
+        List<Object> paramList = sqlNodes.stream().map(e -> {
+            if (e instanceof SqlLiteral) {
+                return ((SqlLiteral) e).getValue();
+            } else if (e instanceof SqlIdentifier) {
+                return ((SqlIdentifier) e).getSimple();
+            } else {
+                return e.toString();
+            }
+        }).collect(Collectors.toList());
+        if (paramList.get(1) == null || paramList.get(0) == null) {
+            throw new RuntimeException("vector load param error");
+        }
+        String firstParam = paramList.get(0).toString();
+        String funcName = basicCall.getOperator().getName();
+        if (funcName.equalsIgnoreCase(VectorTextFun.NAME)) {
+            // Only the text function receives the second parameter with single quotes removed.
+            String secondParam = paramList.get(1).toString().replace("'", "");
+            return VectorExtract.getTxtVector(funcName, firstParam, secondParam);
+        }
+        if (funcName.equalsIgnoreCase(VectorImageFun.NAME)) {
+            if (paramList.size() < 3) {
+                throw new RuntimeException("vector load param error");
+            }
+            Object localPath = paramList.get(2);
+            if (!(localPath instanceof Boolean)) {
+                throw new RuntimeException("vector load param error");
+            }
+            return VectorExtract.getImgVector(funcName, firstParam, paramList.get(1), (Boolean) localPath);
+        }
+        return null;
+    }
+}
diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/visitor/function/DingoGetVectorByDistanceVisitFun.java b/dingo-calcite/src/main/java/io/dingodb/calcite/visitor/function/DingoGetVectorByDistanceVisitFun.java index 8fb6b9a6d9..8158fc1d17 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/visitor/function/DingoGetVectorByDistanceVisitFun.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/visitor/function/DingoGetVectorByDistanceVisitFun.java @@ -40,8 +40,9 @@ import java.util.function.Supplier; import static io.dingodb.calcite.rel.DingoRel.dingo; +import static
io.dingodb.calcite.utils.VectorUtils.getVectorFloats; +import static io.dingodb.calcite.utils.VectorUtils.parseBinaryStringToByteArray; import static io.dingodb.calcite.visitor.function.DingoVectorVisitFun.getTopkParam; -import static io.dingodb.calcite.visitor.function.DingoVectorVisitFun.getVectorFloats; import static io.dingodb.exec.utils.OperatorCodeUtils.VECTOR_POINT_DISTANCE; public final class DingoGetVectorByDistanceVisitFun { @@ -67,8 +68,20 @@ static class OperatorSupplier implements Supplier { @Override public Vertex get() { DingoRelOptTable dingoRelOptTable = (DingoRelOptTable) rel.getTable(); - List targetVector = getTargetVector(rel.getOperands()); - IndexTable indexTable = getVectorIndexTable(dingoRelOptTable, targetVector.size()); + int dimension; + List targetVector = null; + byte[] binaryVector = null; + boolean isBinaryVector = false; + if (((IndexTable) rel.getIndexTable()).getIndexType() == IndexType.VECTOR_BINARY_FLAT || + ((IndexTable) rel.getIndexTable()).getIndexType() == IndexType.VECTOR_BINARY_IVF_FLAT) { + binaryVector = getBinaryVector(rel.getOperands()); + dimension = binaryVector.length; + isBinaryVector = true; + } else { + targetVector = getTargetVector(rel.getOperands()); + dimension = targetVector.size(); + } + IndexTable indexTable = getVectorIndexTable(dingoRelOptTable, dimension); if (indexTable == null) { throw new RuntimeException("not found vector index"); } @@ -76,8 +89,6 @@ public Vertex get() { NavigableMap distributions = metaService.getRangeDistribution(rel.getIndexTableId()); - int dimension = Integer.parseInt(indexTable.getProperties() - .getOrDefault("dimension", targetVector.size()).toString()); String algType; if (indexTable.indexType == IndexType.VECTOR_FLAT) { algType = "FLAT"; @@ -94,7 +105,9 @@ public Vertex get() { distributions.firstEntry().getValue(), rel.getVectorIndex(), rel.getIndexTableId(), + isBinaryVector, targetVector, + binaryVector, dimension, algType, 
indexTable.getProperties().getProperty("metricType"), @@ -106,6 +119,10 @@ public Vertex get() { } } + public static byte[] getBinaryVector(List operandList) { + return parseBinaryStringToByteArray(operandList); + } + public static List getTargetVector(List operandList) { Float[] vector = getVectorFloats(operandList); return Arrays.asList(vector); diff --git a/dingo-calcite/src/main/java/io/dingodb/calcite/visitor/function/DingoVectorVisitFun.java b/dingo-calcite/src/main/java/io/dingodb/calcite/visitor/function/DingoVectorVisitFun.java index 8153496d76..1da42cacc3 100644 --- a/dingo-calcite/src/main/java/io/dingodb/calcite/visitor/function/DingoVectorVisitFun.java +++ b/dingo-calcite/src/main/java/io/dingodb/calcite/visitor/function/DingoVectorVisitFun.java @@ -43,11 +43,8 @@ import io.dingodb.exec.base.Task; import io.dingodb.exec.dag.Vertex; import io.dingodb.exec.expr.SqlExpr; -import io.dingodb.exec.fun.vector.VectorImageFun; -import io.dingodb.exec.fun.vector.VectorTextFun; import io.dingodb.exec.operator.params.PartVectorParam; import io.dingodb.exec.operator.params.TxnPartVectorParam; -import io.dingodb.exec.restful.VectorExtract; import io.dingodb.exec.transaction.base.ITransaction; import io.dingodb.expr.rel.RelOp; import io.dingodb.expr.rel.op.RelOpBuilder; @@ -55,20 +52,17 @@ import io.dingodb.meta.MetaService; import io.dingodb.meta.entity.Column; import io.dingodb.meta.entity.IndexTable; +import io.dingodb.meta.entity.IndexType; import io.dingodb.meta.entity.Table; import lombok.extern.slf4j.Slf4j; -import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.SqlBasicCall; import org.apache.calcite.sql.SqlIdentifier; -import org.apache.calcite.sql.SqlLiteral; import org.apache.calcite.sql.SqlNode; import 
org.apache.calcite.sql.SqlNumericLiteral; -import org.apache.calcite.sql.fun.SqlArrayValueConstructor; import org.apache.calcite.util.mapping.Mapping; import org.apache.calcite.util.mapping.Mappings; import org.checkerframework.checker.nullness.qual.NonNull; @@ -84,6 +78,8 @@ import java.util.stream.Collectors; import static io.dingodb.calcite.rel.LogicalDingoTableScan.getIndexMetricType; +import static io.dingodb.calcite.utils.VectorUtils.getVectorFloats; +import static io.dingodb.calcite.utils.VectorUtils.parseBinaryStringToByteArray; import static io.dingodb.common.util.Utils.isNeedLookUp; import static io.dingodb.exec.utils.OperatorCodeUtils.PART_VECTOR; import static io.dingodb.exec.utils.OperatorCodeUtils.TXN_PART_VECTOR; @@ -193,7 +189,15 @@ public static Collection visit( CommonId tableId = dingoTable.getTableId(); NavigableMap ranges = MetaService.root(visitor.getPointTs()) .getRangeDistribution(tableId); - Float[] floatArray = getVectorFloats(operandsList); + boolean isBinaryVector = false; + byte [] binaryBytes = null; + Float[] floatArray = null; + if (indexTable.indexType == IndexType.VECTOR_BINARY_FLAT || indexTable.indexType == IndexType.VECTOR_BINARY_IVF_FLAT) { + isBinaryVector = true; + binaryBytes = parseBinaryStringToByteArray(operandsList); + } else { + floatArray = getVectorFloats(operandsList); + } int topN = ((Number) Objects.requireNonNull(((SqlNumericLiteral) operandsList.get(3)).getValue())).intValue(); List outputs = new ArrayList<>(); // Create tasks based on partitions @@ -240,7 +244,9 @@ public static Collection visit( td, ranges, floatArray, + binaryBytes, topN, + isBinaryVector, parameterMap, indexTable, relOp, @@ -267,75 +273,6 @@ public static Collection visit( return outputs; } - public static Float[] getVectorFloats(List operandsList) { - Float[] floatArray = null; - Object call = operandsList.get(2); - if (call instanceof RexCall) { - RexCall rexCall = (RexCall) call; - floatArray = new Float[rexCall.getOperands().size()]; - 
int vectorDimension = rexCall.getOperands().size(); - for (int i = 0; i < vectorDimension; i++) { - RexLiteral literal = (RexLiteral) rexCall.getOperands().get(i); - floatArray[i] = literal.getValueAs(Float.class); - } - return floatArray; - } - SqlBasicCall basicCall = (SqlBasicCall) operandsList.get(2); - if (basicCall.getOperator() instanceof SqlArrayValueConstructor) { - List operands = basicCall.getOperandList(); - floatArray = new Float[operands.size()]; - for (int i = 0; i < operands.size(); i++) { - floatArray[i] = ( - (Number) Objects.requireNonNull(((SqlNumericLiteral) operands.get(i)).getValue()) - ).floatValue(); - } - } else { - List sqlNodes = basicCall.getOperandList(); - if (sqlNodes.size() < 2) { - throw new RuntimeException("vector load param error"); - } - List paramList = sqlNodes.stream().map(e -> { - if (e instanceof SqlLiteral) { - return ((SqlLiteral)e).getValue(); - } else if (e instanceof SqlIdentifier) { - return ((SqlIdentifier)e).getSimple(); - } else { - return e.toString(); - } - }).collect(Collectors.toList()); - if (paramList.get(1) == null || paramList.get(0) == null) { - throw new RuntimeException("vector load param error"); - } - String param = paramList.get(1).toString(); - if (param.contains("'")) { - param = param.replace("'", ""); - } - String funcName = basicCall.getOperator().getName(); - if (funcName.equalsIgnoreCase(VectorTextFun.NAME)) { - floatArray = VectorExtract.getTxtVector( - basicCall.getOperator().getName(), - paramList.get(0).toString(), - param); - } else if (funcName.equalsIgnoreCase(VectorImageFun.NAME)) { - if (paramList.size() < 3) { - throw new RuntimeException("vector load param error"); - } - Object localPath = paramList.get(2); - if (!(localPath instanceof Boolean)) { - throw new RuntimeException("vector load param error"); - } - floatArray = VectorExtract.getImgVector( - basicCall.getOperator().getName(), - paramList.get(0).toString(), - paramList.get(1), - (Boolean) paramList.get(2)); - } - } - if 
(floatArray == null) { - throw new RuntimeException("vector load error"); - } - return floatArray; - } public static Integer getTopkParam(List operandsList) { return ((Number) Objects.requireNonNull(((SqlNumericLiteral) operandsList.get(3)).getValue())).intValue(); diff --git a/dingo-calcite/src/main/java/org/apache/calcite/sql/validate/TableFunctionNamespace.java b/dingo-calcite/src/main/java/org/apache/calcite/sql/validate/TableFunctionNamespace.java index 9f64ad5433..5df4c937ac 100644 --- a/dingo-calcite/src/main/java/org/apache/calcite/sql/validate/TableFunctionNamespace.java +++ b/dingo-calcite/src/main/java/org/apache/calcite/sql/validate/TableFunctionNamespace.java @@ -39,11 +39,14 @@ import org.checkerframework.checker.nullness.qual.Nullable; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; -import java.util.Objects; import java.util.stream.Collectors; import static io.dingodb.calcite.type.converter.DefinitionMapper.mapToRelDataType; +import static io.dingodb.calcite.utils.VectorUtils.parseBinaryStringToByteArray; +import static io.dingodb.exec.transaction.util.BinaryVectorUtils.checkBinaryVector; public class TableFunctionNamespace extends AbstractNamespace { @@ -91,6 +94,12 @@ protected RelDataType validateImpl(RelDataType targetRowType) { Table table = dingoTable.getTable(); this.index = getVectorIndexTable(table, columnIdentifier.getSimple().toUpperCase()); + if (((IndexTable)index).getIndexType() == IndexType.VECTOR_BINARY_FLAT + || ((IndexTable)index).getIndexType() == IndexType.VECTOR_BINARY_IVF_FLAT) { + byte[] binaryBytes = parseBinaryStringToByteArray(new ArrayList<>(operandList)); + int dimension = Integer.parseInt(index.getProperties().getProperty("dimension")); + checkBinaryVector(binaryBytes, dimension); + } cols.add(Column .builder() .name(index.getName().concat("$distance")) diff --git a/dingo-calcite/src/main/java/org/apache/calcite/sql2rel/SqlHammingDistanceOperator.java 
b/dingo-calcite/src/main/java/org/apache/calcite/sql2rel/SqlHammingDistanceOperator.java
new file mode 100644
index 0000000000..1bbd343d76
--- /dev/null
+++ b/dingo-calcite/src/main/java/org/apache/calcite/sql2rel/SqlHammingDistanceOperator.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2021 DataCanvas
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.calcite.sql2rel;
+
+import io.dingodb.calcite.fun.DingoInferTypes;
+import org.apache.calcite.sql.SqlBinaryOperator;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSyntax;
+import org.apache.calcite.sql.SqlWriter;
+import org.apache.calcite.sql.type.SqlTypeFamily;
+
+import static org.apache.calcite.sql.type.OperandTypes.family;
+import static org.apache.calcite.sql.type.SqlAppointReturnTypeInference.FLOAT;
+
+public class SqlHammingDistanceOperator extends SqlBinaryOperator { // Hamming-distance operator over two BINARY operands
+
+    /**
+     * Creates the operator with precedence 24, left associativity and a (BINARY, BINARY) operand check.
+     *
+     * @param name Name of operator
+     * @param kind Kind
+     */
+    public SqlHammingDistanceOperator(String name, SqlKind kind) {
+        super(
+            name,
+            kind,
+            24,
+            true,
+            FLOAT,
+            DingoInferTypes.FLOAT,
+            family(SqlTypeFamily.BINARY, SqlTypeFamily.BINARY)
+        );
+    }
+
+    @Override
+    public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) { // prints: operand0 <-> operand1
+        writer.keyword(call.operand(0).toString());
+        writer.keyword("<->"); // NOTE(review): Parser.jj defines "<->" as the L2DISTANCE token - confirm this symbol
+        writer.keyword(call.operand(1).toString());
+    }
+
+    @Override
+    public SqlSyntax getSyntax() {
+        return SqlSyntax.FUNCTION; // reports function syntax although the class extends SqlBinaryOperator
+    }
+}
diff --git a/dingo-common/src/main/java/io/dingodb/common/vector/VectorCalcDistance.java b/dingo-common/src/main/java/io/dingodb/common/vector/VectorCalcDistance.java index c426d36b82..4c234ba55b 100644 --- a/dingo-common/src/main/java/io/dingodb/common/vector/VectorCalcDistance.java +++ b/dingo-common/src/main/java/io/dingodb/common/vector/VectorCalcDistance.java @@ -29,10 +29,16 @@ public class VectorCalcDistance { private Long vectorId; + private boolean isBinaryVector; + private List> leftList; private List> rightList; + private List leftBinaryValues; + + private List rightBinaryValues; + private String algorithmType; private String metricType; diff --git a/dingo-common/src/main/java/io/dingodb/common/vector/VectorSearchResponse.java b/dingo-common/src/main/java/io/dingodb/common/vector/VectorSearchResponse.java index b582973bc0..f2b40e2598 100644 --- a/dingo-common/src/main/java/io/dingodb/common/vector/VectorSearchResponse.java +++ b/dingo-common/src/main/java/io/dingodb/common/vector/VectorSearchResponse.java @@ -33,4 +33,6 @@ public class VectorSearchResponse { List floatValues; + List binaryValues; + } diff --git a/dingo-exec/src/main/java/io/dingodb/exec/fun/DingoFunFactory.java b/dingo-exec/src/main/java/io/dingodb/exec/fun/DingoFunFactory.java index 9295044939..3aa630dc45 100644 --- a/dingo-exec/src/main/java/io/dingodb/exec/fun/DingoFunFactory.java +++
b/dingo-exec/src/main/java/io/dingodb/exec/fun/DingoFunFactory.java @@ -29,6 +29,7 @@ import io.dingodb.exec.fun.special.ThrowFun; import io.dingodb.exec.fun.vector.VectorCosineDistanceFun; import io.dingodb.exec.fun.vector.VectorDistanceFun; +import io.dingodb.exec.fun.vector.VectorHammingDistanceFun; import io.dingodb.exec.fun.vector.VectorIPDistanceFun; import io.dingodb.exec.fun.vector.VectorImageFun; import io.dingodb.exec.fun.vector.VectorL2DistanceFun; @@ -55,6 +56,7 @@ private DingoFunFactory() { registerBinaryFun(VectorL2DistanceFun.NAME, VectorL2DistanceFun.INSTANCE); registerBinaryFun(VectorIPDistanceFun.NAME, VectorIPDistanceFun.INSTANCE); registerBinaryFun(VectorCosineDistanceFun.NAME, VectorCosineDistanceFun.INSTANCE); + registerBinaryFun(VectorHammingDistanceFun.NAME, VectorHammingDistanceFun.INSTANCE); registerBinaryFun(VectorDistanceFun.NAME, VectorDistanceFun.INSTANCE); registerNullaryFun(VersionFun.NAME, VersionFun.INSTANCE); registerBinaryFun(JsonExtractFun.NAME, JsonExtractFun.INSTANCE);
diff --git a/dingo-exec/src/main/java/io/dingodb/exec/fun/vector/VectorHammingDistanceFun.java b/dingo-exec/src/main/java/io/dingodb/exec/fun/vector/VectorHammingDistanceFun.java
new file mode 100644
index 0000000000..f5b0d14b28
--- /dev/null
+++ b/dingo-exec/src/main/java/io/dingodb/exec/fun/vector/VectorHammingDistanceFun.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2021 DataCanvas
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.dingodb.exec.fun.vector;
+
+import io.dingodb.exec.transaction.util.BinaryVectorUtils;
+import io.dingodb.expr.common.type.Type;
+import io.dingodb.expr.common.type.Types;
+import io.dingodb.expr.runtime.ExprConfig;
+import io.dingodb.expr.runtime.op.BinaryOp;
+import io.dingodb.expr.runtime.op.OpKey;
+import lombok.AccessLevel;
+import lombok.RequiredArgsConstructor;
+import org.checkerframework.checker.nullness.qual.NonNull;
+
+@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
+public class VectorHammingDistanceFun extends BinaryVectorVectorFun { // singleton function named "hammingDistance"
+    public static final VectorHammingDistanceFun INSTANCE = new VectorHammingDistanceFun();
+    public static final String NAME = "hammingDistance";
+    private static final long serialVersionUID = 5838335595005443866L;
+
+    @Override // casts both operands to byte[] and returns BinaryVectorUtils.hammingDistance cast to float
+    protected Object evalNonNullValue(@NonNull Object value0, @NonNull Object value1, ExprConfig config) {
+        return (float)BinaryVectorUtils.hammingDistance((byte[]) value0, (byte[]) value1);
+    }
+
+    @Override
+    public @NonNull String getName() {
+        return NAME;
+    }
+
+    @Override // NOTE(review): evalNonNullValue returns a float and the SQL function is registered as FLOAT;
+    public Type getType() { // confirm that BYTES (rather than a float type) is intended here
+        return Types.BYTES;
+    }
+
+    @Override
+    public BinaryOp getOp(OpKey key) {
+        return INSTANCE; // the same singleton is returned whatever the key is
+    }
+}
diff --git a/dingo-exec/src/main/java/io/dingodb/exec/operator/PartVectorOperator.java b/dingo-exec/src/main/java/io/dingodb/exec/operator/PartVectorOperator.java index ad13d203d7..2c31f5e08f 100644 --- a/dingo-exec/src/main/java/io/dingodb/exec/operator/PartVectorOperator.java +++ b/dingo-exec/src/main/java/io/dingodb/exec/operator/PartVectorOperator.java @@ -53,7 +53,11 @@ private PartVectorOperator() { param.getIndexId(), param.getFloatArray(), param.getTopN(), - param.getParameterMap()); + param.getParameterMap(), + false, + false, + null + ); for (VectorSearchResponse response : searchResponseList) { CommonId regionId = PartitionService.getService( Optional.ofNullable(param.getTable().getPartitionStrategy()) diff --git
a/dingo-exec/src/main/java/io/dingodb/exec/operator/TxnPartVectorOperator.java b/dingo-exec/src/main/java/io/dingodb/exec/operator/TxnPartVectorOperator.java index a1e3678788..ce9f32ade9 100644 --- a/dingo-exec/src/main/java/io/dingodb/exec/operator/TxnPartVectorOperator.java +++ b/dingo-exec/src/main/java/io/dingodb/exec/operator/TxnPartVectorOperator.java @@ -48,6 +48,8 @@ import static io.dingodb.exec.fun.vector.VectorIPDistanceFun.innerProduct; import static io.dingodb.exec.fun.vector.VectorL2DistanceFun.l2DistanceCombine; import static io.dingodb.exec.operator.TxnGetByKeysOperator.getLocalStore; +import static io.dingodb.exec.transaction.util.BinaryVectorUtils.getBinaryVector; +import static io.dingodb.exec.transaction.util.BinaryVectorUtils.hammingDistance; @Slf4j public class TxnPartVectorOperator extends FilterProjectSourceOperator { @@ -74,7 +76,9 @@ public class TxnPartVectorOperator extends FilterProjectSourceOperator { param.getTopN(), param.getParameterMap(), param.getCoprocessor(), - param.isDiskAnnVector() + param.isDiskAnnVector(), + param.isBinaryVector(), + param.isBinaryVector() ? 
getBinaryVector(param.getBinaryBytes(), param.getBinaryBytes().length) : null ); List results = new ArrayList<>(); if (param.isLookUp()) { @@ -119,8 +123,14 @@ public class TxnPartVectorOperator extends FilterProjectSourceOperator { distance = cosine((List) ov, targetVector); } objects[objects.length - 1] = distance; + } else if (ov instanceof byte[]) { + float distance = 0.0f; + if (distanceType.contains("HAMMING")) { + distance = hammingDistance(param.getBinaryBytes(), (byte[]) ov); + } + objects[objects.length - 1] = distance; } else { - objects[objects.length - 1] = 0.0; + objects[objects.length - 1] = 0.0f; } } results.add(objects); @@ -135,7 +145,9 @@ public class TxnPartVectorOperator extends FilterProjectSourceOperator { } Object[] decode = param.getCodec().decode(keyValue); decode[decode.length - 1] = response.getDistance(); - decode[vecIdx] = response.getFloatValues(); + if (!param.isBinaryVector()) { + decode[vecIdx] = response.getFloatValues(); + } //vecPriIdxMapping.forEach((key, value) -> decode[value] = vecTuples[key]); results.add(decode); diff --git a/dingo-exec/src/main/java/io/dingodb/exec/operator/VectorPointDistanceOperator.java b/dingo-exec/src/main/java/io/dingodb/exec/operator/VectorPointDistanceOperator.java index 4f1eb2ece1..a7477684fc 100644 --- a/dingo-exec/src/main/java/io/dingodb/exec/operator/VectorPointDistanceOperator.java +++ b/dingo-exec/src/main/java/io/dingodb/exec/operator/VectorPointDistanceOperator.java @@ -38,6 +38,8 @@ import java.util.List; import java.util.stream.Collectors; +import static io.dingodb.exec.transaction.util.BinaryVectorUtils.getBinaryVector; + @Slf4j public class VectorPointDistanceOperator extends SoleOutOperator { @@ -66,46 +68,93 @@ public void fin(int pin, @Nullable Fin fin, Vertex vertex) { edge.fin(fin); return; } - List> rightList = cache.stream().map(e -> - (List) e[param.getVectorIndex()] - ).collect(Collectors.toList()); - int topn = param.getTopk(); - if (rightList.isEmpty()) { - edge.fin(fin); - 
return; - } - List floatArray = new ArrayList<>(); - List>> partition = Lists.partition(rightList, 1024); - for (List> right : partition) { - VectorCalcDistance vectorCalcDistance = VectorCalcDistance.builder() - .topN(topn) - .leftList(Collections.singletonList(param.getTargetVector())) - .rightList(right) - .dimension(param.getDimension()) - .algorithmType(param.getAlgType()) - .metricType(param.getMetricType()) - .build(); - floatArray.addAll(ToolService.getDefault().vectorCalcDistance( - param.getRangeDistribution().getId(), - vectorCalcDistance).get(0)); - } - List> pairList = new ArrayList<>(); - for (int i = 0; i < cache.size(); i ++) { - Object[] tuple = cache.get(i); - Object[] result = Arrays.copyOf(tuple, tuple.length + 1); - result[tuple.length] = floatArray.get(i); - pairList.add(new Pair<>((Float) result[tuple.length], result)); - } - Collections.sort(pairList, Comparator.comparing(p -> (Float)p.getKey())); - int count = 0; - Object[] value; + if (!param.isBinaryVector()) { + List> rightList = cache.stream().map(e -> + (List) e[param.getVectorIndex()] + ).collect(Collectors.toList()); + int topn = param.getTopk(); + if (rightList.isEmpty()) { + edge.fin(fin); + return; + } + List floatArray = new ArrayList<>(); + List>> partition = Lists.partition(rightList, 1024); + for (List> right : partition) { + VectorCalcDistance vectorCalcDistance = VectorCalcDistance.builder() + .topN(topn) + .isBinaryVector(false) + .leftList(Collections.singletonList(param.getTargetVector())) + .rightList(right) + .dimension(param.getDimension()) + .algorithmType(param.getAlgType()) + .metricType(param.getMetricType()) + .build(); + floatArray.addAll(ToolService.getDefault().vectorCalcDistance( + param.getRangeDistribution().getId(), + vectorCalcDistance).get(0)); + } + List> pairList = new ArrayList<>(); + for (int i = 0; i < cache.size(); i ++) { + Object[] tuple = cache.get(i); + Object[] result = Arrays.copyOf(tuple, tuple.length + 1); + result[tuple.length] = 
floatArray.get(i); + pairList.add(new Pair<>((Float) result[tuple.length], result)); + } + Collections.sort(pairList, Comparator.comparing(p -> (Float)p.getKey())); + int count = 0; + Object[] value; + + for (Pair pair : pairList) { + if (count < topn) { + value = pair.getValue(); + edge.transformToNext(param.getContext(), selection.revMap(value)); + } + count++; + } + } else { + List rightList = cache.stream() + .map(e -> (byte[])e[param.getVectorIndex()]) + .collect(Collectors.toList()); + int topn = param.getTopk(); + if (rightList.isEmpty()) { + edge.fin(fin); + return; + } + List floatArray = new ArrayList<>(); + byte[] leftBinaryValues = getBinaryVector(param.getBinaryVector(), param.getDimension()); + for (byte[] right : rightList) { + byte[] rightBinaryValues = getBinaryVector(right, param.getDimension()); + VectorCalcDistance vectorCalcDistance = VectorCalcDistance.builder() + .topN(topn) + .isBinaryVector(true) + .leftBinaryValues(Collections.singletonList(leftBinaryValues)) + .rightBinaryValues(Collections.singletonList(rightBinaryValues)) + .dimension(param.getDimension()) + .algorithmType(param.getAlgType()) + .metricType(param.getMetricType()) + .build(); + floatArray.addAll(ToolService.getDefault().vectorCalcDistance( + param.getRangeDistribution().getId(), + vectorCalcDistance).get(0)); + } + List> pairList = new ArrayList<>(); + for (int i = 0; i < cache.size(); i ++) { + Object[] tuple = cache.get(i); + Object[] result = Arrays.copyOf(tuple, tuple.length + 1); + result[tuple.length] = floatArray.get(i); + pairList.add(new Pair<>((Float) result[tuple.length], result)); + } + Collections.sort(pairList, Comparator.comparing(p -> (Float)p.getKey())); + int count = 0; + Object[] value; - for (Pair pair : pairList) { - if (count < topn) { - value = pair.getValue(); - edge.transformToNext(param.getContext(), selection.revMap(value)); + for (Pair pair : pairList) { + if (count < topn) { + value = pair.getValue(); + edge.transformToNext(param.getContext(), 
selection.revMap(value)); + } + count++; } - count++; } param.clear(); diff --git a/dingo-exec/src/main/java/io/dingodb/exec/operator/params/TxnPartVectorParam.java b/dingo-exec/src/main/java/io/dingodb/exec/operator/params/TxnPartVectorParam.java index 35e360b284..bc246e8f82 100644 --- a/dingo-exec/src/main/java/io/dingodb/exec/operator/params/TxnPartVectorParam.java +++ b/dingo-exec/src/main/java/io/dingodb/exec/operator/params/TxnPartVectorParam.java @@ -62,7 +62,9 @@ public class TxnPartVectorParam extends FilterProjectSourceParam { private final NavigableMap distributions; private final CommonId indexId; private final Float[] floatArray; + private final byte[] binaryBytes; private final int topN; + private final boolean isBinaryVector; private final Map parameterMap; private final IndexTable indexTable; @@ -95,7 +97,9 @@ public TxnPartVectorParam( Table table, NavigableMap distributions, Float[] floatArray, + byte[] binaryBytes, int topN, + boolean isBinaryVector, Map parameterMap, Table indexTable, RelOp relOp, @@ -114,7 +118,9 @@ public TxnPartVectorParam( this.distributions = distributions; this.indexId = indexTable.tableId; this.floatArray = floatArray; + this.binaryBytes = binaryBytes; this.topN = topN; + this.isBinaryVector = isBinaryVector; this.parameterMap = parameterMap; this.indexTable = (IndexTable) indexTable; this.pushDown = pushDown; diff --git a/dingo-exec/src/main/java/io/dingodb/exec/operator/params/VectorPointDistanceParam.java b/dingo-exec/src/main/java/io/dingodb/exec/operator/params/VectorPointDistanceParam.java index 88e09bfc8d..1cbdce9901 100644 --- a/dingo-exec/src/main/java/io/dingodb/exec/operator/params/VectorPointDistanceParam.java +++ b/dingo-exec/src/main/java/io/dingodb/exec/operator/params/VectorPointDistanceParam.java @@ -36,7 +36,11 @@ public class VectorPointDistanceParam extends AbstractParams { private final Integer vectorIndex; + private final boolean isBinaryVector; + private final List targetVector; + + private final 
byte[] binaryVector; @JsonProperty("dimension") private final Integer dimension; @@ -56,7 +60,9 @@ public VectorPointDistanceParam( RangeDistribution rangeDistribution, Integer vectorIndex, CommonId indexTableId, + boolean isBinaryVector, List targetVector, + byte[] binaryVector, Integer dimension, String algType, String metricType, @@ -65,7 +71,9 @@ public VectorPointDistanceParam( ) { this.rangeDistribution = rangeDistribution; this.vectorIndex = vectorIndex; + this.isBinaryVector = isBinaryVector; this.targetVector = targetVector; + this.binaryVector = binaryVector; this.dimension = dimension; this.algType = algType; this.metricType = metricType; diff --git a/dingo-exec/src/main/java/io/dingodb/exec/transaction/util/BinaryVectorUtils.java b/dingo-exec/src/main/java/io/dingodb/exec/transaction/util/BinaryVectorUtils.java new file mode 100644 index 0000000000..d2fbd87f96 --- /dev/null +++ b/dingo-exec/src/main/java/io/dingodb/exec/transaction/util/BinaryVectorUtils.java @@ -0,0 +1,73 @@ +/* + * Copyright 2021 DataCanvas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Static helpers for binary (bit) vectors used by the Hamming-distance vector index.
 *
 * <p>A binary vector arrives from SQL as one byte per dimension holding the ASCII
 * character {@code '0'} or {@code '1'}. {@link #getBinaryVector(byte[], int)} packs that
 * form into 8 bits per byte (most significant bit first), and
 * {@link #hammingDistance(byte[], byte[])} counts differing bits between two byte arrays.
 */
public final class BinaryVectorUtils {

    /** Number of dimensions packed into a single byte; valid dimensions are multiples of it. */
    private static final int MULTIPLE = 8;

    /** ASCII code of the character {@code '0'} (48). */
    private static final byte ASCII_ZERO = '0';

    /** ASCII code of the character {@code '1'} (49). */
    private static final byte ASCII_ONE = '1';

    private BinaryVectorUtils() {}

    /**
     * Validates an unpacked binary vector (one ASCII {@code '0'}/{@code '1'} byte per dimension).
     *
     * @param binaryVector the unpacked vector to validate
     * @param dimension    the expected number of dimensions
     * @throws IllegalArgumentException if {@code dimension} is not a positive multiple of 8,
     *                                  if {@code binaryVector} is {@code null}, if its length
     *                                  differs from {@code dimension}, or if any byte is not
     *                                  ASCII {@code '0'} or {@code '1'}
     */
    public static void checkBinaryVector(byte[] binaryVector, int dimension) {
        // "<= 0" rather than "== 0": a negative multiple of 8 is not a valid dimension either.
        if (dimension <= 0 || dimension % MULTIPLE != 0) {
            throw new IllegalArgumentException("Dimension must be a positive multiple of " + MULTIPLE);
        }
        if (binaryVector == null) {
            throw new IllegalArgumentException("The binary vector must not be null");
        }
        if (binaryVector.length != dimension) {
            throw new IllegalArgumentException("The length of the binary vector "
                + "does not match the specified dimension");
        }

        for (int i = 0; i < binaryVector.length; i++) {
            if (binaryVector[i] != ASCII_ZERO && binaryVector[i] != ASCII_ONE) {
                throw new IllegalArgumentException(
                    String.format("Invalid binary value at index %d: expected '0' or '1', but was '%c'.",
                        i, (char)binaryVector[i])
                );
            }
        }
    }

    /**
     * Packs the first {@code dimension} bytes of an unpacked vector into bits.
     *
     * <p>Only the lowest bit of every input byte is used, which maps ASCII {@code '0'} (48)
     * to 0 and ASCII {@code '1'} (49) to 1. Element {@code i} is stored in byte {@code i / 8}
     * at bit position {@code 7 - (i % 8)}, i.e. most significant bit first. This method does
     * not check that the input bytes are actually {@code '0'}/{@code '1'}.
     *
     * @param binaryVector the unpacked vector, at least {@code dimension} bytes long
     * @param dimension    the number of leading elements to pack; must not be negative
     * @return a new array of {@code ceil(dimension / 8)} bytes holding the packed bits
     * @throws IllegalArgumentException if {@code binaryVector} is {@code null},
     *                                  {@code dimension} is negative, or the vector is
     *                                  shorter than {@code dimension}
     */
    public static byte[] getBinaryVector(byte[] binaryVector, int dimension) {
        if (binaryVector == null) {
            throw new IllegalArgumentException("The binary vector must not be null");
        }
        if (dimension < 0 || binaryVector.length < dimension) {
            throw new IllegalArgumentException("The binary vector holds " + binaryVector.length
                + " element(s), which is fewer than the requested dimension " + dimension);
        }
        byte[] packed = new byte[(dimension + (MULTIPLE - 1)) / MULTIPLE];
        for (int i = 0; i < dimension; i++) {
            int bit = binaryVector[i] & 0x01;
            packed[i / MULTIPLE] |= (byte) (bit << ((MULTIPLE - 1) - (i % MULTIPLE)));
        }
        return packed;
    }

    /**
     * Computes the Hamming distance (number of differing bits) between two byte arrays.
     *
     * @param x the first array
     * @param y the second array
     * @return the total count of bit positions in which {@code x} and {@code y} differ
     * @throws IllegalArgumentException if either array is {@code null} or the lengths differ
     */
    public static int hammingDistance(byte[] x, byte[] y) {
        if (x == null || y == null || x.length != y.length) {
            throw new IllegalArgumentException("Arrays must be non-null and of the same length.");
        }
        int distance = 0;
        for (int i = 0; i < x.length; i++) {
            // Mask to 8 bits: the XOR is promoted to int and would otherwise carry sign bits.
            distance += Integer.bitCount((x[i] ^ y[i]) & 0xFF);
        }
        return distance;
    }

}
a/dingo-exec/src/main/java/io/dingodb/exec/transaction/util/TransactionCacheToMutation.java +++ b/dingo-exec/src/main/java/io/dingodb/exec/transaction/util/TransactionCacheToMutation.java @@ -65,6 +65,9 @@ import java.util.Map; import java.util.stream.Collectors; +import static io.dingodb.exec.transaction.util.BinaryVectorUtils.checkBinaryVector; +import static io.dingodb.exec.transaction.util.BinaryVectorUtils.getBinaryVector; + @Slf4j public final class TransactionCacheToMutation { @@ -181,7 +184,18 @@ public static Mutation cacheToMutation(@Nullable int op, @NonNull byte[] key, } else { Column column1 = index.getColumns().get(1); Vector vector; - if (column1.getElementTypeName().equalsIgnoreCase("FLOAT")) { + if (column1.getSqlTypeName().equalsIgnoreCase("BINARY")) { + byte[] values = (byte[]) record[colNames.indexOf(column1.getName())]; + int dimension = Integer.parseInt(index.getProperties().getProperty("dimension")); + checkBinaryVector(values, dimension); + byte[] bytes = getBinaryVector(values, dimension); + vector = Vector.builder() + .dimension(dimension) + .binaryValues(Collections.singletonList(bytes)) + .valueType(Vector.ValueType.UINT8) + .build(); + record[colNames.indexOf(column1.getName())] = new byte[]{}; + } else if (column1.getElementTypeName().equalsIgnoreCase("FLOAT")) { List values = (List) record[colNames.indexOf(column1.getName())]; vector = Vector.builder() .dimension(values.size()) @@ -211,6 +225,7 @@ public static Mutation cacheToMutation(@Nullable int op, @NonNull byte[] key, return new Mutation(Op.forNumber(op), key, value, forUpdateTs, vectorWithId, documentWithId); } + /** * transform TxnLocalData to Mutation. * @param txnLocalData a TxnLocalData object to tranform to Mutation. 
diff --git a/dingo-executor/src/main/java/io/dingodb/server/executor/ddl/DdlWorker.java b/dingo-executor/src/main/java/io/dingodb/server/executor/ddl/DdlWorker.java index 4901c497d5..835b0a8281 100644 --- a/dingo-executor/src/main/java/io/dingodb/server/executor/ddl/DdlWorker.java +++ b/dingo-executor/src/main/java/io/dingodb/server/executor/ddl/DdlWorker.java @@ -47,6 +47,8 @@ import io.dingodb.meta.MetaService; import io.dingodb.meta.SequenceService; import io.dingodb.meta.entity.Column; +import io.dingodb.meta.entity.IndexTable; +import io.dingodb.meta.entity.IndexType; import io.dingodb.meta.entity.Table; import io.dingodb.sdk.service.entity.meta.ColumnDefinition; import io.dingodb.sdk.service.entity.meta.DingoCommonId; @@ -484,6 +486,12 @@ public static Pair onTruncateTable(DdlContext dc, DdlJob job) { if (job.getArgs() != null) { newTableId = (long) job.getArgs().get(0); } + Pair checkDropDiskAnn = ms.checkDropDiskAnnIndex(job.getTableName()); + if (checkDropDiskAnn.getKey()) { + job.setDingoErr(DingoErrUtil.newInternalErr(checkDropDiskAnn.getValue())); + job.setState(JobState.jobStateCancelled); + return Pair.of(0L, job.getDingoErr().errorMsg); + } if (job.getSchemaState() == SchemaState.SCHEMA_PUBLIC) { Pair res = updateSchemaVersion(dc, job); job.setSchemaState(SchemaState.SCHEMA_GLOBAL_TXN_ONLY); @@ -536,6 +544,20 @@ public static Pair onDropTable(DdlContext dc, DdlJob job) { Pair res; switch (tableInfo.getTableDefinition().getSchemaState()) { case SCHEMA_PUBLIC: + Table table = InfoSchemaService.root().getTableDef(job.getSchemaId(), job.getTableId()); + List diskAnnIndex = table.getIndexes().stream() + .filter(e -> e.getIndexType() == IndexType.VECTOR_DISKANN) + .collect(Collectors.toList()); + for (IndexTable indexTable : diskAnnIndex) { + // check disk ann index status + Pair checkDropDiskAnn = + MetaService.root().checkDropDiskAnnIndex(indexTable.getTableId()); + if (checkDropDiskAnn.getKey()) { + 
job.setDingoErr(DingoErrUtil.newInternalErr(checkDropDiskAnn.getValue())); + job.setState(JobState.jobStateCancelled); + return Pair.of(0L, job.getDingoErr().errorMsg); + } + } tableInfo.getTableDefinition() .setSchemaState(SCHEMA_WRITE_ONLY); res = TableUtil.updateVersionAndTableInfos(dc, job, tableInfo, @@ -697,6 +719,15 @@ public static Pair onDropIndex(DdlContext dc, DdlJob job) { case SCHEMA_PUBLIC: indexWithId.getTableDefinition().setSchemaState(SCHEMA_WRITE_ONLY); job.setSchemaState(SchemaState.SCHEMA_WRITE_ONLY); + // check disk ann index status + Pair checkDropDiskAnn = + MetaService.root().checkDropDiskAnnIndex(Mapper.MAPPER.idFrom(indexWithId.getTableId())); + if (checkDropDiskAnn.getKey()) { + job.setDingoErr(DingoErrUtil.newInternalErr(checkDropDiskAnn.getValue())); + job.setState(JobState.jobStateCancelled); + return Pair.of(0L, job.getDingoErr().errorMsg); + } + return TableUtil.updateVersionAndIndexInfos(dc, job, indexWithId, originState != indexWithId.getTableDefinition().getSchemaState() ); diff --git a/dingo-meta-api/src/main/java/io/dingodb/meta/MetaService.java b/dingo-meta-api/src/main/java/io/dingodb/meta/MetaService.java index 105e76e4c1..9dc4b00700 100644 --- a/dingo-meta-api/src/main/java/io/dingodb/meta/MetaService.java +++ b/dingo-meta-api/src/main/java/io/dingodb/meta/MetaService.java @@ -26,6 +26,7 @@ import io.dingodb.common.table.IndexDefinition; import io.dingodb.common.table.TableDefinition; import io.dingodb.common.util.ByteArrayUtils.ComparableByteArray; +import io.dingodb.common.util.Pair; import io.dingodb.meta.entity.Table; import org.checkerframework.checker.nullness.qual.NonNull; @@ -233,6 +234,14 @@ default void dropIndex(CommonId table, CommonId index, long jobId, long startTs) throw new UnsupportedOperationException(); } + default Pair checkDropDiskAnnIndex(@NonNull CommonId index) { + throw new UnsupportedOperationException(); + } + + default Pair checkDropDiskAnnIndex(@NonNull String tableName) { + throw new 
UnsupportedOperationException(); + } + Map getTableIndexDefinitions(@NonNull CommonId id); /** diff --git a/dingo-meta-api/src/main/java/io/dingodb/meta/entity/IndexType.java b/dingo-meta-api/src/main/java/io/dingodb/meta/entity/IndexType.java index 710d2f78f8..4d8a308457 100644 --- a/dingo-meta-api/src/main/java/io/dingodb/meta/entity/IndexType.java +++ b/dingo-meta-api/src/main/java/io/dingodb/meta/entity/IndexType.java @@ -22,6 +22,8 @@ public enum IndexType { VECTOR_FLAT(true), VECTOR_IVF_FLAT(true), VECTOR_IVF_PQ(true), + VECTOR_BINARY_FLAT(true), + VECTOR_BINARY_IVF_FLAT(true), VECTOR_HNSW(true), VECTOR_DISKANN(true), VECTOR_BRUTEFORCE(true); diff --git a/dingo-store-api/src/main/java/io/dingodb/store/api/StoreInstance.java b/dingo-store-api/src/main/java/io/dingodb/store/api/StoreInstance.java index c64ef4fad0..e1e5fa690e 100644 --- a/dingo-store-api/src/main/java/io/dingodb/store/api/StoreInstance.java +++ b/dingo-store-api/src/main/java/io/dingodb/store/api/StoreInstance.java @@ -154,7 +154,8 @@ default Iterator scan(long requestTs, Range range, CoprocessorV2 copro } default List vectorSearch( - CommonId indexId, Float[] floatArray, int topN, Map parameterMap + CommonId indexId, Float[] floatArray, int topN, Map parameterMap, + boolean isDiskAnn, boolean isBinaryVector, byte[] binaryBytes ) { return vectorSearch( System.identityHashCode(floatArray), @@ -163,13 +164,15 @@ default List vectorSearch( topN, parameterMap, null, - false + isDiskAnn, + isBinaryVector, + binaryBytes ); } default List vectorSearch( long requestTs, CommonId indexId, Float[] floatArray, int topN, Map parameterMap, - CoprocessorV2 coprocessorV2, boolean isDiskAnn + CoprocessorV2 coprocessorV2, boolean isDiskAnn, boolean isBinaryVector, byte[] binaryBytes ) { throw new UnsupportedOperationException(); } diff --git a/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/mapper/IndexMapper.java b/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/mapper/IndexMapper.java index 
1186b8ca46..dc311899e1 100644 --- a/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/mapper/IndexMapper.java +++ b/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/mapper/IndexMapper.java @@ -80,6 +80,12 @@ default void setIndex(IndexTable.IndexTableBuilder builder, IndexParameter index case VECTOR_INDEX_TYPE_BRUTEFORCE: builder.indexType(io.dingodb.meta.entity.IndexType.VECTOR_BRUTEFORCE); break; + case VECTOR_INDEX_TYPE_BINARY_FLAT: + builder.indexType(io.dingodb.meta.entity.IndexType.VECTOR_BINARY_FLAT); + break; + case VECTOR_INDEX_TYPE_BINARY_IVF_FLAT: + builder.indexType(io.dingodb.meta.entity.IndexType.VECTOR_BINARY_IVF_FLAT); + break; default: throw new IllegalStateException( "Unexpected value: " + indexParameter.getVectorIndexParameter().getVectorIndexType() @@ -224,6 +230,9 @@ default void resetIndexParameter( case "L2": metricType = MetricType.METRIC_TYPE_L2; break; + case "HAMMING": + metricType = MetricType.METRIC_TYPE_HAMMING; + break; default: throw new IllegalStateException("Unsupported metric type: " + metricType1); } @@ -300,6 +309,35 @@ default void resetIndexParameter( ).build(); break; } + case "BINARY_FLAT": + if (dimension % 8 !=0) { + throw new RuntimeException("The dimension must be a multiple of 8."); + } + vectorIndexParameter = VectorIndexParameter.builder() + .vectorIndexType(VectorIndexType.VECTOR_INDEX_TYPE_BINARY_FLAT) + .vectorIndexParameter( + VectorIndexParameter.VectorIndexParameterNest.BinaryFlatParameter.builder() + .dimension(dimension) + .metricType(metricType) + .build() + ).build(); + break; + case "BINARY_IVF_FLAT": { + if (dimension % 8 !=0) { + throw new RuntimeException("The dimension must be a multiple of 8."); + } + int ncentroids = Integer.valueOf(properties.getOrDefault("ncentroids", "2048")); + vectorIndexParameter = VectorIndexParameter.builder() + .vectorIndexType(VectorIndexType.VECTOR_INDEX_TYPE_BINARY_IVF_FLAT) + .vectorIndexParameter( + 
VectorIndexParameter.VectorIndexParameterNest.BinaryIvfFlatParameter.builder() + .dimension(dimension) + .metricType(metricType) + .ncentroids(ncentroids) + .build() + ).build(); + break; + } default: throw new IllegalStateException("Unsupported type: " + properties.get("type")); } diff --git a/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/meta/MetaService.java b/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/meta/MetaService.java index 430d79fce8..eca6627487 100644 --- a/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/meta/MetaService.java +++ b/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/meta/MetaService.java @@ -22,6 +22,7 @@ import io.dingodb.common.concurrent.Executors; import io.dingodb.common.ddl.DdlUtil; import io.dingodb.common.ddl.GcDeleteRegion; +import io.dingodb.common.ddl.JobState; import io.dingodb.common.log.LogUtils; import io.dingodb.common.meta.SchemaInfo; import io.dingodb.common.meta.SchemaState; @@ -41,6 +42,7 @@ import io.dingodb.common.util.ByteArrayUtils.ComparableByteArray; import io.dingodb.common.util.DefinitionUtils; import io.dingodb.common.util.Optional; +import io.dingodb.common.util.Pair; import io.dingodb.common.util.Utils; import io.dingodb.meta.DdlService; import io.dingodb.meta.MetaServiceProvider; @@ -87,6 +89,7 @@ import io.dingodb.sdk.service.entity.meta.UpdateTenantRequest; import io.dingodb.store.api.StoreInstance; import io.dingodb.store.proxy.Configuration; +import io.dingodb.store.proxy.mapper.Mapper; import io.dingodb.store.proxy.service.AutoIncrementService; import io.dingodb.store.proxy.service.CodecService; import io.dingodb.store.service.InfoSchemaService; @@ -815,32 +818,52 @@ public void createIndex(CommonId tableId, String tableName, IndexDefinition inde @Override public void dropIndex(CommonId table, CommonId index, long jobId, long startTs) { + dropRegionByTable(index, jobId, startTs); + infoSchemaService.dropIndex(table.seq, index.seq); + } + + @Override + public Pair 
checkDropDiskAnnIndex(@NonNull String tableName) { + long schemaId = id.getEntityId(); + // Get old table and indexes + TableDefinitionWithId table = Optional.mapOrGet( + infoSchemaService.getTable(schemaId, tableName), __ -> (TableDefinitionWithId) __, () -> null); + List indexList = infoSchemaService.listIndex(schemaId, table.getTableId().getEntityId()); + List indexes = indexList.stream() + .map(object -> (TableDefinitionWithId) object).collect(Collectors.toList()); + for (TableDefinitionWithId index : indexes) { + // check disk ann index status + Pair checkDropDiskAnn = checkDropDiskAnnIndex(MAPPER.idFrom(index.getTableId())); + if (checkDropDiskAnn.getKey()) { + return checkDropDiskAnn; + } + } + return new Pair<>(false, ""); + } + + @Override + public Pair checkDropDiskAnnIndex(@NonNull CommonId index) { + boolean noDelete = false; + String msg = ""; if (isDiskAnnIndex(index)) { Collection rangeDistributions = getRangeDistribution(index) .values(); long tso = tso(); - boolean noDelete = false; - String msg = ""; for (RangeDistribution rangeDistribution : rangeDistributions) { StoreInstance instance = io.dingodb.exec.Services.KV_STORE.getInstance(index, rangeDistribution.id()); String diskAnnStatus = instance.diskAnnStatus(tso, index); if("DISKANN_BUILDING".equalsIgnoreCase(diskAnnStatus)){ - msg = "building"; + msg = "diskann is building, please wait."; noDelete = true; break; } else if ("DISKANN_LOADING".equalsIgnoreCase(diskAnnStatus)) { - msg = "loading"; + msg = "diskann is loading, please wait."; noDelete = true; break; } } - if (noDelete) { - throw new RuntimeException("diskann is " + msg + ", please wait."); - } - } - dropRegionByTable(index, jobId, startTs); - infoSchemaService.dropIndex(table.seq, index.seq); + return new Pair<>(noDelete, msg); } private static boolean isDiskAnnIndex(CommonId index) { @@ -927,7 +950,13 @@ public long truncateTable(long schemaId, @NonNull String tableName, long tableEn List indexList = 
infoSchemaService.listIndex(schemaId, table.getTableId().getEntityId()); List indexes = indexList.stream() .map(object -> (TableDefinitionWithId) object).collect(Collectors.toList()); - + for (TableDefinitionWithId index : indexes) { + // check disk ann index status + Pair checkDropDiskAnn = checkDropDiskAnnIndex(MAPPER.idFrom(index.getTableId())); + if (checkDropDiskAnn.getKey()) { + throw new RuntimeException(checkDropDiskAnn.getValue()); + } + } // Generate new table ids. boolean autoInc = table.getTableDefinition().getColumns().stream() .anyMatch(io.dingodb.sdk.service.entity.meta.ColumnDefinition::isAutoIncrement); diff --git a/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/service/StoreService.java b/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/service/StoreService.java index 420eb21180..b32b4332a7 100644 --- a/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/service/StoreService.java +++ b/dingo-store-proxy/src/main/java/io/dingodb/store/proxy/service/StoreService.java @@ -578,17 +578,28 @@ public List vectorSearch( int topN, Map parameterMap, CoprocessorV2 coprocessor, - boolean isDiskAnn + boolean isDiskAnn, + boolean isBinaryVector, + byte[] binaryBytes ) { List vectors = new ArrayList<>(); IndexTable indexTable = tableMap.get(indexId); - Vector vector = Vector.builder() - .dimension(Integer.parseInt(indexTable.getProperties().getProperty("dimension"))) - .floatValues(Arrays.asList(floatArray)) - .valueType(ValueType.FLOAT) - .build(); + Vector vector; + if (isBinaryVector) { + vector = Vector.builder() + .dimension(Integer.parseInt(indexTable.getProperties().getProperty("dimension"))) + .binaryValues(singletonList(binaryBytes)) + .valueType(ValueType.UINT8) + .build(); + } else { + vector = Vector.builder() + .dimension(Integer.parseInt(indexTable.getProperties().getProperty("dimension"))) + .floatValues(Arrays.asList(floatArray)) + .valueType(ValueType.FLOAT) + .build(); + } VectorWithId vectorWithId = 
VectorWithId.builder().vector(vector).build(); vectors.add(vectorWithId); @@ -653,6 +664,7 @@ public List vectorSearch( response = new VectorSearchResponse(); } response.setFloatValues(vectorWithDistance.getVectorWithId().getVector().getFloatValues()); + response.setBinaryValues(vectorWithDistance.getVectorWithId().getVector().getBinaryValues()); if (vectorWithDistance.getVectorWithId().getTableData() != null) { response.setKey(vectorWithDistance.getVectorWithId().getTableData().getTableKey()); } else { @@ -928,6 +940,32 @@ private SearchNest getSearch( } return SearchNest.Hnsw.builder().efSearch(efSearch).build(); + case VECTOR_BINARY_FLAT: + int binaryParallelOnQueries = 10; + o = parameterMap.get("parallelOnQueries"); + if (o != null) { + binaryParallelOnQueries = ((Number) o).intValue(); + } + + return SearchNest.BinaryFlat.builder() + .parallelOnQueries(binaryParallelOnQueries) + .build(); + case VECTOR_BINARY_IVF_FLAT: + int binaryNProbe = 10; + o = parameterMap.get("nprobe"); + if (o != null) { + binaryNProbe = ((Number) o).intValue(); + } + + int binaryParallel = 10; + o = parameterMap.get("parallelOnQueries"); + if (o != null) { + binaryParallel = ((Number) o).intValue(); + } + return SearchNest.BinaryIvfFlat.builder() + .nprobe(binaryNProbe) + .parallelOnQueries(binaryParallel) + .build(); case VECTOR_FLAT: default: { int parallelOnQueries = 10; diff --git a/dingo-tool-service/src/main/java/io/dingodb/tool/service/ToolService.java b/dingo-tool-service/src/main/java/io/dingodb/tool/service/ToolService.java index 3fe9e92da4..264fa0141f 100644 --- a/dingo-tool-service/src/main/java/io/dingodb/tool/service/ToolService.java +++ b/dingo-tool-service/src/main/java/io/dingodb/tool/service/ToolService.java @@ -31,6 +31,7 @@ import io.dingodb.sdk.service.entity.index.VectorCalcDistanceResponse; import io.dingodb.sdk.service.entity.index.VectorDistance; +import java.util.Collections; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ 
-81,19 +82,39 @@ private static VectorCalcDistanceRequest buildRequest(VectorCalcDistance distanc case "COSINE": metricType = MetricType.METRIC_TYPE_COSINE; break; + case "METRIC_TYPE_HAMMING": + metricType = MetricType.METRIC_TYPE_HAMMING; + break; case "L2": default: metricType = MetricType.METRIC_TYPE_L2; break; } + if (!distance.isBinaryVector()) { + return VectorCalcDistanceRequest.builder() + .algorithmType(algorithmType) + .metricType(metricType) + .opLeftVectors(distance.getLeftList().stream() + .map(l -> Vector.builder().floatValues(l).dimension(distance.getDimension()).valueType(ValueType.FLOAT).build()) + .collect(Collectors.toList())) + .opRightVectors(distance.getRightList().stream() + .map(r -> Vector.builder().valueType(ValueType.FLOAT).dimension(distance.getDimension()).floatValues(r).build()) + .collect(Collectors.toList())) + .isReturnNormlize(false) + .build(); + } return VectorCalcDistanceRequest.builder() .algorithmType(algorithmType) .metricType(metricType) - .opLeftVectors(distance.getLeftList().stream() - .map(l -> Vector.builder().floatValues(l).dimension(distance.getDimension()).valueType(ValueType.FLOAT).build()) + .opLeftVectors(distance.getLeftBinaryValues().stream() + .map(l -> Vector.builder().binaryValues( + Collections.singletonList(l)).dimension(distance.getDimension() + ).valueType(ValueType.UINT8).build()) .collect(Collectors.toList())) - .opRightVectors(distance.getRightList().stream() - .map(r -> Vector.builder().valueType(ValueType.FLOAT).dimension(distance.getDimension()).floatValues(r).build()) + .opRightVectors(distance.getRightBinaryValues().stream() + .map(l -> Vector.builder().binaryValues( + Collections.singletonList(l)).dimension(distance.getDimension() + ).valueType(ValueType.UINT8).build()) .collect(Collectors.toList())) .isReturnNormlize(false) .build();