From a099fc7ac458400ed57c33fd9c802fd5b99f5db0 Mon Sep 17 00:00:00 2001 From: Qingyang Hu Date: Fri, 31 Jan 2025 17:11:46 -0500 Subject: [PATCH] update test cases; update error message. --- bson/bson_binary_vector_spec_test.go | 90 +++++++++++++++++++++++----- bson/vector.go | 71 ++++++++++------------ 2 files changed, 108 insertions(+), 53 deletions(-) diff --git a/bson/bson_binary_vector_spec_test.go b/bson/bson_binary_vector_spec_test.go index 5077664341..a2c33f5a39 100644 --- a/bson/bson_binary_vector_spec_test.go +++ b/bson/bson_binary_vector_spec_test.go @@ -36,7 +36,7 @@ type bsonBinaryVectorTestCase struct { CanonicalBson string `json:"canonical_bson"` } -func Test_BsonBinaryVector(t *testing.T) { +func TestBsonBinaryVector(t *testing.T) { t.Parallel() jsonFiles, err := findJSONFilesInDir(bsonBinaryVectorDir) @@ -70,13 +70,13 @@ func Test_BsonBinaryVector(t *testing.T) { val := Binary{Subtype: TypeBinaryVector} for _, tc := range [][]byte{ - {byte(Float32Vector), 0, 42}, - {byte(Float32Vector), 0, 42, 42}, - {byte(Float32Vector), 0, 42, 42, 42}, + {Float32Vector, 0, 42}, + {Float32Vector, 0, 42, 42}, + {Float32Vector, 0, 42, 42, 42}, - {byte(Float32Vector), 0, 42, 42, 42, 42, 42}, - {byte(Float32Vector), 0, 42, 42, 42, 42, 42, 42}, - {byte(Float32Vector), 0, 42, 42, 42, 42, 42, 42, 42}, + {Float32Vector, 0, 42, 42, 42, 42, 42}, + {Float32Vector, 0, 42, 42, 42, 42, 42, 42}, + {Float32Vector, 0, 42, 42, 42, 42, 42, 42, 42}, } { t.Run(fmt.Sprintf("marshaling %d bytes", len(tc)-2), func(t *testing.T) { val.Data = tc @@ -91,6 +91,36 @@ func Test_BsonBinaryVector(t *testing.T) { } }) + t.Run("FLOAT32 with padding", func(t *testing.T) { + t.Parallel() + + t.Run("Unmarshaling", func(t *testing.T) { + val := D{{"vector", Binary{Subtype: TypeBinaryVector, Data: []byte{Float32Vector, 3}}}} + b, err := Marshal(val) + require.NoError(t, err, "marshaling test BSON") + var got struct { + Vector Vector + } + err = Unmarshal(b, &got) + require.ErrorContains(t, err, errNonZeroVectorPadding.Error()) + }) + }) + + t.Run("INT8 with padding", func(t *testing.T) { + t.Parallel() + + t.Run("Unmarshaling", func(t *testing.T) { + val := D{{"vector", Binary{Subtype: TypeBinaryVector, Data: []byte{Int8Vector, 3}}}} + b, err := Marshal(val) + require.NoError(t, err, "marshaling test BSON") + var got struct { + Vector Vector + } + err = Unmarshal(b, &got) + require.ErrorContains(t, err, errNonZeroVectorPadding.Error()) + }) + }) + t.Run("Padding specified with no vector data PACKED_BIT", func(t *testing.T) { t.Parallel() @@ -134,13 +164,13 @@ func convertSlice[T int8 | float32 | byte](s []interface{}) []T { v := make([]T, len(s)) for i, e := range s { f := math.NaN() - switch v := e.(type) { + switch val := e.(type) { case float64: - f = v + f = val case string: - if v == "inf" { + if val == "inf" { f = math.Inf(0) - } else if v == "-inf" { + } else if val == "-inf" { f = math.Inf(-1) } } @@ -150,10 +180,6 @@ func convertSlice[T int8 | float32 | byte](s []interface{}) []T { } func runBsonBinaryVectorTest(t *testing.T, testKey string, test bsonBinaryVectorTestCase) { - if !test.Valid { - t.Skipf("skip invalid case %s", test.Description) - } - testVector := make(map[string]Vector) switch alias := test.DtypeHex; alias { case "0x03": @@ -180,6 +206,23 @@ func runBsonBinaryVectorTest(t *testing.T, testKey string, test bsonBinaryVector require.NoError(t, err, "decoding canonical BSON") t.Run("Unmarshaling", func(t *testing.T) { + skipCases := map[string]string{ + "FLOAT32 with padding": "run in alternative case", + "Overflow Vector INT8": "compile-time restriction", + "Underflow Vector INT8": "compile-time restriction", + "INT8 with padding": "run in alternative case", + "INT8 with float inputs": "compile-time restriction", + "Overflow Vector PACKED_BIT": "compile-time restriction", + "Underflow Vector PACKED_BIT": "compile-time restriction", + "Vector with float values PACKED_BIT": "compile-time restriction", + "Padding specified with no vector data PACKED_BIT": "run in alternative case", + "Exceeding maximum padding PACKED_BIT": "run in alternative case", + "Negative padding PACKED_BIT": "compile-time restriction", + } + if reason, ok := skipCases[test.Description]; ok { + t.Skipf("skip test case %s: %s", test.Description, reason) + } + t.Parallel() var got map[string]Vector @@ -189,6 +232,23 @@ func runBsonBinaryVectorTest(t *testing.T, testKey string, test bsonBinaryVector }) t.Run("Marshaling", func(t *testing.T) { + skipCases := map[string]string{ + "FLOAT32 with padding": "private padding field", + "Overflow Vector INT8": "compile-time restriction", + "Underflow Vector INT8": "compile-time restriction", + "INT8 with padding": "private padding field", + "INT8 with float inputs": "compile-time restriction", + "Overflow Vector PACKED_BIT": "compile-time restriction", + "Underflow Vector PACKED_BIT": "compile-time restriction", + "Vector with float values PACKED_BIT": "compile-time restriction", + "Padding specified with no vector data PACKED_BIT": "run in alternative case", + "Exceeding maximum padding PACKED_BIT": "run in alternative case", + "Negative padding PACKED_BIT": "compile-time restriction", + } + if reason, ok := skipCases[test.Description]; ok { + t.Skipf("skip test case %s: %s", test.Description, reason) + } + t.Parallel() got, err := Marshal(testVector) diff --git a/bson/vector.go b/bson/vector.go index e2b2e91cda..06c66e49cc 100644 --- a/bson/vector.go +++ b/bson/vector.go @@ -13,50 +13,42 @@ import ( "math" ) -// VectorDType represents the Vector data type. -type VectorDType byte - // These constants are vector data types. const ( - Int8Vector VectorDType = 0x03 - Float32Vector VectorDType = 0x27 - PackedBitVector VectorDType = 0x10 + Int8Vector byte = 0x03 + Float32Vector byte = 0x27 + PackedBitVector byte = 0x10 ) -// Stringer of VectorDType -func (vt VectorDType) String() string { - switch vt { - case Int8Vector: - return "int8" - case Float32Vector: - return "float32" - case PackedBitVector: - return "packed bit" - default: - return "invalid" - } -} - // These are vector conversion errors. var ( errInsufficientVectorData = errors.New("insufficient data") errNonZeroVectorPadding = errors.New("padding must be 0") - errVectorPaddingTooLarge = errors.New("padding larger than 7") + errVectorPaddingTooLarge = errors.New("padding cannot be larger than 7") ) type vectorTypeError struct { Method string - Type VectorDType + Type byte } // Error implements the error interface. func (vte vectorTypeError) Error() string { - return "Call of " + vte.Method + " on " + vte.Type.String() + " vector" + t := "invalid" + switch vte.Type { + case Int8Vector: + t = "int8" + case Float32Vector: + t = "float32" + case PackedBitVector: + t = "packed bit" + } + return fmt.Sprintf("cannot call %s, on a type %s vector", vte.Method, t) } // Vector represents a densely packed array of numbers / bits. type Vector struct { - dType VectorDType + dType byte int8Data []int8 float32Data []float32 bitData []byte @@ -64,7 +56,7 @@ type Vector struct { } // Type returns the vector type. -func (v Vector) Type() VectorDType { +func (v Vector) Type() byte { return v.dType } @@ -123,7 +115,7 @@ func (v Vector) PackedBitOK() ([]byte, uint8, bool) { return v.bitData, v.bitPadding, true } -// Binary returns the BSON Binary of the Vector. +// Binary returns the BSON Binary representation of the Vector. func (v Vector) Binary() Binary { switch v.Type() { case Int8Vector: @@ -133,15 +125,17 @@ func (v Vector) Binary() Binary { case PackedBitVector: return binaryFromBitVector(v.PackedBit()) default: - panic("invalid Vector type") + panic(fmt.Sprintf("invalid Vector data type: %d", v.dType)) } } func binaryFromInt8Vector(v []int8) Binary { - data := make([]byte, 2, len(v)+2) - copy(data, []byte{byte(Int8Vector), 0}) - for _, e := range v { - data = append(data, byte(e)) + data := make([]byte, len(v)+2) + data[0] = Int8Vector + data[1] = 0 + + for i, e := range v { + data[i+2] = byte(e) } return Binary{ @@ -152,7 +146,8 @@ func binaryFromInt8Vector(v []int8) Binary { func binaryFromFloat32Vector(v []float32) Binary { data := make([]byte, 2, len(v)*4+2) - copy(data, []byte{byte(Float32Vector), 0}) + data[0] = Float32Vector + data[1] = 0 var a [4]byte for _, e := range v { binary.LittleEndian.PutUint32(a[:], math.Float32bits(e)) @@ -166,7 +161,7 @@ func binaryFromFloat32Vector(v []float32) Binary { } func binaryFromBitVector(bits []byte, padding uint8) Binary { - data := []byte{byte(PackedBitVector), padding} + data := []byte{PackedBitVector, padding} data = append(data, bits...) return Binary{ Subtype: TypeBinaryVector, @@ -180,12 +175,12 @@ func NewVector[T int8 | float32](data []T) Vector { switch a := any(data).(type) { case []int8: v.dType = Int8Vector - v.int8Data = []int8{} - v.int8Data = append(v.int8Data, a...) + v.int8Data = make([]int8, len(data)) + copy(v.int8Data, a) case []float32: v.dType = Float32Vector - v.float32Data = []float32{} - v.float32Data = append(v.float32Data, a...) + v.float32Data = make([]float32, len(data)) + copy(v.float32Data, a) default: panic(fmt.Errorf("unsupported type %T", data)) } @@ -217,7 +212,7 @@ func NewVectorFromBinary(b Binary) (Vector, error) { if len(b.Data) < 2 { return v, errInsufficientVectorData } - switch t := b.Data[0]; VectorDType(t) { + switch t := b.Data[0]; t { case Int8Vector: return newInt8Vector(b.Data[1:]) case Float32Vector: