diff --git a/proto/value_unmarshal.go b/proto/value_unmarshal.go index 2cc380aa..aa31ae7e 100755 --- a/proto/value_unmarshal.go +++ b/proto/value_unmarshal.go @@ -215,7 +215,9 @@ func UnmarshalValue(b []byte, arch byte, baseType basetype.BaseType, profileType for i := range b { if b[i] == '\x00' { if last != i { // only if not an invalid string - vals = append(vals, utf8String(b[last:i])) + if s := utf8String(b[last:i]); len(s) > 0 { + vals = append(vals, s) + } } last = i + 1 } @@ -229,24 +231,27 @@ func UnmarshalValue(b []byte, arch byte, baseType basetype.BaseType, profileType return Value{}, fmt.Errorf("type %s(%d) is not supported: %w", baseType, baseType, ErrTypeNotSupported) } -// trimRightZero returns a subslice of b by slicing off all trailing zero. +// trimRightZero returns a subslice of b up to the null-terminated +// string ('\x00') and discard the remaining bytes, as these are likely +// padding bytes used to meet the desired length. func trimRightZero(b []byte) []byte { - for len(b) > 0 && b[len(b)-1] == 0 { - b = b[:len(b)-1] + for i := range b { + if b[i] == 0 { + return b[:i] + } } return b } -// utf8String converts b into a valid utf8 string. -// Any invalid utf8 character will be converted into utf8.RuneError. +// utf8String converts b into a valid UTF-8 string. If it encounters +// utf8.RuneError character, it will discard that character. func utf8String(b []byte) string { - if utf8.Valid(b) { // Fast path - return string(b) - } buf := make([]byte, 0, 255) for len(b) > 0 { r, size := utf8.DecodeRune(b) - buf = utf8.AppendRune(buf, r) + if r != utf8.RuneError { + buf = append(buf, byte(r)) // normal append as r should be valid now + } b = b[size:] } return string(buf) diff --git a/proto/value_unmarshal_internal_test.go b/proto/value_unmarshal_internal_test.go index a4fd36bf..680d2601 100644 --- a/proto/value_unmarshal_internal_test.go +++ b/proto/value_unmarshal_internal_test.go @@ -5,6 +5,7 @@ package proto import ( + "fmt" "testing" ) @@ -18,6 +19,8 @@ func TestTrimRightZero(t *testing.T) { {str: "Open Water", expected: "Open Water"}, {str: "Open Water\x00", expected: "Open Water"}, {str: "Open Water\x00\x00", expected: "Open Water"}, + {str: "Walk or jog lightly.\x00��", expected: "Walk or jog lightly."}, + {str: "Walk or jog lightly.��", expected: "Walk or jog lightly.��"}, } for _, tc := range tt { @@ -37,6 +40,7 @@ func BenchmarkTrimRightZero(b *testing.B) { _ = trimRightZero([]byte("Open Water")) _ = trimRightZero([]byte("Open Water\x00")) _ = trimRightZero([]byte("Open Water\x00\x00")) + _ = trimRightZero([]byte("Walk or jog lightly.\x00��")) } } @@ -45,15 +49,18 @@ func TestUTF8String(t *testing.T) { in []byte out string }{ - {in: []byte("0000000000000�0000000"), out: "0000000000000�0000000"}, - {in: []byte("0000000000000\xe80000000"), out: "0000000000000�0000000"}, + {in: []byte("Walk or jog lightly.��"), out: "Walk or jog lightly."}, + {in: []byte("0000000000000�0000000"), out: "00000000000000000000"}, + {in: []byte("0000000000000\xe80000000"), out: "00000000000000000000"}, } - for _, tc := range tt { - out := utf8String(tc.in) - if out != tc.out { - t.Fatalf("expected: %q, got: %q", tc.out, out) - } + for i, tc := range tt { + t.Run(fmt.Sprintf("[%d] %s", i, tc.in), func(t *testing.T) { + out := utf8String(tc.in) + if out != tc.out { + t.Fatalf("expected: %q, got: %q", tc.out, out) + } + }) } } diff --git a/proto/value_unmarshal_test.go b/proto/value_unmarshal_test.go index 358a7db2..21a6cd82 100644 --- a/proto/value_unmarshal_test.go +++ b/proto/value_unmarshal_test.go @@ -16,7 +16,7 @@ import ( "github.com/muktihari/fit/proto" ) -func TestUnmarshalValue(t *testing.T) { +func TestValueMarshalUnmarshalRoundTrip(t *testing.T) { tt := []struct { value proto.Value baseType basetype.BaseType @@ -72,6 +72,7 @@ func TestUnmarshalValue(t *testing.T) { {value: proto.SliceFloat64([]float64{1, 1.1}), baseType: basetype.Float64, profileType: profile.Float64, isArray: true}, {value: proto.SliceFloat64([]float64{1, -5.1}), baseType: basetype.Float64, profileType: profile.Float64, isArray: true}, {value: proto.SliceString([]string{"a", "b"}), baseType: basetype.String, profileType: profile.String, isArray: true}, + {value: proto.SliceString([]string{"a", "b"}), baseType: basetype.String, profileType: profile.String, isArray: true}, { value: proto.SliceUint8(stringsToBytes([]string{"mobile_app_version", "\x00", "\x00"}...)), expected: proto.SliceString([]string{"mobile_app_version"}), @@ -137,7 +138,7 @@ func stringsToBytes(vals ...string) []byte { return b } -func TestUnmarshalValueSliceAlloc(t *testing.T) { +func TestValueUnmarshalValueSliceAlloc(t *testing.T) { tt := []struct { value proto.Value profileType profile.ProfileType @@ -172,6 +173,55 @@ func TestUnmarshalValueSliceAlloc(t *testing.T) { } } +func TestValueUnmarshalStringValue(t *testing.T) { + tt := []struct { + in []byte + array bool + out []string // if array is false, result must be placed on index zero + err error + }{ + { + in: []byte("Walk or jog lightly.\x00��\x00"), + array: false, + out: []string{"Walk or jog lightly."}, + }, + { + in: []byte("Walk or jog lightly.\x00��\x00Another string\x00"), + array: false, + out: []string{"Walk or jog lightly.", "Another string"}, + }, + { + in: []byte("Walk or jog lightly.\x00��\x00"), + array: true, + out: []string{"Walk or jog lightly."}, + }, + } + + for i, tc := range tt { + t.Run(fmt.Sprintf("[%d] %s", i, tc.in), func(t *testing.T) { + value, err := proto.UnmarshalValue(tc.in, proto.LittleEndian, basetype.String, profile.String, tc.array) + if !errors.Is(tc.err, err) { + t.Fatalf("expected error: %v, got: %v", tc.err, err) + } + if err != nil { + return + } + + if tc.array { + vs := value.SliceString() + if diff := cmp.Diff(vs, tc.out); diff != "" { + t.Fatal(diff) + } + } else { + v := value.String() + if diff := cmp.Diff(v, tc.out[0]); diff != "" { + t.Fatal(diff) + } + } + }) + } +} + func BenchmarkUnmarshalValue(b *testing.B) { b.StopTimer() v := proto.Uint32(100)