From bf0771a6c1f25f4a2316071a2b36e873395b1fef Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Tue, 6 Aug 2024 13:26:16 +0800 Subject: [PATCH] fix: didn't consider `json.Marshaler`/`json.Unmarshaler` when handling `json:",string"` tag --- go1.23 | 1 - .../decoder/jitdec/assembler_regabi_amd64.go | 26 +++++- internal/decoder/jitdec/compiler.go | 72 +++++++++++---- .../decoder/jitdec/generic_regabi_amd64.go | 1 + internal/decoder/jitdec/primitives.go | 7 ++ internal/encoder/compiler.go | 25 ++++-- issue_test/issue670_test.go | 89 +++++++++++++++++++ 7 files changed, 195 insertions(+), 26 deletions(-) delete mode 160000 go1.23 create mode 100644 issue_test/issue670_test.go diff --git a/go1.23 b/go1.23 deleted file mode 160000 index c9940fe2a..000000000 --- a/go1.23 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c9940fe2a9f2eb77327efca860abfbae8d94bf28 diff --git a/internal/decoder/jitdec/assembler_regabi_amd64.go b/internal/decoder/jitdec/assembler_regabi_amd64.go index 1daddad0e..09e6faa95 100644 --- a/internal/decoder/jitdec/assembler_regabi_amd64.go +++ b/internal/decoder/jitdec/assembler_regabi_amd64.go @@ -972,11 +972,13 @@ var ( var ( _F_decodeJsonUnmarshaler obj.Addr + _F_decodeJsonUnmarshalerQuoted obj.Addr _F_decodeTextUnmarshaler obj.Addr ) func init() { _F_decodeJsonUnmarshaler = jit.Func(decodeJsonUnmarshaler) + _F_decodeJsonUnmarshalerQuoted = jit.Func(decodeJsonUnmarshalerQuoted) _F_decodeTextUnmarshaler = jit.Func(decodeTextUnmarshaler) } @@ -1061,14 +1063,15 @@ var ( _F_skip_number = jit.Imm(int64(native.S_skip_number)) ) -func (self *_Assembler) unmarshal_json(t reflect.Type, deref bool) { +func (self *_Assembler) unmarshal_json(t reflect.Type, deref bool, f obj.Addr) { self.call_sf(_F_skip_one) // CALL_SF skip_one self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v + self.Emit("MOVQ", _IC, _VAR_ic) // store for mismatch error skip self.slice_from_r(_AX, 0) // SLICE_R AX, $0 self.Emit("MOVQ" , _DI, 
_ARG_sv_p) // MOVQ DI, sv.p self.Emit("MOVQ" , _SI, _ARG_sv_n) // MOVQ SI, sv.n - self.unmarshal_func(t, _F_decodeJsonUnmarshaler, deref) // UNMARSHAL json, ${t}, ${deref} + self.unmarshal_func(t, f, deref) // UNMARSHAL json, ${t}, ${deref} } func (self *_Assembler) unmarshal_text(t reflect.Type, deref bool) { @@ -1102,6 +1105,13 @@ func (self *_Assembler) unmarshal_func(t reflect.Type, fn obj.Addr, deref bool) self.Emit("MOVQ" , _ARG_sv_p, _CX) // MOVQ sv.p, CX self.Emit("MOVQ" , _ARG_sv_n, _DI) // MOVQ sv.n, DI self.call_go(fn) // CALL_GO ${fn} + self.Emit("CMPQ", _ET, _I_json_MismatchTypeError) // check if MismatchedError + self.Sjmp("JNE" , "_check_error_{n}") + self.Emit("MOVQ", jit.Type(t), _CX) // store current type + self.Emit("MOVQ", _CX, _VAR_et) // store current type + self.Emit("MOVQ", _VAR_ic, _IC) // recover the pos + self.Emit("XORL", _ET, _ET) + self.Link("_check_error_{n}") self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET self.Sjmp("JNZ" , _LB_error) // JNZ _error } @@ -1774,11 +1784,19 @@ func (self *_Assembler) _asm_OP_struct_field(p *_Instr) { } func (self *_Assembler) _asm_OP_unmarshal(p *_Instr) { - self.unmarshal_json(p.vt(), true) + if iv := p.i64(); iv != 0 { + self.unmarshal_json(p.vt(), true, _F_decodeJsonUnmarshalerQuoted) + } else { + self.unmarshal_json(p.vt(), true, _F_decodeJsonUnmarshaler) + } } func (self *_Assembler) _asm_OP_unmarshal_p(p *_Instr) { - self.unmarshal_json(p.vt(), false) + if iv := p.i64(); iv != 0 { + self.unmarshal_json(p.vt(), false, _F_decodeJsonUnmarshalerQuoted) + } else { + self.unmarshal_json(p.vt(), false, _F_decodeJsonUnmarshaler) + } } func (self *_Assembler) _asm_OP_unmarshal_text(p *_Instr) { diff --git a/internal/decoder/jitdec/compiler.go b/internal/decoder/jitdec/compiler.go index f61105bc2..2a3180f81 100644 --- a/internal/decoder/jitdec/compiler.go +++ b/internal/decoder/jitdec/compiler.go @@ -271,6 +271,13 @@ func newInsVt(op _Op, vt reflect.Type) _Instr { } } +func newInsVtI(op _Op, vt reflect.Type, 
iv int) _Instr { + return _Instr { + u: packOp(op) | rt.PackInt(iv), + p: unsafe.Pointer(rt.UnpackType(vt)), + } +} + func newInsVf(op _Op, vf *caching.FieldMap) _Instr { return _Instr { u: packOp(op), @@ -452,6 +459,10 @@ func (self *_Program) rtt(op _Op, vt reflect.Type) { *self = append(*self, newInsVt(op, vt)) } +func (self *_Program) rtti(op _Op, vt reflect.Type, iv int) { + *self = append(*self, newInsVtI(op, vt, iv)) +} + func (self *_Program) fmv(op _Op, vf *caching.FieldMap) { *self = append(*self, newInsVf(op, vf)) } @@ -527,35 +538,49 @@ func (self *_Compiler) compile(vt reflect.Type) (ret _Program, err error) { return } -func (self *_Compiler) checkMarshaler(p *_Program, vt reflect.Type) bool { +const ( + checkMarshalerFlags_quoted = 1 +) + +func (self *_Compiler) checkMarshaler(p *_Program, vt reflect.Type, flags int, exec bool) bool { pt := reflect.PtrTo(vt) /* check for `json.Unmarshaler` with pointer receiver */ if pt.Implements(jsonUnmarshalerType) { - p.rtt(_OP_unmarshal_p, pt) + if exec { + p.add(_OP_lspace) + p.rtti(_OP_unmarshal_p, pt, flags) + } return true } /* check for `json.Unmarshaler` */ if vt.Implements(jsonUnmarshalerType) { - p.add(_OP_lspace) - self.compileUnmarshalJson(p, vt) + if exec { + p.add(_OP_lspace) + self.compileUnmarshalJson(p, vt, flags) + } return true } /* check for `encoding.TextMarshaler` with pointer receiver */ if pt.Implements(encodingTextUnmarshalerType) { - p.add(_OP_lspace) - self.compileUnmarshalTextPtr(p, pt) + if exec { + p.add(_OP_lspace) + self.compileUnmarshalTextPtr(p, pt, flags) + } return true } /* check for `encoding.TextUnmarshaler` */ if vt.Implements(encodingTextUnmarshalerType) { - p.add(_OP_lspace) - self.compileUnmarshalText(p, vt) + if exec { + p.add(_OP_lspace) + self.compileUnmarshalText(p, vt, flags) + } return true } + return false } @@ -567,7 +592,7 @@ func (self *_Compiler) compileOne(p *_Program, sp int, vt reflect.Type) { return } - if self.checkMarshaler(p, vt) { + if 
self.checkMarshaler(p, vt, 0, true) { return } @@ -690,7 +715,7 @@ func (self *_Compiler) compilePtr(p *_Program, sp int, et reflect.Type) { /* dereference all the way down */ for et.Kind() == reflect.Ptr { - if self.checkMarshaler(p, et) { + if self.checkMarshaler(p, et, 0, true) { return } et = et.Elem() @@ -938,7 +963,22 @@ end_of_object: p.pin(skip) } +func (self *_Compiler) compileStructFieldStrUnmarshal(p *_Program, vt reflect.Type) { + p.add(_OP_lspace) + n0 := p.pc() + p.add(_OP_is_null) + self.checkMarshaler(p, vt, checkMarshalerFlags_quoted, true) + p.pin(n0) +} + func (self *_Compiler) compileStructFieldStr(p *_Program, sp int, vt reflect.Type) { + // according to std, Unmarshaler should be called before stringize + // see https://github.com/bytedance/sonic/issues/670 + if self.checkMarshaler(p, vt, checkMarshalerFlags_quoted, false) { + self.compileStructFieldStrUnmarshal(p, vt) + return + } + n1 := -1 ft := vt sv := false @@ -1106,7 +1146,7 @@ func (self *_Compiler) compileUnmarshalEnd(p *_Program, vt reflect.Type, i int) p.pin(j) } -func (self *_Compiler) compileUnmarshalJson(p *_Program, vt reflect.Type) { +func (self *_Compiler) compileUnmarshalJson(p *_Program, vt reflect.Type, flags int) { i := p.pc() v := _OP_unmarshal p.add(_OP_is_null) @@ -1117,11 +1157,11 @@ func (self *_Compiler) compileUnmarshalJson(p *_Program, vt reflect.Type) { } /* call the unmarshaler */ - p.rtt(v, vt) + p.rtti(v, vt, flags) self.compileUnmarshalEnd(p, vt, i) } -func (self *_Compiler) compileUnmarshalText(p *_Program, vt reflect.Type) { +func (self *_Compiler) compileUnmarshalText(p *_Program, vt reflect.Type, iv int) { i := p.pc() v := _OP_unmarshal_text p.add(_OP_is_null) @@ -1134,15 +1174,15 @@ func (self *_Compiler) compileUnmarshalText(p *_Program, vt reflect.Type) { } /* call the unmarshaler */ - p.rtt(v, vt) + p.rtti(v, vt, iv) self.compileUnmarshalEnd(p, vt, i) } -func (self *_Compiler) compileUnmarshalTextPtr(p *_Program, vt reflect.Type) { +func (self 
*_Compiler) compileUnmarshalTextPtr(p *_Program, vt reflect.Type, iv int) { i := p.pc() p.add(_OP_is_null) p.chr(_OP_match_char, '"') - p.rtt(_OP_unmarshal_text_p, vt) + p.rtti(_OP_unmarshal_text_p, vt, iv) p.pin(i) } diff --git a/internal/decoder/jitdec/generic_regabi_amd64.go b/internal/decoder/jitdec/generic_regabi_amd64.go index e6d5e3e84..2c21944a5 100644 --- a/internal/decoder/jitdec/generic_regabi_amd64.go +++ b/internal/decoder/jitdec/generic_regabi_amd64.go @@ -186,6 +186,7 @@ var ( _T_slice = jit.Type(reflect.TypeOf(([]interface{})(nil))) _T_string = jit.Type(reflect.TypeOf("")) _T_number = jit.Type(reflect.TypeOf(json.Number(""))) + _T_miserr = jit.Type(reflect.TypeOf(MismatchTypeError{})) _T_float64 = jit.Type(reflect.TypeOf(float64(0))) ) diff --git a/internal/decoder/jitdec/primitives.go b/internal/decoder/jitdec/primitives.go index ba865dc7b..5adfc038a 100644 --- a/internal/decoder/jitdec/primitives.go +++ b/internal/decoder/jitdec/primitives.go @@ -39,6 +39,13 @@ func decodeJsonUnmarshaler(vv interface{}, s string) error { return vv.(json.Unmarshaler).UnmarshalJSON(rt.Str2Mem(s)) } +func decodeJsonUnmarshalerQuoted(vv interface{}, s string) error { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return &MismatchTypeError{} + } + return vv.(json.Unmarshaler).UnmarshalJSON(rt.Str2Mem(s[1:len(s)-1])) +} + func decodeTextUnmarshaler(vv interface{}, s string) error { return vv.(encoding.TextUnmarshaler).UnmarshalText(rt.Str2Mem(s)) } diff --git a/internal/encoder/compiler.go b/internal/encoder/compiler.go index 034e1d17d..902fbc98b 100644 --- a/internal/encoder/compiler.go +++ b/internal/encoder/compiler.go @@ -127,31 +127,40 @@ func (self *Compiler) compileOne(p *ir.Program, sp int, vt reflect.Type, pv bool } } -func (self *Compiler) compileRec(p *ir.Program, sp int, vt reflect.Type, pv bool) { - pr := self.pv +func (self *Compiler) tryCompileMarshaler(p *ir.Program, vt reflect.Type, pv bool) bool { pt := reflect.PtrTo(vt) /* check for 
addressable `json.Marshaler` with pointer receiver */ if pv && pt.Implements(vars.JsonMarshalerType) { addMarshalerOp(p, ir.OP_marshal_p, pt, vars.JsonMarshalerType) - return + return true } /* check for `json.Marshaler` */ if vt.Implements(vars.JsonMarshalerType) { self.compileMarshaler(p, ir.OP_marshal, vt, vars.JsonMarshalerType) - return + return true } /* check for addressable `encoding.TextMarshaler` with pointer receiver */ if pv && pt.Implements(vars.EncodingTextMarshalerType) { addMarshalerOp(p, ir.OP_marshal_text_p, pt, vars.EncodingTextMarshalerType) - return + return true } /* check for `encoding.TextMarshaler` */ if vt.Implements(vars.EncodingTextMarshalerType) { self.compileMarshaler(p, ir.OP_marshal_text, vt, vars.EncodingTextMarshalerType) + return true + } + + return false +} + +func (self *Compiler) compileRec(p *ir.Program, sp int, vt reflect.Type, pv bool) { + pr := self.pv + + if self.tryCompileMarshaler(p, vt, pv) { return } @@ -485,6 +494,12 @@ func (self *Compiler) compileStructBody(p *ir.Program, sp int, vt reflect.Type) } func (self *Compiler) compileStructFieldStr(p *ir.Program, sp int, vt reflect.Type) { + // NOTICE: according to encoding/json, Marshaler type has higher priority than string option + // see issue: https://github.com/bytedance/sonic/issues/670 + if self.tryCompileMarshaler(p, vt, self.pv) { + return + } + pc := -1 ft := vt sv := false diff --git a/issue_test/issue670_test.go b/issue_test/issue670_test.go new file mode 100644 index 000000000..0497a33e1 --- /dev/null +++ b/issue_test/issue670_test.go @@ -0,0 +1,89 @@ +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package issue_test + +import ( + "encoding/json" + "fmt" + "testing" + "time" + + "github.com/bytedance/sonic" + "github.com/stretchr/testify/assert" +) + +func TestIssue670_encode(t *testing.T) { + var obj = Issue670Case{ D: Date(time.Now().Unix()) } + so, _ := sonic.MarshalString(obj) + eo, _ := json.Marshal(obj) + assert.Equal(t, string(eo), so) + println(string(eo)) +} + +func TestIssue670_decode(t *testing.T) { + // match + eo := []byte(`{"D":"2021-08-26","E":1}`) + testUnmarshal(t, eo) + + // mismatch + eo = []byte(`{"D":11,"E":1}`) + testUnmarshal(t, eo) + + // null + eo = []byte(`{"D":null,"E":1}`) + testUnmarshal(t, eo) +} + +func testUnmarshal(t *testing.T, eo []byte) { + obj := Issue670Case{} + println(string(eo)) + println("sonic") + es := sonic.Unmarshal(eo, &obj) + obj2 := Issue670Case{} + println("std") + ee := json.Unmarshal(eo, &obj2) + assert.Equal(t, ee ==nil, es == nil, es) + assert.Equal(t, obj2, obj) + fmt.Printf("std: %v, obj: %#v", ee, obj2) + fmt.Printf("sonic error: %v, obj: %#v", es, obj) +} + +type Issue670Case struct { + D Date `form:"D" json:"D,string" query:"D"` + E int +} + +type Date int64 + +func (d Date) MarshalJSON() ([]byte, error) { + if d == 0 { + return []byte("null"), nil + } + return []byte(fmt.Sprintf("\"%s\"", time.Unix(int64(d), 0).Format("2006-01-02"))), nil +} + +func (d *Date) UnmarshalJSON(in []byte) error { + if string(in) == "null" { + *d = 0 + return nil + } + println("hook ", string(in)) + t, err := time.Parse("2006-01-02", string(in)) + if err != nil { + return err + } + *d = Date(t.Unix()) 
+ return nil +}