From 755afe95e640f2cf179d409fcb26f57958c479a8 Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Fri, 17 May 2024 17:16:25 +0800 Subject: [PATCH 1/5] feat:(ast) `Visitor` support skip object and array --- ast/parser.go | 4 ++ ast/visitor.go | 30 ++++++++++++ ast/visitor_test.go | 109 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) diff --git a/ast/parser.go b/ast/parser.go index a1f582623..506f9d86c 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -115,6 +115,10 @@ func (self *Parser) lspace(sp int) int { return sp } +func (self *Parser) backward() { + for ; self.p >= 0 && isSpace(self.s[self.p]); self.p-=1 {} +} + func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) { sp := self.p ns := len(self.s) diff --git a/ast/visitor.go b/ast/visitor.go index d409509f5..c779126c0 100644 --- a/ast/visitor.go +++ b/ast/visitor.go @@ -18,6 +18,7 @@ package ast import ( `encoding/json` + `errors` `github.com/bytedance/sonic/internal/native/types` ) @@ -191,6 +192,19 @@ func (self *traverser) decodeArray() error { /* allocate array space and parse every element */ if err := self.visitor.OnArrayBegin(_DEFAULT_NODE_CAP); err != nil { + if err == VisitOPSkip { + // NOTICE: for user needs to skip entiry object + self.parser.p -= 1 + self.parser.backward() + if self.parser.s[self.parser.p] != '[' { + return types.ERR_INVALID_CHAR + } + if _, e := self.parser.skipFast(); e != 0 { + return e + } + println("skip to ", self.parser.p) + return self.visitor.OnArrayEnd() + } return err } for { @@ -240,6 +254,18 @@ func (self *traverser) decodeObject() error { /* allocate object space and decode each pair */ if err := self.visitor.OnObjectBegin(_DEFAULT_NODE_CAP); err != nil { + if err == VisitOPSkip { + // NOTICE: for user needs to skip entiry object + self.parser.p -= 1 + self.parser.backward() + if self.parser.s[self.parser.p] != '{' { + return types.ERR_INVALID_CHAR + } + if _, e := self.parser.skipFast(); e != 0 { + return e + } + return self.visitor.OnObjectEnd() + } return err } for { @@ -313,3 +339,7 @@ func (self *traverser) decodeString(iv int64, ep int) error { } return self.visitor.OnString(out) } + +// If visitor return this error on `OnObjectBegin()` or `OnArrayBegin()`, +// the transverer will skip entiry object or array +var VisitOPSkip = errors.New("") \ No newline at end of file diff --git a/ast/visitor_test.go b/ast/visitor_test.go index 9ecdc4a02..1fd8967df 100644 --- a/ast/visitor_test.go +++ b/ast/visitor_test.go @@ -648,6 +648,115 @@ func TestVisitor_UserNodeDiff(t *testing.T) { }) } +type skipVisitor struct { + sp int + Skip int + inSkip bool + CountSkip int +} + +func (self *skipVisitor) OnNull() error { + if self.sp == self.Skip+1 && self.inSkip { + panic("unexpected key") + } + return nil +} + +func (self *skipVisitor) OnFloat64(v float64, n json.Number) error { + if self.sp == self.Skip+1 && self.inSkip { + panic("unexpected key") + } + return nil +} + +func (self *skipVisitor) OnInt64(v int64, n json.Number) error { + if self.sp == self.Skip+1 && self.inSkip { + panic("unexpected key") + } + return nil +} + +func (self *skipVisitor) OnBool(v bool) error { + if self.sp == self.Skip+1 && self.inSkip { + panic("unexpected key") + } + return nil +} + +func (self *skipVisitor) OnString(v string) error { + if self.sp == self.Skip+1 && self.inSkip { + panic("unexpected key") + } + return nil +} + +func (self *skipVisitor) OnObjectBegin(capacity int) error { + println("self.sp", self.sp) + if self.sp == self.Skip { + self.inSkip = true + self.CountSkip++ + println("op skip") + self.sp++ + return VisitOPSkip + } + self.sp++ + return nil +} + +func (self *skipVisitor) OnObjectKey(key string) error { + if self.sp == self.Skip+1 && self.inSkip { + panic("unexpected key") + } + return nil +} + +func (self *skipVisitor) OnObjectEnd() error { + if self.sp == self.Skip + 1 { + if !self.inSkip { + panic("not in skip") + } + self.inSkip = false + println("finish op skip") + } + self.sp-- + return nil +} + +func (self *skipVisitor) OnArrayBegin(capacity int) error { + println("arr self.sp", self.sp) + if self.sp == self.Skip { + self.inSkip = true + self.CountSkip++ + println("arr op skip") + self.sp++ + return VisitOPSkip + } + self.sp++ + return nil +} + +func (self *skipVisitor) OnArrayEnd() error { + println("arr self.sp", self.sp) + if self.sp == self.Skip + 1 { + if !self.inSkip { + panic("arr not in skip") + } + self.inSkip = false + println("arr finish op skip") + } + self.sp-- + return nil +} + +func TestVisitor_OpSkip(t *testing.T) { + var suite skipVisitor + suite.Skip = 1 + Preorder(`{"a": [ null ] , "b":1, "c": { "1" : 1 } }`, &suite, nil) + if suite.CountSkip != 2 { + t.Fatal(suite.CountSkip) + } +} + func BenchmarkVisitor_UserNode(b *testing.B) { const str = _TwitterJson b.Run("AST", func(b *testing.B) { From d2edb31b167bfe8bedab648a0bb47e67a21a7596 Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Fri, 17 May 2024 17:22:52 +0800 Subject: [PATCH 2/5] fmt --- ast/visitor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ast/visitor.go b/ast/visitor.go index c779126c0..0e0f1761d 100644 --- a/ast/visitor.go +++ b/ast/visitor.go @@ -342,4 +342,4 @@ func (self *traverser) decodeString(iv int64, ep int) error { // If visitor return this error on `OnObjectBegin()` or `OnArrayBegin()`, // the transverer will skip entiry object or array -var VisitOPSkip = errors.New("") \ No newline at end of file +var VisitOPSkip = errors.New("") From b366e5a586ccb6ac765316b6d92a0498f75b9a8f Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Thu, 23 May 2024 16:00:19 +0800 Subject: [PATCH 3/5] remove print --- ast/visitor.go | 1 - fuzz/go-fuzz-corpus | 1 + tools/asm2asm | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 160000 fuzz/go-fuzz-corpus diff --git a/ast/visitor.go b/ast/visitor.go index 0e0f1761d..09027e79b 100644 --- a/ast/visitor.go +++ b/ast/visitor.go @@ -202,7 +202,6 @@ func (self *traverser) decodeArray() error { if _, e := self.parser.skipFast(); e != 0 { return e } - println("skip to ", self.parser.p) return self.visitor.OnArrayEnd() } return err diff --git a/fuzz/go-fuzz-corpus b/fuzz/go-fuzz-corpus new file mode 160000 index 000000000..c42c1b291 --- /dev/null +++ b/fuzz/go-fuzz-corpus @@ -0,0 +1 @@ +Subproject commit c42c1b2914c7503500996ee15927d3ab3d2ba968 diff --git a/tools/asm2asm b/tools/asm2asm index 7af8712ad..ff3a7abca 160000 --- a/tools/asm2asm +++ b/tools/asm2asm @@ -1 +1 @@ -Subproject commit 7af8712ade349cdc3c63cb14f0070e3e6a94390d +Subproject commit ff3a7abcac9d0bd5c1e74d8bf5b6abec4140e435 From ad92d9e242c49f8d4fcbc76a3ef41431c54f858c Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Thu, 23 May 2024 16:14:09 +0800 Subject: [PATCH 4/5] opt --- ast/parser.go | 4 --- ast/visitor.go | 63 ++++++++++++++++++--------------------------- ast/visitor_test.go | 2 +- 3 files changed, 26 insertions(+), 43 deletions(-) diff --git a/ast/parser.go b/ast/parser.go index 506f9d86c..38d98e906 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -658,7 +658,3 @@ func (self *Parser) ExportError(err types.ParsingError) error { }.Description()) } -func backward(src string, i int) int { - for ; i>=0 && isSpace(src[i]); i-- {} - return i -} diff --git a/ast/visitor.go b/ast/visitor.go index 09027e79b..f25407b99 100644 --- a/ast/visitor.go +++ b/ast/visitor.go @@ -175,30 +175,11 @@ func (self *traverser) decodeArray() error { sp := self.parser.p ns := len(self.parser.s) - /* check for EOF */ - self.parser.p = self.parser.lspace(sp) - if self.parser.p >= ns { - return types.ERR_EOF - } - - /* check for empty array */ - if self.parser.s[self.parser.p] == ']' { - self.parser.p++ - if err := self.visitor.OnArrayBegin(0); err != nil { - return err - } - return self.visitor.OnArrayEnd() - } - /* allocate array space and parse every element */ if err := self.visitor.OnArrayBegin(_DEFAULT_NODE_CAP); err != nil { if err == VisitOPSkip { // NOTICE: for user needs to skip entiry object self.parser.p -= 1 - self.parser.backward() - if self.parser.s[self.parser.p] != '[' { - return types.ERR_INVALID_CHAR - } if _, e := self.parser.skipFast(); e != 0 { return e } @@ -206,6 +187,19 @@ func (self *traverser) decodeArray() error { } return err } + + /* check for EOF */ + self.parser.p = self.parser.lspace(sp) + if self.parser.p >= ns { + return types.ERR_EOF + } + + /* check for empty array */ + if self.parser.s[self.parser.p] == ']' { + self.parser.p++ + return self.visitor.OnArrayEnd() + } + for { /* decode the value */ if err := self.decodeValue(); err != nil { @@ -236,30 +230,11 @@ func (self *traverser) decodeObject() error { sp := self.parser.p ns := len(self.parser.s) - /* check for EOF */ - self.parser.p = self.parser.lspace(sp) - if self.parser.p >= ns { - return types.ERR_EOF - } - - /* check for empty object */ - if self.parser.s[self.parser.p] == '}' { - self.parser.p++ - if err := self.visitor.OnObjectBegin(0); err != nil { - return err - } - return self.visitor.OnObjectEnd() - } - /* allocate object space and decode each pair */ if err := self.visitor.OnObjectBegin(_DEFAULT_NODE_CAP); err != nil { if err == VisitOPSkip { // NOTICE: for user needs to skip entiry object self.parser.p -= 1 - self.parser.backward() - if self.parser.s[self.parser.p] != '{' { - return types.ERR_INVALID_CHAR - } if _, e := self.parser.skipFast(); e != 0 { return e } @@ -267,6 +242,18 @@ func (self *traverser) decodeObject() error { } return err } + + /* check for EOF */ + self.parser.p = self.parser.lspace(sp) + if self.parser.p >= ns { + return types.ERR_EOF + } + + /* check for empty object */ + if self.parser.s[self.parser.p] == '}' { + return self.visitor.OnObjectEnd() + } + for { var njs types.JsonState var err types.ParsingError diff --git a/ast/visitor_test.go b/ast/visitor_test.go index 1fd8967df..c576bdd9d 100644 --- a/ast/visitor_test.go +++ b/ast/visitor_test.go @@ -751,7 +751,7 @@ func (self *skipVisitor) OnArrayEnd() error { func TestVisitor_OpSkip(t *testing.T) { var suite skipVisitor suite.Skip = 1 - Preorder(`{"a": [ null ] , "b":1, "c": { "1" : 1 } }`, &suite, nil) + Preorder(`{ "a": [ null ] , "b": 1, "c": { "1" : 1 } }`, &suite, nil) if suite.CountSkip != 2 { t.Fatal(suite.CountSkip) } From 55ca552f69fd78c511f6db5ea2125d43fea7ef11 Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Thu, 23 May 2024 16:16:20 +0800 Subject: [PATCH 5/5] fix --- ast/parser.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ast/parser.go b/ast/parser.go index 38d98e906..506f9d86c 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -658,3 +658,7 @@ func (self *Parser) ExportError(err types.ParsingError) error { }.Description()) } +func backward(src string, i int) int { + for ; i>=0 && isSpace(src[i]); i-- {} + return i +}