Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: introduce ast.Value to support read and write concurrently #579

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,26 @@ println(string(buf) == string(exp)) // true
- iteration: `Values()`, `Properties()`, `ForEach()`, `SortKeys()`
- modification: `Set()`, `SetByIndex()`, `Add()`

### Ast.Value
Due to `ast.Node`'s **transversely-lazy-load** design, it **CANNOT be read concurrently**. If your application has such a scenario, you can use `ast.Value`:
```go
var opts = sonic.SearchOptions{
Copy: false, // to control returning JSON by Copy instead of Reference
Validate: false, // to control whether the returned JSON's syntax is validated
}
val, err := opts.GetFromString(json, paths...) // skip and search JSON
any, err := val.Interface() // converts to go primitive type
```
Besides thread-safety, the main difference between `Value` and `Node` is that `Value` is implemented by pure `search-and-skip` of raw JSON, which means:
- When visiting a path for the first time, `Value` only needs to search the entire JSON and return the node located by the path, without parsing and deserializing the nodes along the way. In this case, it is faster than `Node`.
- When visiting the same path more than once, `Value` still needs to search the entire JSON and return the node, unlike `Node`, which can dereference and skip the on-the-way nodes. In this case, it is slower than `Node`.

In short, if your application **visits each JSON path exactly once**, without much overlapping, we advise you to use `Value`. Otherwise you'd better use `Node`.

#### APIs
Most of its APIs are the same as `ast.Node`'s, including both `Get` and `Set`. Besides:
- It provides `GetMany`, `SetMany`, `UnsetMany`, `AddMany` and `PopMany` to read or write multiple values at once, in order to **reduce the overhead of repeatedly visiting paths**.
-

### Ast.Visitor

Sonic provides an advanced API for fully parsing JSON into non-standard types (neither `struct` nor `map[string]interface{}`) without using any intermediate representation (`ast.Node` or `interface{}`). For example, you might have the following types which are like `interface{}` but are not actually `interface{}`:
Expand Down
60 changes: 58 additions & 2 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package sonic

import (
`io`
"io"

`github.com/bytedance/sonic/ast`
`github.com/bytedance/sonic/internal/rt`
Expand Down Expand Up @@ -200,9 +200,65 @@ func GetFromString(src string, path ...interface{}) (ast.Node, error) {

// GetCopyFromString is same with Get except src is string
func GetCopyFromString(src string, path ...interface{}) (ast.Node, error) {
return ast.NewSearcher(src).GetByPathCopy(path...)
s := ast.NewSearcher(src)
s.Copy(true)
return s.GetByPath(path...)
}

// Set writes val into src at the location addressed by path and returns
// the resulting JSON.
//
// When the underlying searcher reports an error, a nil slice is returned
// together with that error.
func Set(src []byte, val interface{}, path ...interface{}) ([]byte, error) {
	searcher := ast.NewSearcher(rt.Mem2Str(src))
	out, err := searcher.SetValueByPath(ast.NewValue(val), path...)
	if err == nil {
		return rt.Str2Mem(out), nil
	}
	return nil, err
}

// SetFromString is the same as Set except that src is a string and the
// result is returned as a string, which avoids unnecessary memory copies.
func SetFromString(src string, val interface{}, path ...interface{}) (string, error) {
	searcher := ast.NewSearcher(src)
	return searcher.SetValueByPath(ast.NewValue(val), path...)
}

// Delete removes the value addressed by path from src and returns the
// resulting JSON.
//
// Note that the result is returned as a string, not as a byte slice.
func Delete(src []byte, path ...interface{}) (string, error) {
	searcher := ast.NewSearcher(rt.Mem2Str(src))
	return searcher.DeleteByPath(path...)
}

// DeleteFromString removes the value addressed by path from src and
// returns the resulting JSON. It takes src as a string instead of a
// byte slice.
func DeleteFromString(src string, path ...interface{}) (string, error) {
	searcher := ast.NewSearcher(src)
	return searcher.DeleteByPath(path...)
}

// SearchOptions holds the options applied by its Get and GetFromString
// methods when searching a JSON document by path.
type SearchOptions struct {
	Copy bool // whether the returned JSON is copied instead of referencing the source JSON
	Validate bool // whether the returned JSON is validated, for safety
}

// Get searches src for the given path and returns the ast.Value that
// represents the located JSON.
//
// Each path argument must be an integer or a string:
//   - An integer is a target index (>= 0); the current value is searched as an array.
//   - A string is a target key; the current value is searched as an object.
//
// Note that this API expects src to be at least well-formed JSON,
// otherwise it may return an unexpected result.
func (opts SearchOptions) Get(src []byte, path ...interface{}) (ast.Value, error) {
	str := rt.Mem2Str(src)
	return opts.GetFromString(str, path...)
}

// GetFromString is the same as Get except that src is a string.
//
// A searcher is created over src and configured from opts before the
// lookup: opts.Validate is passed to the searcher's Validate setting and
// opts.Copy to its Copy setting. The value located by path is returned
// as an ast.Value together with any error reported by the searcher.
func (opts SearchOptions) GetFromString(src string, path ...interface{}) (ast.Value, error) {
	s := ast.NewSearcher(src)
	s.Validate(opts.Validate)
	// Copy must be driven by opts.Copy (not opts.Validate) so that the two
	// options can be toggled independently.
	s.Copy(opts.Copy)
	return s.GetValueByPath(path...)
}


// Valid reports whether data is a valid JSON encoding.
func Valid(data []byte) bool {
return ConfigDefault.Valid(data)
Expand Down
74 changes: 69 additions & 5 deletions ast/api_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
var typeByte = rt.UnpackEface(byte(0)).Type

//go:nocheckptr
func quote(buf *[]byte, val string) {
func Quote(buf *[]byte, val string) {
*buf = append(*buf, '"')
if len(val) == 0 {
*buf = append(*buf, '"')
Expand Down Expand Up @@ -73,7 +73,7 @@ func quote(buf *[]byte, val string) {
*buf = append(*buf, '"')
}

func unquote(src string) (string, types.ParsingError) {
func Unquote(src string) (string, types.ParsingError) {
return uq.String(src)
}

Expand Down Expand Up @@ -121,13 +121,77 @@ func (self *Parser) skipFast() (int, types.ParsingError) {
return start, 0
}

func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
func (self *Parser) getByPath(path ...interface{}) (int, types.ValueType, types.ParsingError) {
fsm := types.NewStateMachine()
start := native.GetByPath(&self.s, &self.p, &path, fsm)
types.FreeStateMachine(fsm)
runtime.KeepAlive(path)
if start < 0 {
return self.p, types.ParsingError(-start)
return self.p, 0, types.ParsingError(-start)
}
return start, 0
t := switchRawType(self.s[start])
if t == _V_NUMBER {
self.p = 1 + backward(self.s, self.p-1)
}
return start, t, 0
}

// getByPathNoValidate searches the parser's source for the value addressed
// by path through native.GetByPath, passing a nil state machine.
//
// On failure (a negative native result) it returns the parser's current
// position, a zero value type and the negated result as a ParsingError.
// On success it returns the start offset of the located value and the raw
// type derived from its first byte.
func (self *Parser) getByPathNoValidate(path ...interface{}) (int, types.ValueType, types.ParsingError) {
	pos := native.GetByPath(&self.s, &self.p, &path, nil)
	// keep path reachable until the native call has finished with it
	runtime.KeepAlive(path)

	if pos < 0 {
		return self.p, 0, types.ParsingError(-pos)
	}

	vt := switchRawType(self.s[pos])
	if vt == _V_NUMBER {
		// NOTE(review): presumably rewinds the cursor to just after the
		// number itself — confirm against backward().
		self.p = 1 + backward(self.s, self.p-1)
	}
	return pos, vt, 0
}

// DecodeString decodes the JSON string value located at offset pos of src.
//
// Results:
//   - value is not a string: ("", -int(_ERR_UNSUPPORT_TYPE), false).
//   - string without escape sequences: the raw content, the parser position
//     after the value, and hasEsc == false.
//   - string with escape sequences and needEsc == false: the raw (still
//     escaped) content, the parser position, and hasEsc == true.
//   - string with escape sequences and needEsc == true: the unquoted
//     content, the parser position, and hasEsc == true; if unquoting fails,
//     ("", -int(err), true).
func DecodeString(src string, pos int, needEsc bool) (v string, ret int, hasEsc bool) {
	parser := NewParserObj(src)
	parser.p = pos

	val := parser.decodeValue()
	if val.Vt != types.V_STRING {
		return "", -int(_ERR_UNSUPPORT_TYPE), false
	}

	// content between the quotes
	raw := parser.s[val.Iv : parser.p-1]

	// fast path: no escape sequence
	if val.Ep == -1 {
		return raw, parser.p, false
	}

	// caller does not want the escapes resolved
	if !needEsc {
		return raw, parser.p, true
	}

	// unquote the string and check for errors
	unquoted, perr := Unquote(raw)
	if perr != 0 {
		return "", -int(perr), true
	}
	return unquoted, parser.p, true
}

// ValidSyntax reports whether json has valid JSON syntax.
// It does not validate the UTF-8 charset.
func ValidSyntax(json string) bool {
	pos := 0
	fsm := types.NewStateMachine()
	rc := native.ValidateOne(&json, &pos, fsm, 0)
	types.FreeStateMachine(fsm)

	// a negative result means validation failed
	if rc < 0 {
		return false
	}

	// only spaces may follow the validated value
	for pos < len(json) {
		if !isSpace(json[pos]) {
			return false
		}
		pos++
	}
	return true
}
69 changes: 57 additions & 12 deletions ast/api_compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ package ast
import (
`encoding/base64`
`encoding/json`
`runtime`
`unsafe`

`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
Expand All @@ -30,11 +32,11 @@ func init() {
println("WARNING: sonic only supports Go1.16~1.22 && CPU amd64, but your environment is not suitable")
}

func quote(buf *[]byte, val string) {
func Quote(buf *[]byte, val string) {
quoteString(buf, val)
}

func unquote(src string) (string, types.ParsingError) {
func Unquote(src string) (string, types.ParsingError) {
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
if !ok {
Expand Down Expand Up @@ -87,25 +89,68 @@ func (self *Node) encodeInterface(buf *[]byte) error {
return nil
}

func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
var err types.ParsingError
func (self *Parser) getByPath(path ...interface{}) (int, types.ValueType, types.ParsingError) {
for _, p := range path {
if idx, ok := p.(int); ok && idx >= 0 {
if err = self.searchIndex(idx); err != 0 {
return -1, err
if _, err := self.searchIndex(idx); err != 0 {
return self.p, 0, err
}
} else if key, ok := p.(string); ok {
if err = self.searchKey(key); err != 0 {
return -1, err
if _, err := self.searchKey(key); err != 0 {
return self.p, 0, err
}
} else {
panic("path must be either int(>=0) or string")
}
}
start, e := self.skip()
if e != 0 {
return self.p, 0, e
}
t := switchRawType(self.s[start])
if t == _V_NUMBER {
self.p = 1 + backward(self.s, self.p-1)
}
return start, t, 0
}

// getByPathNoValidate forwards to getByPath with the same path and returns
// its results unchanged.
func (self *Parser) getByPathNoValidate(path ...interface{}) (int, types.ValueType, types.ParsingError) {
	start, vt, err := self.getByPath(path...)
	return start, vt, err
}

//go:nocheckptr
func DecodeString(src string, pos int, needEsc bool) (v string, ret int, hasEsc bool) {
ret, ep := skipString(src, pos)
if ep == -1 {
(*rt.GoString)(unsafe.Pointer(&v)).Ptr = rt.IndexChar(src, pos+1)
(*rt.GoString)(unsafe.Pointer(&v)).Len = ret - pos - 2
return v, ret, false
} else if !needEsc {
return src[pos+1:ret-1], ret, true
}

var start int
if start, err = self.skip(); err != 0 {
return -1, err
vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret]))
if !ok {
return "", -int(types.ERR_INVALID_CHAR), true
}
return start, 0

runtime.KeepAlive(src)
return rt.Mem2Str(vv), ret, true
}

// ValidSyntax reports whether json has valid JSON syntax.
// It does not validate the UTF-8 charset.
func ValidSyntax(json string) bool {
	end, _ := skipValue(json, 0)
	// a negative position means the value could not be skipped
	if end < 0 {
		return false
	}

	// only spaces may follow the skipped value
	for end < len(json) {
		if !isSpace(json[end]) {
			return false
		}
		end++
	}
	return true
}
Loading
Loading