Skip to content

Commit

Permalink
perf: use hashmap to optimize object get and insert operation
Browse files Browse the repository at this point in the history
  • Loading branch information
Ggiggle committed Jan 22, 2025
1 parent d0562bc commit db62def
Show file tree
Hide file tree
Showing 8 changed files with 150 additions and 149 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ version = "0.4.0-rc4"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ahash = "0.8"
bumpalo = "3.13"
bytes = "1.9"
cfg-if = "1.0"
Expand Down
8 changes: 5 additions & 3 deletions benchmarks/benches/value_operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,12 @@ fn bench_object_insert(c: &mut Criterion) {
b.iter_batched(
|| (),
|_| {
let mut val = sonic_rs::Object::new();
let mut val = sonic_rs::Object::with_capacity(100);
for i in 0..100 {
let mut obj = sonic_rs::json!({"a":{"b":{"c":{"d":{}}}}});
for j in 0..100 {
*obj["a"]["b"]["c"]["d"].as_object_mut().unwrap() =
sonic_rs::Object::with_capacity(1000);
for j in 0..1000 {
obj["a"]["b"]["c"]["d"]
.as_object_mut()
.unwrap()
Expand All @@ -200,7 +202,7 @@ fn bench_object_insert(c: &mut Criterion) {
let mut val = serde_json::Map::new();
for i in 0..100 {
let mut obj = serde_json::json!({"a":{"b":{"c":{"d":{}}}}});
for j in 0..100 {
for j in 0..1000 {
obj["a"]["b"]["c"]["d"]
.as_object_mut()
.unwrap()
Expand Down
10 changes: 5 additions & 5 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ macro_rules! impl_str_index {
if !v.is_object() {
return None;
}
v.get_key_mut(*self).map(|v| v.0)
v.get_key_mut(*self)
}

#[inline]
Expand All @@ -184,11 +184,11 @@ macro_rules! impl_str_index {
obj.as_object_mut()
.expect(&format!("cannot access key in non-object value {:?}", typ))
.0
.get_key_mut(*self).map_or_else(|| {
.get_key_mut(*self).unwrap_or_else(|| {
let o = unsafe { dormant_obj.reborrow() };
let inserted = o.append_pair((Into::<Value>::into((*self)), Value::new_null()));
&mut inserted.1
}, |v| v.0)
let inserted = o.insert(&self, Value::new_null());
inserted
})
}

#[inline]
Expand Down
6 changes: 2 additions & 4 deletions src/value/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,7 @@ macro_rules! json_internal {
// Insert the current entry followed by trailing comma.
(@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => {
let key: &str = ($($key)+).as_ref();
let pair = ($crate::Value::copy_str(key), $value);
let _ = $object.append_pair(pair);
let _ = $object.insert(key, $value);
json_internal!(@object $object () ($($rest)*) ($($rest)*));
};

Expand All @@ -239,8 +238,7 @@ macro_rules! json_internal {
// Insert the last entry without trailing comma.
(@object $object:ident [$($key:tt)+] ($value:expr)) => {
let key: &str = ($($key)+).as_ref();
let pair = ($crate::Value::copy_str(key), $value);
let _ = $object.append_pair(pair);
let _ = $object.insert(key, $value);
};

// Next value is `null`.
Expand Down
154 changes: 75 additions & 79 deletions src/value/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::{
sync::Arc,
};

use ahash::AHashMap;
use bumpalo::Bump;
use faststr::FastStr;
use ref_cast::RefCast;
Expand Down Expand Up @@ -79,27 +80,27 @@ pub struct Value {
// - Owned Node : mutable
// - Shared Node: in SharedDom, not mutable
//
// | Kind | 3 bits | 5 bits | 24 bits | ----> 32 bits ----> | 32 bits | 32 bits | limit |
// |-------------|-----------------|-------------------|--------------------------------|-------------------------|----------------------|
// | Null | 0 | 0 | + | |
// | True | 0 | 1 | + | |
// | False | 0 | 2 | + | |
// | I64 | 0 | 3 | + i64 | |
// | U64 | 0 | 4 | + u64 | |
// | F64 | 0 | 5 | + f64 | |
// | empty arr | 0 | 6 | |
// | empty obj | 0 | 7 | |
// | static str | 0 | 8 | | string length + *const u8 | exceed will fallback |
// | faststr | 1 | 0 | + Box<FastStr> | |
// |rawnum_fastst| 1 | 1 | + Box<FastStr> | |
// | arr_mut | 1 | 2 | + Arc<Vec<Node>> | |
// | obj_mut | 1 | 3 | + Arc<Vec<Pair>> | |
// | str_node | 2 | node idx | string length + *const u8 | max len 2^32 |
// | raw_num_node| 3 | node idx | string length + *const u8 | max len 2^32 |
// | arr_node | 4 | node idx | array length + *const Node | max len 2^32 |
// | obj_node | 5 | node idx | object length + *const Pair | max len 2^32 |
// |str_esc_raw | 6 | *const RawStrHeader (in SharedDom, MUST aligned 8) + *const u8 | |
// | root_node | 7 | *const SharedDom (from Arc, MUST aligned 8) + *const Node (head) | |
// | Kind | 3 bits | 5 bits | 24 bits | ----> 32 bits ----> | 32 bits | 32 bits | limit |
// |-------------|-----------------|-------------------|--------------------------------|-------------------------------|----------------------|
// | Null | 0 | 0 | + | |
// | True | 0 | 1 | + | |
// | False | 0 | 2 | + | |
// | I64 | 0 | 3 | + i64 | |
// | U64 | 0 | 4 | + u64 | |
// | F64 | 0 | 5 | + f64 | |
// | empty arr | 0 | 6 | |
// | empty obj | 0 | 7 | |
// | static str | 0 | 8 | | string length + *const u8 | exceed will fallback |
// | faststr | 1 | 0 | + Box<FastStr> | |
// |rawnum_fastst| 1 | 1 | + Box<FastStr> | |
// | arr_mut | 1 | 2 | + Arc<Vec<Node>> | |
// | obj_mut | 1 | 3 | + Arc<AHashMap<FastStr, Value>> | |
// | str_node | 2 | node idx | string length + *const u8 | max len 2^32 |
// | raw_num_node| 3 | node idx | string length + *const u8 | max len 2^32 |
// | arr_node | 4 | node idx | array length + *const Node | max len 2^32 |
// | obj_node | 5 | node idx | object length + *const Pair | max len 2^32 |
// |str_esc_raw | 6 | *const RawStrHeader (in SharedDom, MUST aligned 8) + *const u8 | |
// | root_node | 7 | *const SharedDom (from Arc, MUST aligned 8) + *const Node (head) | |
//
// NB: we check the JSON length when parsing; if the JSON is > 2GB, we return an error, so we do not check the limits when parsing or using the DOM.
#[allow(clippy::box_collection)]
Expand All @@ -117,7 +118,7 @@ pub(crate) union Data {
pub(crate) root: NonNull<Value>,

pub(crate) str_own: ManuallyDrop<Box<FastStr>>,
pub(crate) obj_own: ManuallyDrop<Arc<Vec<Pair>>>,
pub(crate) obj_own: ManuallyDrop<Arc<AHashMap<FastStr, Value>>>,
pub(crate) arr_own: ManuallyDrop<Arc<Vec<Value>>>,

pub(crate) parent: u64,
Expand Down Expand Up @@ -427,7 +428,7 @@ enum ValueDetail<'a> {
FastStr(&'a FastStr),
RawNumFasStr(&'a FastStr),
Array(&'a Arc<Vec<Value>>),
Object(&'a Arc<Vec<Pair>>),
Object(&'a Arc<AHashMap<FastStr, Value>>),
Root(NodeInDom<'a>),
NodeInDom(NodeInDom<'a>),
EmptyArray,
Expand Down Expand Up @@ -474,13 +475,21 @@ pub enum ValueRefInner<'a> {
RawNum(&'a str),
Array(&'a [Value]),
Object(&'a [Pair]),
ObjectOwned(&'a Arc<AHashMap<FastStr, Value>>),
EmptyArray,
EmptyObject,
}

impl<'a> From<&'a [Pair]> for Value {
fn from(value: &'a [Pair]) -> Self {
let newd = value.to_vec();
let mut newd = AHashMap::with_capacity(value.len());

for (k, v) in value {
if let Some(k) = k.as_str() {
newd.insert(FastStr::new(k), v.clone());
}
}

Self {
meta: Meta::new(Meta::OBJ_MUT),
data: Data {
Expand Down Expand Up @@ -518,7 +527,7 @@ pub(crate) enum ValueMut<'a> {
Str,
RawNum,
Array(&'a mut Vec<Value>),
Object(&'a mut Vec<Pair>),
Object(&'a mut AHashMap<FastStr, Value>),
}

impl Value {
Expand Down Expand Up @@ -609,7 +618,7 @@ impl Value {
ValueDetail::FastStr(s) => ValueRefInner::Str(s.as_str()),
ValueDetail::RawNumFasStr(s) => ValueRefInner::RawNum(s.as_str()),
ValueDetail::Array(a) => ValueRefInner::Array(a),
ValueDetail::Object(o) => ValueRefInner::Object(o),
ValueDetail::Object(o) => ValueRefInner::ObjectOwned(o),
ValueDetail::Root(n) | ValueDetail::NodeInDom(n) => n.get_inner(),
ValueDetail::EmptyArray => ValueRefInner::EmptyArray,
ValueDetail::EmptyObject => ValueRefInner::EmptyObject,
Expand Down Expand Up @@ -693,8 +702,8 @@ impl From<Arc<Vec<Value>>> for Value {
}
}

impl From<Arc<Vec<Pair>>> for Value {
fn from(value: Arc<Vec<Pair>>) -> Self {
impl From<Arc<AHashMap<FastStr, Value>>> for Value {
fn from(value: Arc<AHashMap<FastStr, Value>>) -> Self {
Self {
meta: Meta::new(Meta::OBJ_MUT),
data: Data {
Expand Down Expand Up @@ -770,7 +779,7 @@ impl super::value_trait::JsonValueTrait for Value {
ValueRefInner::Number(_) => JsonType::Number,
ValueRefInner::Str(_) | ValueRefInner::RawStr(_) => JsonType::String,
ValueRefInner::Array(_) => JsonType::Array,
ValueRefInner::Object(_) => JsonType::Object,
ValueRefInner::Object(_) | ValueRefInner::ObjectOwned(_) => JsonType::Object,
ValueRefInner::RawNum(_) => JsonType::Number,
ValueRefInner::EmptyArray => JsonType::Array,
ValueRefInner::EmptyObject => JsonType::Object,
Expand Down Expand Up @@ -963,9 +972,9 @@ impl Value {
ValueRefInner::Array(_) | ValueRefInner::EmptyArray => {
ValueRef::Array(self.as_array().unwrap())
}
ValueRefInner::Object(_) | ValueRefInner::EmptyObject => {
ValueRef::Object(self.as_object().unwrap())
}
ValueRefInner::Object(_)
| ValueRefInner::EmptyObject
| ValueRefInner::ObjectOwned(_) => ValueRef::Object(self.as_object().unwrap()),
ValueRefInner::RawNum(raw) => {
crate::from_str(raw).map_or(ValueRef::Null, ValueRef::Number)
}
Expand Down Expand Up @@ -1197,7 +1206,7 @@ impl Value {

#[doc(hidden)]
pub fn new_object_with(capacity: usize) -> Self {
let obj_own = ManuallyDrop::new(Arc::new(Vec::with_capacity(capacity)));
let obj_own = ManuallyDrop::new(Arc::new(AHashMap::with_capacity(capacity)));
Value {
meta: Meta::new(Meta::OBJ_MUT),
data: Data { obj_own },
Expand Down Expand Up @@ -1231,39 +1240,27 @@ impl Value {

pub(crate) fn get_key_value(&self, key: &str) -> Option<(&str, &Self)> {
debug_assert!(self.is_object());
if let ValueRefInner::Object(kv) = self.as_ref2() {
let ref_inner = self.as_ref2();
if let ValueRefInner::Object(kv) = ref_inner {
for (k, v) in kv {
let k = k.as_str().expect("key is not string");
if k == key {
return Some((k, v));
}
}
}
None
}

#[inline]
pub(crate) fn get_key_offset(&self, key: &str) -> Option<usize> {
debug_assert!(self.is_object());
if let ValueRefInner::Object(kv) = self.as_ref2() {
for (i, pair) in kv.iter().enumerate() {
debug_assert!(pair.0.is_str());
if pair.0.equal_str(key) {
return Some(i);
}
} else if let ValueRefInner::ObjectOwned(kv) = ref_inner {
if let Some((k, v)) = kv.get_key_value(key) {
return Some((k.as_str(), v));
}
}
None
}

#[inline]
pub(crate) fn get_key_mut(&mut self, key: &str) -> Option<(&mut Self, usize)> {
pub(crate) fn get_key_mut(&mut self, key: &str) -> Option<&mut Self> {
if let ValueMut::Object(kv) = self.as_mut() {
for (i, (k, v)) in kv.iter_mut().enumerate() {
debug_assert!(k.is_str());
if k.equal_str(key) {
return Some((v, i));
}
if let Some(v) = kv.get_mut(key) {
return Some(v);
}
}
None
Expand Down Expand Up @@ -1313,25 +1310,14 @@ impl Value {
}

#[inline]
pub(crate) fn remove_pair_index(&mut self, index: usize) -> (Value, Value) {
pub(crate) fn remove_key(&mut self, k: &str) -> Option<Value> {
debug_assert!(self.is_object());
match self.as_mut() {
ValueMut::Object(obj) => obj.remove(index),
ValueMut::Object(obj) => obj.remove(k),
_ => unreachable!("value is not object"),
}
}

#[inline]
pub(crate) fn remove_key(&mut self, k: &str) -> Option<Value> {
debug_assert!(self.is_object());
if let Some(i) = self.get_key_offset(k) {
let (_, val) = self.remove_pair_index(i);
Some(val)
} else {
None
}
}

/// Take the value from the node, and set the node as an empty node.
/// Take will create a new root node.
///
Expand Down Expand Up @@ -1380,13 +1366,12 @@ impl Value {

#[doc(hidden)]
#[inline]
pub fn append_pair(&mut self, pair: Pair) -> &mut Pair {
pub fn insert(&mut self, key: &str, val: Value) -> &mut Value {
debug_assert!(self.is_object());
match self.as_mut() {
ValueMut::Object(obj) => {
obj.push(pair);
let len = obj.len();
&mut obj[len - 1]
obj.insert(FastStr::new(key), val);
obj.get_mut(key).unwrap()
}
_ => unreachable!("value is not object"),
}
Expand All @@ -1401,15 +1386,6 @@ impl Value {
}
}

#[inline]
pub(crate) fn pop_pair(&mut self) -> Option<Pair> {
debug_assert!(self.is_object());
match self.as_mut() {
ValueMut::Object(obj) => obj.pop(),
_ => unreachable!("value is not object"),
}
}

#[inline(never)]
pub(crate) fn parse_with_padding(&mut self, json: &[u8], cfg: DeserializeCfg) -> Result<usize> {
// allocate the padding buffer for the input json
Expand Down Expand Up @@ -1795,6 +1771,26 @@ impl Serialize for Value {
map.end()
}
}
ValueRefInner::ObjectOwned(o) => {
#[cfg(feature = "sort_keys")]
{
let mut map = tri!(serializer.serialize_map(Some(o.len())));
for (k, v) in o.iter() {
tri!(map.serialize_key(k.as_str()));
tri!(map.serialize_value(v));
}
map.end()
}
#[cfg(not(feature = "sort_keys"))]
{
let mut map = tri!(serializer.serialize_map(Some(o.len())));
for (k, v) in o.iter() {
tri!(map.serialize_key(k.as_str()));
tri!(map.serialize_value(v));
}
map.end()
}
}
ValueRefInner::RawNum(raw) => {
use serde::ser::SerializeStruct;

Expand Down
Loading

0 comments on commit db62def

Please sign in to comment.