From 7470296a5349a1dc2c5c4d4cfa8858e3efb15cfe Mon Sep 17 00:00:00 2001 From: liuq19 Date: Thu, 31 Oct 2024 23:30:36 +0800 Subject: [PATCH] feat: support use_rawnumber and use_raw --- Cargo.toml | 21 +- scripts/sanitize.sh | 41 +-- src/config.rs | 5 + src/error.rs | 1 + src/index.rs | 1 - src/lazyvalue/get.rs | 4 +- src/lazyvalue/owned.rs | 1 - src/lazyvalue/value.rs | 5 +- src/lib.rs | 1 + src/parser.rs | 581 +++++++++++++-------------------------- src/reader.rs | 1 - src/serde/de.rs | 67 ++--- src/serde/rawnumber.rs | 4 + src/serde/ser.rs | 83 ++---- src/util/num/common.rs | 8 - src/util/num/float.rs | 4 + src/util/simd/avx2.rs | 5 - src/util/simd/mod.rs | 18 +- src/util/simd/sse2.rs | 5 - src/util/simd/v128.rs | 5 - src/util/simd/v256.rs | 5 - src/util/simd/v512.rs | 5 - src/value/array.rs | 15 +- src/value/from.rs | 4 +- src/value/mod.rs | 3 - src/value/node.rs | 461 +++++++++++++++---------------- src/value/object.rs | 38 +-- src/value/partial_eq.rs | 6 +- src/value/ser.rs | 173 +++++++++++- src/value/shared.rs | 18 +- src/value/value_trait.rs | 26 +- src/value/visitor.rs | 10 +- 32 files changed, 727 insertions(+), 898 deletions(-) create mode 100644 src/config.rs diff --git a/Cargo.toml b/Cargo.toml index 58f8dab..ff45952 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,18 +14,15 @@ version = "0.3.14" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -bumpalo = "3.13" -bytes = "1.8" -cfg-if = "1.0" -faststr = { version = "0.2", features = ["serde"] } -itoa = "1.0" -parking_lot = "0.12" -ryu = "1.0" -serde = { version = "1.0", features = ["rc", "derive"] } -simdutf8 = "0.1" -smallvec = "1.11" -static_assertions = "1.1" -thiserror = "1.0" +bumpalo = "3.13" +bytes = "1.8" +cfg-if = "1.0" +faststr = { version = "0.2", features = ["serde"] } +itoa = "1.0" +ryu = "1.0" +serde = { version = "1.0", features = ["rc", "derive"] } +simdutf8 = "0.1" +thiserror = "1.0" [dev-dependencies] encoding_rs = "0.8" diff --git a/scripts/sanitize.sh b/scripts/sanitize.sh index 81b823b..736b4d5 100755 --- a/scripts/sanitize.sh +++ b/scripts/sanitize.sh @@ -4,41 +4,10 @@ set -ex export ASAN_OPTIONS="disable_coredump=0:unmap_shadow_on_exit=1:abort_on_error=1" -testcase_lists() { - cargo test -- -Zunstable-options --list --format json - local result=$? - if [ ${result} -ne 0 ]; then - exit -1 - fi - cargo test -- -Zunstable-options --list --format json | jq -c 'select(.type=="test") | .name' | awk -F'"' '{print $2}' | awk '{print ($2) ? $3 : $1}' - return $? -} - -sanitize() { - local san="$1" - local target="$2" - local testcase="$3" - # use single thread to make error info more readable and accurate - RUSTFLAGS="-Zsanitizer=${san}" RUSTDOCFLAGS="-Zsanitizer=${san}" cargo test --target ${target} ${testcase} -- --test-threads=1 - RUSTFLAGS="-Zsanitizer=${san}" RUSTDOCFLAGS="-Zsanitizer=${san}" cargo test --doc --package sonic-rs --target ${target} ${testcase} -- --show-output --test-threads=1 -} - -sanitize_single() { - local san="$1" - local target="$2" - local lists=$(testcase_lists) - for case in ${lists}; do - sanitize ${san} ${target} ${case} - done -} - -main() { - for san in address leak; do - echo "Running tests with $san" - sanitize_single $san "x86_64-unknown-linux-gnu" - done -} - -main "$@" +for san in address leak; do + echo "Running tests with $san" + RUSTFLAGS="-Zsanitizer=${san}" RUSTDOCFLAGS="-Zsanitizer=${san}" cargo +nightly test --target x86_64-unknown-linux-gnu -- --test-threads=1 + RUSTFLAGS="-Zsanitizer=${san}" RUSTDOCFLAGS="-Zsanitizer=${san}" cargo +nightly test --doc --package sonic-rs --target x86_64-unknown-linux-gnu -- --show-output --test-threads=1 +done diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..30ed218 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,5 @@ +#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct DeserializeCfg { + pub(crate) use_rawnumber: bool, + pub(crate) use_raw: bool, +} diff --git a/src/error.rs b/src/error.rs index 805eddc..d610562 100644 --- a/src/error.rs +++ b/src/error.rs @@ -504,6 +504,7 @@ mod test { #[test] fn test_serde_errors_display() { + #[allow(unused)] #[derive(Debug, Deserialize)] struct Foo { a: Vec, diff --git a/src/index.rs b/src/index.rs index d05cb9c..f0ffb03 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,6 +1,5 @@ use crate::{ util::{private::Sealed, reborrow::DormantMutRef}, - value::{object::DEFAULT_OBJ_CAP, shared::Shared}, JsonValueMutTrait, JsonValueTrait, PointerNode, Value, }; diff --git a/src/lazyvalue/get.rs b/src/lazyvalue/get.rs index ae52269..882618e 100644 --- a/src/lazyvalue/get.rs +++ b/src/lazyvalue/get.rs @@ -179,7 +179,7 @@ where let slice = json.to_u8_slice(); let reader = Read::new(slice, false); let mut parser = Parser::new(reader); - let (sub, status) = parser.get_from_with_iter(path)?; + let (sub, status) = parser.get_from_with_iter_unchecked(path)?; LazyValue::new(json.from_subset(sub), status == ParseStatus::HasEscaped) } @@ -388,7 +388,7 @@ where let slice = json.to_u8_slice(); let reader = Read::new(slice, false); let mut parser = Parser::new(reader); - let (sub, status) = parser.get_from_with_iter_checked(path)?; + let (sub, status) = parser.get_from_with_iter(path)?; let lv = LazyValue::new(json.from_subset(sub), status == ParseStatus::HasEscaped)?; // validate the utf-8 if slice diff --git a/src/lazyvalue/owned.rs b/src/lazyvalue/owned.rs index 50865a1..606b4d8 100644 --- a/src/lazyvalue/owned.rs +++ b/src/lazyvalue/owned.rs @@ -91,7 +91,6 @@ impl JsonValueTrait for OwnedLazyValue { } } - #[cfg(feature = "arbitrary_precision")] fn as_raw_number(&self) -> Option { if let Ok(num) = from_str(self.as_raw_str()) { Some(num) diff --git a/src/lazyvalue/value.rs b/src/lazyvalue/value.rs index a268495..f4b0dd5 100644 --- a/src/lazyvalue/value.rs +++ b/src/lazyvalue/value.rs @@ -9,11 +9,9 @@ use std::{ use faststr::FastStr; -#[cfg(feature = "arbitrary_precision")] -use crate::RawNumber; use crate::{ from_str, get_unchecked, index::Index, input::JsonSlice, serde::Number, JsonType, - JsonValueTrait, Result, + JsonValueTrait, RawNumber, Result, }; /// LazyValue wrappers a unparsed raw JSON text. It is borrowed from the origin JSON text. @@ -200,7 +198,6 @@ impl<'a> JsonValueTrait for LazyValue<'a> { } } - #[cfg(feature = "arbitrary_precision")] fn as_raw_number(&self) -> Option { if let Ok(num) = from_str(self.as_raw_str()) { Some(num) diff --git a/src/lib.rs b/src/lib.rs index a3f1bb3..6b488a9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ #![allow(clippy::needless_lifetimes)] #![doc(test(attr(warn(unused))))] +mod config; mod error; mod index; mod input; diff --git a/src/parser.rs b/src/parser.rs index 9b97afe..191b4be 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,18 +3,16 @@ use std::{ ops::Deref, slice::{from_raw_parts, from_raw_parts_mut}, str::from_utf8_unchecked, - sync::Arc, }; -use bumpalo::Bump; use faststr::FastStr; use serde::de::{self, Expected, Unexpected}; -use smallvec::SmallVec; use super::reader::{Reader, Reference}; #[cfg(all(target_feature = "neon", target_arch = "aarch64"))] use crate::util::simd::bits::NeonBits; use crate::{ + config::DeserializeCfg, error::{ Error, ErrorCode::{self, *}, @@ -23,7 +21,7 @@ use crate::{ index::Index, pointer::{ tree::{MultiIndex, MultiKey, PointerTreeInner, PointerTreeNode}, - JsonPointer, PointerTree, + PointerTree, }, util::{ arch::{get_nonspace_bits, prefix_xor}, @@ -32,7 +30,7 @@ use crate::{ string::*, unicode::{codepoint_to_utf8, hex_to_u32_nocheck}, }, - value::{shared::Shared, visitor::JsonVisitor}, + value::{node::RawStr, visitor::JsonVisitor}, LazyValue, }; @@ -64,7 +62,9 @@ macro_rules! perr { macro_rules! check_visit { ($self:ident, $e:expr $(,)?) => { if !($e) { - return perr!($self, UnexpectedVisitType); + perr!($self, UnexpectedVisitType) + } else { + Ok(()) } }; } @@ -146,6 +146,7 @@ pub(crate) struct Parser { error_index: usize, // mark the error position nospace_bits: u64, // SIMD marked nospace bitmap nospace_start: isize, // the start position of nospace_bits + pub(crate) cfg: DeserializeCfg, } /// Records the parse status @@ -165,9 +166,15 @@ where error_index: usize::MAX, nospace_bits: 0, nospace_start: -128, + cfg: DeserializeCfg::default(), } } + pub(crate) fn with_config(mut self, cfg: DeserializeCfg) -> Self { + self.cfg = cfg; + self + } + #[inline(always)] fn error_index(&self) -> usize { // when parsing strings , we need record the error position. @@ -192,7 +199,6 @@ where reason = EofWhileParsing; index = len; } - dbg!(crate::parser::as_str(self.read.as_u8_slice())); Error::syntax(reason, self.read.as_u8_slice(), index) } @@ -222,7 +228,7 @@ where // TODO: optimize me, avoid clone twice. #[inline(always)] - fn parse_string_owned(&mut self, visitor: &mut V, strbuf: &mut Vec) -> Result<()> + fn parse_string_owned(&mut self, vis: &mut V, strbuf: &mut Vec) -> Result<()> where V: JsonVisitor<'de>, { @@ -246,10 +252,7 @@ where } #[inline(always)] - fn parse_string_inplace(&mut self, visitor: &mut V) -> Result<()> - where - V: JsonVisitor<'de>, - { + fn parse_string_inplace_impl>(&mut self, vis: &mut V) -> Result<()> { unsafe { let mut src = self.read.cur_ptr(); let start = self.read.cur_ptr(); @@ -257,95 +260,105 @@ where self.read.set_ptr(src); let slice = from_raw_parts(start, cnt); let s = from_utf8_unchecked(slice); - check_visit!(self, visitor.visit_borrowed_str(s)); + check_visit!(self, vis.visit_borrowed_str(s)) + } + } + + #[inline(always)] + fn parse_string_inplace>(&mut self, vis: &mut V) -> Result<()> { + if !self.cfg.use_raw { + return self.parse_string_inplace_impl(vis); + } + unsafe { + let start_idx = self.read.index(); + let mut src = self.read.cur_ptr(); + let start = self.read.cur_ptr(); + match self.skip_string_unchecked()? { + ParseStatus::HasEscaped => { + let end = self.read.index(); + let raw = as_str(&self.read.as_u8_slice()[start_idx - 1..end]); + let alloc = vis.allocator().unwrap(); + let raw = RawStr::new_in(alloc, raw); + let cnt = parse_string_inplace(&mut src).map_err(|e| self.error(e))?; + self.read.set_ptr(src); + let s = str_from_raw_parts(start, cnt); + check_visit!(self, vis.visit_raw_str(s, raw)) + } + ParseStatus::None => { + let end = self.read.index() - 1; + let s = as_str(&self.read.as_u8_slice()[start_idx..end]); + check_visit!(self, vis.visit_borrowed_str(s)) + } + } } - Ok(()) } #[inline(always)] - fn parse_number_visit(&mut self, first: u8, visitor: &mut V) -> Result<()> + fn parse_number_visit(&mut self, first: u8, vis: &mut V) -> Result<()> where V: JsonVisitor<'de>, { - #[cfg(feature = "arbitrary_precision")] - { + if self.cfg.use_rawnumber || self.cfg.use_raw { let start = self.read.index() - 1; self.skip_number(first)?; let slice = self.read.slice_unchecked(start, self.read.index()); - check_visit!(self, visitor.visit_raw_number(as_str(slice))); - Ok(()) - } - - #[cfg(not(feature = "arbitrary_precision"))] - { + check_visit!(self, vis.visit_raw_number(as_str(slice))) + } else { let ok = match self.parse_number(first)? { - ParserNumber::Float(f) => visitor.visit_f64(f), - ParserNumber::Unsigned(f) => visitor.visit_u64(f), - ParserNumber::Signed(f) => visitor.visit_i64(f), + ParserNumber::Float(f) => vis.visit_f64(f), + ParserNumber::Unsigned(f) => vis.visit_u64(f), + ParserNumber::Signed(f) => vis.visit_i64(f), }; - check_visit!(self, ok); - Ok(()) + check_visit!(self, ok) } } #[inline(always)] - fn parse_number_inplace(&mut self, first: u8, visitor: &mut V) -> Result<()> + fn parse_number_inplace(&mut self, first: u8, vis: &mut V) -> Result<()> where V: JsonVisitor<'de>, { - #[cfg(feature = "arbitrary_precision")] - { + if self.cfg.use_rawnumber || self.cfg.use_raw { let start = self.read.index() - 1; self.skip_number(first)?; let slice = self.read.slice_unchecked(start, self.read.index()); - check_visit!(self, visitor.visit_borrowed_raw_number(as_str(slice))); - Ok(()) - } - - #[cfg(not(feature = "arbitrary_precision"))] - { + check_visit!(self, vis.visit_borrowed_raw_number(as_str(slice))) + } else { let ok = match self.parse_number(first)? { - ParserNumber::Float(f) => visitor.visit_f64(f), - ParserNumber::Unsigned(f) => visitor.visit_u64(f), - ParserNumber::Signed(f) => visitor.visit_i64(f), + ParserNumber::Float(f) => vis.visit_f64(f), + ParserNumber::Unsigned(f) => vis.visit_u64(f), + ParserNumber::Signed(f) => vis.visit_i64(f), }; - check_visit!(self, ok); - Ok(()) + check_visit!(self, ok) } } #[inline(always)] - fn parse_array(&mut self, visitor: &mut V) -> Result<()> + fn parse_array(&mut self, vis: &mut V) -> Result<()> where V: JsonVisitor<'de>, { // parsing empty array - check_visit!(self, visitor.visit_array_start(0)); + check_visit!(self, vis.visit_array_start(0))?; let mut first = match self.skip_space() { - Some(b']') => { - check_visit!(self, visitor.visit_array_end(0)); - return Ok(()); - } + Some(b']') => return check_visit!(self, vis.visit_array_end(0)), first => first, }; let mut count = 0; loop { match first { - Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_inplace(c, visitor), - Some(b'"') => self.parse_string_inplace(visitor), - Some(b'{') => self.parse_object(visitor), - Some(b'[') => self.parse_array(visitor), - Some(first) => self.parse_literal_visit(first, visitor), + Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_inplace(c, vis), + Some(b'"') => self.parse_string_inplace(vis), + Some(b'{') => self.parse_object(vis), + Some(b'[') => self.parse_array(vis), + Some(first) => self.parse_literal_visit(first, vis), None => perr!(self, EofWhileParsing), }?; count += 1; first = match self.skip_space() { - Some(b']') => { - check_visit!(self, visitor.visit_array_end(count)); - return Ok(()); - } + Some(b']') => return check_visit!(self, vis.visit_array_end(count)), Some(b',') => self.skip_space(), _ => return perr!(self, ExpectedArrayCommaOrEnd), }; @@ -353,40 +366,30 @@ where } #[inline(always)] - fn parse_object(&mut self, visitor: &mut V) -> Result<()> + fn parse_object(&mut self, vis: &mut V) -> Result<()> where V: JsonVisitor<'de>, { // parsing empty object let mut count: usize = 0; - check_visit!(self, visitor.visit_object_start(0)); + check_visit!(self, vis.visit_object_start(0))?; match self.skip_space() { - Some(b'}') => { - check_visit!(self, visitor.visit_object_end(0)); - return Ok(()); - } + Some(b'}') => return check_visit!(self, vis.visit_object_end(0)), Some(b'"') => {} - _ => { - return perr!(self, ExpectObjectKeyOrEnd); - } + _ => return perr!(self, ExpectObjectKeyOrEnd), } // loop for each object key and value loop { - self.parse_string_inplace(visitor)?; + self.parse_string_inplace(vis)?; self.parse_object_clo()?; - self.parse_value(visitor)?; + self.parse_value(vis)?; count += 1; match self.skip_space() { - Some(b'}') => { - check_visit!(self, visitor.visit_object_end(count)); - return Ok(()); - } + Some(b'}') => return check_visit!(self, vis.visit_object_end(count)), Some(b',') => match self.skip_space() { Some(b'"') => continue, - _ => { - return perr!(self, ExpectObjectKeyOrEnd); - } + _ => return perr!(self, ExpectObjectKeyOrEnd), }, _ => return perr!(self, ExpectedArrayCommaOrEnd), } @@ -394,7 +397,7 @@ where } #[inline(always)] - fn parse_literal_visit(&mut self, first: u8, visitor: &mut V) -> Result<()> + fn parse_literal_visit(&mut self, first: u8, vis: &mut V) -> Result<()> where V: JsonVisitor<'de>, { @@ -408,17 +411,16 @@ where let reader = &mut self.read; if let Some(chunk) = reader.next_n(literal.len()) { if chunk != literal.as_bytes() { - perr!(self, InvalidLiteral) - } else { - let ok = match first { - b't' => visitor.visit_bool(true), - b'f' => visitor.visit_bool(false), - b'n' => visitor.visit_null(), - _ => unreachable!(), - }; - check_visit!(self, ok); - Ok(()) + return perr!(self, InvalidLiteral); } + + let ok = match first { + b't' => vis.visit_bool(true), + b'f' => vis.visit_bool(false), + b'n' => vis.visit_null(), + _ => unreachable!(), + }; + check_visit!(self, ok) } else { perr!(self, EofWhileParsing) } @@ -506,250 +508,103 @@ where } #[inline(always)] - pub(crate) fn parse_dom(&mut self, visitor: &mut V) -> Result<()> + pub(crate) fn parse_dom(&mut self, vis: &mut V) -> Result<()> where V: JsonVisitor<'de>, { - check_visit!(self, visitor.visit_dom_start()); - self.parse_value(visitor)?; - check_visit!(self, visitor.visit_dom_end()); - Ok(()) + check_visit!(self, vis.visit_dom_start())?; + self.parse_value(vis)?; + check_visit!(self, vis.visit_dom_end()) } - pub(crate) fn parse_value_without_padding(&mut self, visitor: &mut V) -> Result<()> + #[inline(always)] + pub(crate) fn parse_dom2(&mut self, vis: &mut V, strbuf: &mut Vec) -> Result<()> where V: JsonVisitor<'de>, { - check_visit!(self, visitor.visit_dom_start()); - const COMMON_DEPTH: usize = 20; - const ARR_MASK: u32 = 1u32 << 31; - const OBJ_MASK: u32 = 0u32; - let mut depth = SmallVec::<[u32; COMMON_DEPTH]>::new(); - let mut c: u8; + check_visit!(self, vis.visit_dom_start())?; + self.parse_value2(vis, strbuf)?; + check_visit!(self, vis.visit_dom_end()) + } - enum Fsm { - ScopeEnd, - ArrVal, - ObjKey, + pub(crate) fn parse_value2>( + &mut self, + vis: &mut V, + strbuf: &mut Vec, + ) -> Result<()> { + match self.skip_space() { + Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_visit(c, vis), + Some(b'"') => self.parse_string_owned(vis, strbuf), + Some(b'{') => self.parse_object2(vis, strbuf), + Some(b'[') => self.parse_array2(vis, strbuf), + Some(first) => self.parse_literal_visit(first, vis), + None => perr!(self, EofWhileParsing), } + } - let mut strbuf = Vec::with_capacity(DEFAULT_KEY_BUF_CAPACITY); - let mut state; - match self.skip_space2() { - b'[' => { - check_visit!(self, visitor.visit_array_start(0)); - depth.push(ARR_MASK); - c = self.skip_space2(); - if c == b']' { - check_visit!(self, visitor.visit_array_end(0)); - state = Fsm::ScopeEnd; - } else { - state = Fsm::ArrVal; - } - } - b'{' => { - check_visit!(self, visitor.visit_object_start(0)); - depth.push(OBJ_MASK); - c = self.skip_space2(); - if c == b'}' { - check_visit!(self, visitor.visit_object_end(0)); - state = Fsm::ScopeEnd; - } else { - state = Fsm::ObjKey; - } - } - c @ b'-' | c @ b'0'..=b'9' => { - self.parse_number_visit(c, visitor)?; - check_visit!(self, visitor.visit_dom_end()); - return Ok(()); - } - b'"' => { - self.parse_string_owned(visitor, &mut strbuf)?; - check_visit!(self, visitor.visit_dom_end()); - return Ok(()); - } - 0 => { - return perr!(self, EofWhileParsing); - } - first => { - self.parse_literal_visit(first, visitor)?; - check_visit!(self, visitor.visit_dom_end()); - return Ok(()); - } + pub(crate) fn parse_object2>( + &mut self, + vis: &mut V, + strbuf: &mut Vec, + ) -> Result<()> { + // parsing empty object + let mut count: usize = 0; + check_visit!(self, vis.visit_object_start(0))?; + match self.skip_space() { + Some(b'}') => return check_visit!(self, vis.visit_object_end(0)), + Some(b'"') => {} + _ => return perr!(self, ExpectObjectKeyOrEnd), } + // loop for each object key and value loop { - match state { - Fsm::ArrVal => { - 'arr_val: loop { - match c { - b'{' => { - check_visit!(self, visitor.visit_object_start(0)); - depth.push(OBJ_MASK); - c = self.skip_space2(); - if c == b'}' { - check_visit!(self, visitor.visit_object_end(0)); - state = Fsm::ScopeEnd; - } else { - state = Fsm::ObjKey; - } - break 'arr_val; - } - b'[' => { - check_visit!(self, visitor.visit_array_start(0)); - depth.push(ARR_MASK); - c = self.skip_space2(); - if c == b']' { - check_visit!(self, visitor.visit_array_end(0)); - state = Fsm::ScopeEnd; - break 'arr_val; - } - - continue 'arr_val; - } - c @ b'-' | c @ b'0'..=b'9' => self.parse_number_visit(c, visitor)?, - b'"' => self.parse_string_owned(visitor, &mut strbuf)?, - first => self.parse_literal_visit(first, visitor)?, - } - // count after array primitive value end - let len = depth.len(); - depth[len - 1] += 1; - match self.skip_space2() { - b',' => { - c = self.skip_space2(); - continue 'arr_val; - } - b']' => { - let back = depth[depth.len() - 1]; - check_visit!( - self, - visitor.visit_array_end((back & (ARR_MASK - 1)) as usize) - ); - state = Fsm::ScopeEnd; - break 'arr_val; - } - _ => return perr!(self, ExpectedArrayCommaOrEnd), - } - } - } - Fsm::ObjKey => { - 'obj_key: loop { - if c != b'"' { - return perr!(self, ExpectObjectKeyOrEnd); - } - self.parse_string_owned(visitor, &mut strbuf)?; - self.parse_object_clo()?; - match self.skip_space2() { - b'{' => { - check_visit!(self, visitor.visit_object_start(0)); - depth.push(OBJ_MASK); - c = self.skip_space2(); - if c == b'}' { - check_visit!(self, visitor.visit_object_end(0)); - state = Fsm::ScopeEnd; - break 'obj_key; - } - - continue 'obj_key; - } - b'[' => { - check_visit!(self, visitor.visit_array_start(0)); - depth.push(ARR_MASK); - c = self.skip_space2(); - if c == b']' { - check_visit!(self, visitor.visit_array_end(0)); - state = Fsm::ScopeEnd; - } else { - state = Fsm::ArrVal; - } - break 'obj_key; - } - c @ b'-' | c @ b'0'..=b'9' => self.parse_number_visit(c, visitor)?, - b'"' => self.parse_string_owned(visitor, &mut strbuf)?, - first => self.parse_literal_visit(first, visitor)?, - } - // count after object primitive value end - let len = depth.len(); - depth[len - 1] += 1; - match self.skip_space2() { - b',' => { - c = self.skip_space2(); - - continue 'obj_key; - } - b'}' => { - let back = depth[depth.len() - 1]; - check_visit!( - self, - visitor.visit_object_end((back & (ARR_MASK - 1)) as usize) - ); - state = Fsm::ScopeEnd; - break 'obj_key; - } - _ => return perr!(self, ExpectedArrayCommaOrEnd), - } - } - } - Fsm::ScopeEnd => { - 'scope_end: loop { - depth.pop(); - if depth.is_empty() { - // Note: we will not check trailing characters - // because get_from maybe returns all remaining bytes. - check_visit!(self, visitor.visit_dom_end()); - return Ok(()); - } - // count after container value end - let len = depth.len(); - depth[len - 1] += 1; - c = self.skip_space2(); - if (depth[len - 1] & ARR_MASK) != 0 { - // parent is array - match c { - b',' => { - c = self.skip_space2(); - state = Fsm::ArrVal; - - break 'scope_end; - } - b']' => { - let back = depth[depth.len() - 1]; - check_visit!( - self, - visitor.visit_array_end((back & (ARR_MASK - 1)) as usize) - ); - - continue 'scope_end; - } - _ => return perr!(self, ExpectedArrayCommaOrEnd), - } - } else { - // parent is object - match c { - b',' => { - c = self.skip_space2(); - state = Fsm::ObjKey; - - break 'scope_end; - } - b'}' => { - let back = depth[depth.len() - 1]; - check_visit!( - self, - visitor.visit_object_end((back & (ARR_MASK - 1)) as usize) - ); - - continue 'scope_end; - } - _ => return perr!(self, ExpectedObjectCommaOrEnd), - } - } - } - } + self.parse_string_owned(vis, strbuf)?; + self.parse_object_clo()?; + self.parse_value2(vis, strbuf)?; + count += 1; + match self.skip_space() { + Some(b'}') => return check_visit!(self, vis.visit_object_end(count)), + Some(b',') => match self.skip_space() { + Some(b'"') => continue, + _ => return perr!(self, ExpectObjectKeyOrEnd), + }, + _ => return perr!(self, ExpectedArrayCommaOrEnd), } } } + pub(crate) fn parse_array2>( + &mut self, + visitor: &mut V, + strbuf: &mut Vec, + ) -> Result<()> { + // parsing empty array + check_visit!(self, visitor.visit_array_start(0))?; + + let mut first = match self.skip_space() { + Some(b']') => return check_visit!(self, visitor.visit_array_end(0)), + first => first, + }; + + let mut count = 0; + loop { + match first { + Some(c @ b'-' | c @ b'0'..=b'9') => self.parse_number_visit(c, visitor), + Some(b'"') => self.parse_string_owned(visitor, strbuf), + Some(b'{') => self.parse_object2(visitor, strbuf), + Some(b'[') => self.parse_array2(visitor, strbuf), + Some(first) => self.parse_literal_visit(first, visitor), + None => perr!(self, EofWhileParsing), + }?; + count += 1; + first = match self.skip_space() { + Some(b']') => return check_visit!(self, visitor.visit_array_end(count)), + Some(b',') => self.skip_space(), + _ => return perr!(self, ExpectedArrayCommaOrEnd), + }; + } + } + #[inline(always)] pub(crate) fn parse_str_impl<'own>( &mut self, @@ -969,7 +824,7 @@ where #[inline(always)] fn get_next_token(&mut self, tokens: [u8; N], advance: usize) -> Option { let r = &mut self.read; - const LANS: usize = u8x32::lanes(); + const LANS: usize = u8x32::LANES; while let Some(chunk) = r.peek_n(LANS) { let v = unsafe { u8x32::from_slice_unaligned_unchecked(chunk) }; let mut vor = m8x32::splat(false); @@ -998,11 +853,17 @@ where None } + #[inline(always)] + unsafe fn skip_string_unchecked2(&mut self) -> Result<()> { + let _ = self.skip_string_unchecked()?; + Ok(()) + } + // skip_string skips a JSON string, and return the later parts after closed quote, and the // escaped status. skip_string always start with the quote marks. #[inline(always)] - fn skip_string_impl(&mut self) -> Result { - const LANS: usize = u8x32::lanes(); + unsafe fn skip_string_unchecked(&mut self) -> Result { + const LANS: usize = u8x32::LANES; let r = &mut self.read; let mut quote_bits; let mut escaped; @@ -1051,11 +912,6 @@ where perr!(self, EofWhileParsing) } - #[inline(always)] - fn skip_string_unchecked(&mut self) -> Result { - self.skip_string_impl() - } - fn skip_escaped_chars(&mut self) -> Result<()> { match self.read.peek() { Some(b'u') => { @@ -1081,7 +937,7 @@ where // skip_string skips a JSON string with validation. #[inline(always)] fn skip_string(&mut self) -> Result { - const LANS: usize = u8x32::lanes(); + const LANS: usize = u8x32::LANES; let mut status = ParseStatus::None; while let Some(chunk) = self.read.peek_n(LANS) { @@ -1310,65 +1166,6 @@ where None } - #[inline(always)] - pub(crate) fn skip_space2(&mut self) -> u8 { - let reader = &mut self.read; - // fast path 1: for nospace or single space - // most JSON is like ` "name": "balabala" ` - if let Some(ch) = reader.next() { - if !is_whitespace(ch) { - return ch; - } - } - if let Some(ch) = reader.next() { - if !is_whitespace(ch) { - return ch; - } - } - - // fast path 2: reuse the bitmap for short key or numbers - let nospace_offset = (reader.index() as isize) - self.nospace_start; - if nospace_offset < 64 { - let bitmap = { - let mask = !((1 << nospace_offset) - 1); - self.nospace_bits & mask - }; - if bitmap != 0 { - let cnt = bitmap.trailing_zeros() as usize; - let ch = reader.at(self.nospace_start as usize + cnt); - reader.set_index(self.nospace_start as usize + cnt + 1); - - return ch; - } else { - // we can still fast skip the marked space in here. - reader.set_index(self.nospace_start as usize + 64); - } - } - - // then we use simd to accelerate skipping space - while let Some(chunk) = reader.peek_n(64) { - let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 64]) }; - let bitmap = unsafe { get_nonspace_bits(chunk) }; - if bitmap != 0 { - self.nospace_bits = bitmap; - self.nospace_start = reader.index() as isize; - let cnt = bitmap.trailing_zeros() as usize; - let ch = chunk[cnt]; - reader.eat(cnt + 1); - - return ch; - } - reader.eat(64) - } - - while let Some(ch) = reader.next() { - if !is_whitespace(ch) { - return ch; - } - } - 0 - } - #[inline(always)] pub(crate) fn skip_space_peek(&mut self) -> Option { let ret = self.skip_space()?; @@ -1563,7 +1360,7 @@ where match ch { Some(b'-' | b'0'..=b'9') => self.skip_number_unsafe(), Some(b'"') => { - status = self.skip_string_unchecked()?; + status = unsafe { self.skip_string_unchecked() }?; Ok(()) } Some(b'{') => self.skip_container(b'{', b'}'), @@ -1630,9 +1427,9 @@ where match self.skip_space() { Some(b'{') => self.skip_container(b'{', b'}')?, Some(b'[') => self.skip_container(b'[', b']')?, - Some(b'"') => { + Some(b'"') => unsafe { let _ = self.skip_string_unchecked()?; - } + }, None => return perr!(self, EofWhileParsing), _ => {} }; @@ -1736,9 +1533,7 @@ where match self.skip_space() { Some(b'{') => self.skip_container(b'{', b'}')?, Some(b'[') => self.skip_container(b'[', b']')?, - Some(b'"') => { - let _ = self.skip_string_unchecked()?; - } + Some(b'"') => unsafe { self.skip_string_unchecked2() }?, Some(b']') => return perr!(self, GetInEmptyArray), None => return perr!(self, EofWhileParsing), _ => {} @@ -1759,7 +1554,7 @@ where Ok(()) } - pub(crate) fn get_from_with_iter( + pub(crate) fn get_from_with_iter_unchecked( &mut self, path: P, ) -> Result<(&'de [u8], ParseStatus)> @@ -1780,7 +1575,7 @@ where self.skip_one() } - pub(crate) fn get_from_with_iter_checked( + pub(crate) fn get_from_with_iter( &mut self, path: P, ) -> Result<(&'de [u8], ParseStatus)> @@ -1894,9 +1689,7 @@ where match self.skip_space() { Some(b'{') => self.skip_container(b'{', b'}')?, Some(b'[') => self.skip_container(b'[', b']')?, - Some(b'"') => { - let _ = self.skip_string_unchecked()?; - } + Some(b'"') => unsafe { self.skip_string_unchecked2() }?, None => return perr!(self, EofWhileParsing), _ => {} }; @@ -1977,11 +1770,13 @@ where } #[cfg(test)] + #[allow(dead_code)] pub(crate) fn remain_str(&self) -> &'de str { as_str(self.remain_u8_slice()) } #[cfg(test)] + #[allow(dead_code)] pub(crate) fn remain_u8_slice(&self) -> &'de [u8] { let reader = &self.read; let start = reader.index(); @@ -2021,9 +1816,7 @@ where match self.skip_space() { Some(b'{') => self.skip_container(b'{', b'}')?, Some(b'[') => self.skip_container(b'[', b']')?, - Some(b'"') => { - let _ = self.skip_string_unchecked()?; - } + Some(b'"') => unsafe { self.skip_string_unchecked2() }?, None => return perr!(self, EofWhileParsing), _ => {} }; diff --git a/src/reader.rs b/src/reader.rs index c2ffb1a..26eb857 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -55,7 +55,6 @@ where /// Trait is used by the deserializer for iterating over input. And it is sealed and cannot be /// implemented for types outside of sonic_rs. - #[doc(hidden)] pub trait Reader<'de>: Sealed { fn remain(&self) -> usize; diff --git a/src/serde/de.rs b/src/serde/de.rs index 959161d..1a4ee60 100644 --- a/src/serde/de.rs +++ b/src/serde/de.rs @@ -45,6 +45,18 @@ impl<'de, R: Reader<'de>> Deserializer { } } + /// Parse all number as `RawNumber` + pub fn use_rawnumber(mut self) -> Self { + self.parser.cfg.use_rawnumber = true; + self + } + + /// Parse all number as `RawNumber` and parse all string as `RawJSON` + pub fn use_raw(mut self) -> Self { + self.parser.cfg.use_raw = true; + self + } + /// Deserialize a JSON stream to a Rust data structure. /// /// It can be used repeatedly and we do not check trailing chars after deserilalized. @@ -311,7 +323,8 @@ impl<'de, R: Reader<'de>> Deserializer { let mut val = Value::new(); let val = if self.parser.read.index() == 0 { // get n to check trailing characters in later - let n = val.parse_with_padding(self.parser.read.as_u8_slice())?; + let cfg = self.parser.cfg; + let n = val.parse_with_padding(self.parser.read.as_u8_slice(), cfg)?; self.parser.read.eat(n); val } else { @@ -324,7 +337,7 @@ impl<'de, R: Reader<'de>> Deserializer { }; // deserialize some json parts into `Value`, not use padding buffer, avoid the memory // copy - val.parse_without_padding(shared, &mut self.parser)?; + val.parse_without_padding(shared, &mut self.scratch, &mut self.parser)?; val }; @@ -401,13 +414,10 @@ impl<'de, 'a, R: Reader<'de>> de::Deserializer<'de> for &'a mut Deserializer visitor.visit_bool(false) } c @ b'-' | c @ b'0'..=b'9' => tri!(self.parser.parse_number(c)).visit(visitor), - b'"' => { - self.scratch.clear(); - match tri!(self.parser.parse_str_impl(&mut self.scratch)) { - Reference::Borrowed(s) => visitor.visit_borrowed_str(s), - Reference::Copied(s) => visitor.visit_str(s), - } - } + b'"' => match tri!(self.parser.parse_str_impl(&mut self.scratch)) { + Reference::Borrowed(s) => visitor.visit_borrowed_str(s), + Reference::Copied(s) => visitor.visit_str(s), + }, b'[' => { let ret = { let _ = DepthGuard::guard(self); @@ -556,13 +566,10 @@ impl<'de, 'a, R: Reader<'de>> de::Deserializer<'de> for &'a mut Deserializer }; let value = match peek { - b'"' => { - self.scratch.clear(); - match tri!(self.parser.parse_str_impl(&mut self.scratch)) { - Reference::Borrowed(s) => visitor.visit_borrowed_str(s), - Reference::Copied(s) => visitor.visit_str(s), - } - } + b'"' => match tri!(self.parser.parse_str_impl(&mut self.scratch)) { + Reference::Borrowed(s) => visitor.visit_borrowed_str(s), + Reference::Copied(s) => visitor.visit_str(s), + }, _ => Err(self.peek_invalid_type(peek, &visitor)), }; @@ -1125,29 +1132,6 @@ macro_rules! deserialize_numeric_key { }; } -impl<'de, 'a, R> MapKey<'a, R> -where - R: Reader<'de>, -{ - fn deserialize_number(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.de.parser.read.peek() { - Some(b'0'..=b'9' | b'-') => {} - _ => return Err(self.de.parser.error(ErrorCode::ExpectedNumericKey)), - } - - let value = tri!(self.de.deserialize_number(visitor)); - - if self.de.parser.read.next() != Some(b'"') { - return Err(self.de.parser.error(ErrorCode::ExpectedQuote)); - } - - Ok(value) - } -} - impl<'de, 'a, R> de::Deserializer<'de> for MapKey<'a, R> where R: Reader<'de>, @@ -1268,6 +1252,11 @@ where T: de::Deserialize<'de>, { let mut de = Deserializer::new(read); + #[cfg(feature = "arbitrary_precision")] + { + de = de.use_rawnumber(); + } + let value = tri!(de::Deserialize::deserialize(&mut de)); // Make sure the whole stream has been consumed. diff --git a/src/serde/rawnumber.rs b/src/serde/rawnumber.rs index aec3688..6ee31b4 100644 --- a/src/serde/rawnumber.rs +++ b/src/serde/rawnumber.rs @@ -38,6 +38,10 @@ impl RawNumber { Self { n: FastStr::new(s) } } + pub(crate) fn from_faststr(n: FastStr) -> Self { + Self { n } + } + /// as_str returns the underlying string representation of the number. pub fn as_str(&self) -> &str { self.n.as_str() diff --git a/src/serde/ser.rs b/src/serde/ser.rs index ad5cbc2..6e17f36 100644 --- a/src/serde/ser.rs +++ b/src/serde/ser.rs @@ -700,7 +700,10 @@ where Compound::Map { .. } => ser::SerializeMap::serialize_entry(self, key, value), Compound::RawValue { ser, .. } => { - if key == crate::serde::rawnumber::TOKEN || key == crate::lazyvalue::TOKEN { + if key == crate::serde::rawnumber::TOKEN + || key == crate::lazyvalue::TOKEN + || key == crate::value::Value::RAW_TOKEN + { value.serialize(RawValueStrEmitter(ser)) } else { Err(invalid_raw_value()) @@ -763,6 +766,7 @@ struct MapKeySerializer<'a, W: 'a, F: 'a> { ser: &'a mut Serializer, } +// TODO: fix the error info fn invalid_raw_value() -> Error { Error::ser_error(ErrorCode::InvalidJsonValue) } @@ -778,7 +782,7 @@ macro_rules! quote { .formatter .begin_string(&mut $self.ser.writer) .map_err(Error::io)); - tri!($value); + tri!($value.map_err(Error::io)); return $self .ser .formatter @@ -823,116 +827,83 @@ where type SerializeTupleStruct = Impossible<(), Error>; type SerializeTupleVariant = Impossible<(), Error>; type SerializeMap = Impossible<(), Error>; - type SerializeStruct = Impossible<(), Error>; + type SerializeStruct = Compound<'a, W, F>; type SerializeStructVariant = Impossible<(), Error>; fn serialize_bool(self, value: bool) -> Result<()> { quote!( self, - self.ser - .formatter - .write_bool(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_bool(&mut self.ser.writer, value) ); } fn serialize_i8(self, value: i8) -> Result<()> { quote!( self, - self.ser - .formatter - .write_i8(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_i8(&mut self.ser.writer, value) ); } fn serialize_i16(self, value: i16) -> Result<()> { quote!( self, - self.ser - .formatter - .write_i16(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_i16(&mut self.ser.writer, value) ); } fn serialize_i32(self, value: i32) -> Result<()> { quote!( self, - self.ser - .formatter - .write_i32(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_i32(&mut self.ser.writer, value) ); } fn serialize_i64(self, value: i64) -> Result<()> { quote!( self, - self.ser - .formatter - .write_i64(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_i64(&mut self.ser.writer, value) ); } fn serialize_i128(self, value: i128) -> Result<()> { quote!( self, - self.ser - .formatter - .write_i128(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_i128(&mut self.ser.writer, value) ); } fn serialize_u8(self, value: u8) -> Result<()> { quote!( self, - self.ser - .formatter - .write_u8(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_u8(&mut self.ser.writer, value) ); } fn serialize_u16(self, value: u16) -> Result<()> { quote!( self, - self.ser - .formatter - .write_u16(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_u16(&mut self.ser.writer, value) ); } fn serialize_u32(self, value: u32) -> Result<()> { quote!( self, - self.ser - .formatter - .write_u32(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_u32(&mut self.ser.writer, value) ); } fn serialize_u64(self, value: u64) -> Result<()> { quote!( self, - self.ser - .formatter - .write_u64(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_u64(&mut self.ser.writer, value) ); } fn serialize_u128(self, value: u128) -> Result<()> { quote!( self, - self.ser - .formatter - .write_u128(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_u128(&mut self.ser.writer, value) ); } @@ -940,11 +911,8 @@ where if value.is_finite() { quote!( self, - self.ser - .formatter - .write_f32(&mut self.ser.writer, value) - .map_err(Error::io) - ); + self.ser.formatter.write_f32(&mut self.ser.writer, value) + ) } else { Err(key_must_be_str_or_num(Unexpected::Other( "NaN or Infinite f32", @@ -956,10 +924,7 @@ where if value.is_finite() { quote!( self, - self.ser - .formatter - .write_f64(&mut self.ser.writer, value) - .map_err(Error::io) + self.ser.formatter.write_f64(&mut self.ser.writer, value) ); } else { Err(key_must_be_str_or_num(Unexpected::Other( @@ -1039,7 +1004,11 @@ where } fn serialize_struct(self, name: &'static str, _len: usize) -> Result { - Err(key_must_be_str_or_num(Unexpected::Other(name))) + if name == crate::value::Value::RAW_TOKEN { + Ok(Compound::RawValue { ser: self.ser }) + } else { + Err(key_must_be_str_or_num(Unexpected::Other(name))) + } } fn serialize_struct_variant( diff --git a/src/util/num/common.rs b/src/util/num/common.rs index 18996f4..f146d71 100644 --- a/src/util/num/common.rs +++ b/src/util/num/common.rs @@ -10,9 +10,6 @@ pub(crate) trait ByteSlice { /// Write a 64-bit integer as 8 bytes in little-endian order. fn write_u64(&mut self, value: u64); - /// Calculate the offset of a slice from another. - fn offset_from(&self, other: &Self) -> isize; - /// Iteratively parse and consume digits from bytes. /// Returns the same bytes with consumed digits being /// elided. @@ -32,11 +29,6 @@ impl ByteSlice for [u8] { self[..8].copy_from_slice(&value.to_le_bytes()) } - #[inline] - fn offset_from(&self, other: &Self) -> isize { - other.len() as isize - self.len() as isize - } - #[inline] fn parse_digits(&self, mut func: impl FnMut(u8)) -> &Self { let mut s = self; diff --git a/src/util/num/float.rs b/src/util/num/float.rs index 631af31..dd3d6b1 100644 --- a/src/util/num/float.rs +++ b/src/util/num/float.rs @@ -89,18 +89,22 @@ pub trait RawFloat: /// This is only called in the fast-path algorithm, and therefore /// will not lose precision, since the value will always have /// only if the value is <= Self::MAX_MANTISSA_FAST_PATH. + #[allow(unused)] fn from_u64(v: u64) -> Self; /// Performs a raw transmutation from an integer. fn from_u64_bits(v: u64) -> Self; /// Get a small power-of-ten for fast-path multiplication. + #[allow(unused)] fn pow10_fast_path(exponent: usize) -> Self; /// Returns the category that this number falls into. + #[allow(unused)] fn classify(self) -> FpCategory; /// Returns the mantissa, exponent and sign as integers. + #[allow(unused)] fn integer_decode(self) -> (u64, i16, i8); } diff --git a/src/util/simd/avx2.rs b/src/util/simd/avx2.rs index 2bb6e4e..a1c1481 100644 --- a/src/util/simd/avx2.rs +++ b/src/util/simd/avx2.rs @@ -5,7 +5,6 @@ use std::{ }; use super::{Mask, Simd}; -use crate::impl_lanes; #[derive(Debug)] #[repr(transparent)] @@ -56,10 +55,6 @@ impl Simd for Simd256i { #[repr(transparent)] pub struct Mask256(__m256i); -impl_lanes!(Simd256u, 32); - -impl_lanes!(Mask256, 32); - impl Mask for Mask256 { type BitMask = u32; type Element = u8; diff --git a/src/util/simd/mod.rs b/src/util/simd/mod.rs index a7431a7..49245f4 100644 --- a/src/util/simd/mod.rs +++ b/src/util/simd/mod.rs @@ -1,18 +1,8 @@ +#[allow(unused_imports)] +#[allow(unused)] pub mod bits; mod traits; -#[doc(hidden)] -#[macro_export] -macro_rules! impl_lanes { - ($simd: ty, $lane: expr) => { - impl $simd { - pub const fn lanes() -> usize { - $lane - } - } - }; -} - // pick v128 simd cfg_if::cfg_if! { if #[cfg(target_feature = "sse2")] { @@ -45,12 +35,12 @@ pub use self::traits::{BitMask, Mask, Simd}; mod v512; use self::v512::*; +#[cfg(all(target_feature = "neon", target_arch = "aarch64"))] pub type u8x16 = Simd128u; + pub type u8x32 = Simd256u; pub type u8x64 = Simd512u; pub type i8x32 = Simd256i; -pub type m8x16 = Mask128; pub type m8x32 = Mask256; -pub type m8x64 = Mask512; diff --git a/src/util/simd/sse2.rs b/src/util/simd/sse2.rs index f4065a7..666f2b6 100644 --- a/src/util/simd/sse2.rs +++ b/src/util/simd/sse2.rs @@ -4,7 +4,6 @@ use std::{ }; use super::{Mask, Simd}; -use crate::impl_lanes; #[derive(Debug)] #[repr(transparent)] @@ -56,10 +55,6 @@ impl Simd for Simd128i { #[repr(transparent)] pub struct Mask128(__m128i); -impl_lanes!(Simd128u, 16); - -impl_lanes!(Mask128, 16); - impl Mask for Mask128 { type BitMask = u16; type Element = u8; diff --git a/src/util/simd/v128.rs b/src/util/simd/v128.rs index c52924d..06b2909 100644 --- a/src/util/simd/v128.rs +++ b/src/util/simd/v128.rs @@ -1,10 +1,5 @@ use std::ops::{BitAnd, BitOr, BitOrAssign}; -use crate::{ - impl_lanes, - util::simd::{Mask, Simd}, -}; - #[derive(Debug)] pub struct Simd128i([i8; 16]); diff --git a/src/util/simd/v256.rs b/src/util/simd/v256.rs index 0151564..9fb7de7 100644 --- a/src/util/simd/v256.rs +++ b/src/util/simd/v256.rs @@ -1,11 +1,6 @@ use std::ops::{BitAnd, BitOr, BitOrAssign}; use super::{bits::combine_u16, Mask, Mask128, Simd, Simd128i, Simd128u}; -use crate::impl_lanes; - -impl_lanes!(Simd256u, 32); - -impl_lanes!(Mask256, 32); #[derive(Debug)] #[repr(transparent)] diff --git a/src/util/simd/v512.rs b/src/util/simd/v512.rs index 0b6ead7..6d06104 100644 --- a/src/util/simd/v512.rs +++ b/src/util/simd/v512.rs @@ -1,11 +1,6 @@ use std::ops::{BitAnd, BitOr, BitOrAssign}; use super::{bits::combine_u32, Mask, Mask256, Simd, Simd256i, Simd256u}; -use crate::impl_lanes; - -impl_lanes!(Simd512u, 64); - -impl_lanes!(Mask512, 64); #[derive(Debug)] #[repr(transparent)] diff --git a/src/value/array.rs b/src/value/array.rs index 78d5459..2d9d1ec 100644 --- a/src/value/array.rs +++ b/src/value/array.rs @@ -297,11 +297,11 @@ impl Array { /// assert_eq!(arr, [2, 3, 4]); /// ``` #[inline] - pub fn retain_mut(&mut self, mut f: F) + pub fn retain_mut(&mut self, f: F) where F: FnMut(&mut Value) -> bool, { - if let ValueMut::Array(mut array) = self.0.as_mut() { + if let ValueMut::Array(array) = self.0.as_mut() { array.retain_mut(f); } else { panic!("Array::retain_mut: not an array"); @@ -355,7 +355,7 @@ impl Array { /// [`drain`]: Array::drain #[inline] pub fn truncate(&mut self, len: usize) { - if let ValueMut::Array(mut array) = self.0.as_mut() { + if let ValueMut::Array(array) = self.0.as_mut() { array.truncate(len); } else { panic!("Array::truncate: not an array"); @@ -380,7 +380,7 @@ impl Array { /// ``` #[inline] pub fn push>(&mut self, val: T) { - if let ValueMut::Array(mut array) = self.0.as_mut() { + if let ValueMut::Array(array) = self.0.as_mut() { array.push(val.into()); } else { panic!("Array::push: not an array"); @@ -397,13 +397,16 @@ impl Array { /// Returns the number of elements in the array. #[inline] pub fn len(&self) -> usize { - self.0.len() + self.0 + .as_value_slice() + .expect("call len in non-array type") + .len() } /// Returns `true` if the array contains no elements. #[inline] pub fn is_empty(&self) -> bool { - self.0.len() == 0 + self.len() == 0 } /// Extracts a mutable slice of the entire array. Equivalent to &mut s[..]. diff --git a/src/value/from.rs b/src/value/from.rs index 8aaecc5..e52948e 100644 --- a/src/value/from.rs +++ b/src/value/from.rs @@ -159,7 +159,7 @@ impl> From> for Value { for v in val { arr.append_value(v.into()); } - arr.into() + arr } } @@ -200,7 +200,7 @@ impl> From<&[T]> for Value { for v in val { arr.append_value(v.clone().into()); } - arr.into() + arr } } diff --git a/src/value/mod.rs b/src/value/mod.rs index 6de07c0..ce7000f 100644 --- a/src/value/mod.rs +++ b/src/value/mod.rs @@ -28,6 +28,3 @@ pub use self::object::Object; pub use self::ser::to_value; #[doc(inline)] pub use self::value_trait::{JsonContainerTrait, JsonType, JsonValueMutTrait, JsonValueTrait}; - -const MAX_STR_SIZE: usize = u32::MAX as usize; -const PTR_BITS: usize = 48; diff --git a/src/value/node.rs b/src/value/node.rs index 7a45d1e..558f28c 100644 --- a/src/value/node.rs +++ b/src/value/node.rs @@ -11,7 +11,10 @@ use std::{ use bumpalo::Bump; use faststr::FastStr; -use serde::ser::{Serialize, SerializeMap, SerializeSeq}; +use serde::{ + ser::{Serialize, SerializeMap, SerializeSeq, SerializeStruct}, + Serializer, +}; use super::{ object::Pair, @@ -20,18 +23,16 @@ use super::{ value_trait::{JsonContainerTrait, JsonValueMutTrait}, visitor::JsonVisitor, }; -#[cfg(feature = "arbitrary_precision")] -use crate::RawNumber; use crate::{ + config::DeserializeCfg, error::Result, index::Index, parser::Parser, - pointer::PointerNode, reader::{PaddedSliceRead, Reader}, serde::tri, util::string::str_from_raw_parts, value::{array::Array, object::Object, value_trait::JsonValueTrait}, - JsonNumberTrait, JsonType, Number, + JsonNumberTrait, JsonType, Number, RawNumber, }; /// Represents any valid JSON value. @@ -68,6 +69,38 @@ pub struct Value { pub(crate) data: Data, } +#[rustfmt::skip] +// A compact and mutable JSON Value. +// +// Thera are three kind nodes into the Value: +// - Static Node: no need drop +// - Owned Node : mutable +// - Shared Node: in SharedDom, not mutable +// +// | Kind | 3 bits | 5 bits | 24 bits | ----> 32 bits ----> | 32 bits | 32 bits | limit | +// |-------------|-----------------|-------------------|--------------------------------|-------------------------|----------------------| +// | Null | 0 | 0 | + | | +// | True | 0 | 1 | + | | +// | False | 0 | 2 | + | | +// | I64 | 0 | 3 | + i64 | | +// | U64 | 0 | 4 | + u64 | | +// | F64 | 0 | 5 | + f64 | | +// | empty arr | 0 | 6 | | +// | empty obj | 0 | 7 | | +// | static str | 0 | 8 | | string length + *const u8 | excced will fallback | +// | faststr | 1 | 0 | + Box | | +// |rawnum_fastst| 1 | 1 | + Box | | +// | arr_mut | 1 | 2 | + Box> | | +// | obj_mut | 1 | 3 | + Box> | | +// | str_node | 2 | node idx | string length + *const u8 | max len 2^32 | +// | raw_num_node| 3 | node idx | string length + *const u8 | max len 2^32 | +// | arr_node | 4 | node idx | array length + *const Node | max len 2^32 | +// | obj_node | 5 | node idx | object length + *const Pair | max len 2^32 | +// |str_esc_raw | 6 | *const RawStrHeader (in SharedDom, MUST aligned 8) + *const u8 | | +// | root_node | 7 | *const ShardDom (from Arc, MUST aligned 8) + *const Node (head) | | +// +// NB: we will check the JSON length when parsing, if JSON is > 2GB, will return a error, so we will not check the limits when parsing or using dom. +#[allow(clippy::box_collection)] pub(crate) union Data { pub(crate) uval: u64, pub(crate) ival: i64, @@ -84,7 +117,6 @@ pub(crate) union Data { pub(crate) obj_own: ManuallyDrop>>, pub(crate) arr_own: ManuallyDrop>>, - /// temp pub(crate) parent: u64, } @@ -102,66 +134,34 @@ impl RawStr { } } - unsafe fn set_header(&mut self, hdr: RawStrHeader) { - self.0.write(hdr); - } - - unsafe fn from_raw_parts_mut(ptr: *mut u8) -> RawStr { - let hdr = ptr.sub(size_of::() as usize); - let hdr = hdr as *mut RawStrHeader; - assert!(hdr.is_aligned()); - RawStr(NonNull::new_unchecked(hdr)) + unsafe fn set_str_len(&mut self, str_len: usize) { + let hdr = self.0.as_mut(); + hdr.str_len = str_len as u32; } - unsafe fn into_raw(this: Self) -> *mut u8 { - let hdr = this.0.as_ptr(); - let data = hdr.add(1) as *mut u8; - data + unsafe fn set_index(&mut self, idx: usize) { + let hdr = self.0.as_mut(); + hdr.node_idx = idx as u32; } - unsafe fn new_in(alloc: &mut Bump, s: &str) -> RawStr { - let data_size = s.len(); + pub(crate) unsafe fn new_in(alloc: &mut Bump, raw: &str) -> RawStr { + let data_size = raw.len(); let hdr_size = size_of::(); // aligned to hder, make sure tagged pointer in Meta let align = align_of::(); let layout = Layout::from_size_align(hdr_size + data_size, align).unwrap(); - let ptr = alloc.alloc_layout(layout).as_ptr() as *mut RawStrHeader; - RawStr(NonNull::new_unchecked(ptr)) + let hdr = alloc.alloc_layout(layout).as_ptr() as *mut RawStrHeader; + hdr.write(RawStrHeader { + node_idx: 0, // write later + str_len: 0, // write later + raw_len: data_size as u32, + }); + let dst = hdr.add(1) as *mut u8; + std::ptr::copy_nonoverlapping(raw.as_ptr(), dst, data_size); + RawStr(NonNull::new_unchecked(hdr)) } } -#[allow(clippy::empty_line_after_outer_attr)] -#[rustfmt::skip] -// A compact and mutable JSON Value. -// -// Thera are three kind nodes into the Value: -// - Static Node: no need drop -// - Owned Node : mutable -// - Shared Node: in SharedDom, not mutable -// -// | Kind | 3 bits | 5 bits | 24 bits | ----> 32 bits ----> | 32 bits | 32 bits | limit | -// |-------------|-----------------|-------------------|--------------------------------|-------------------------|----------------------| -// | Null | 0 | 0 | + | | -// | True | 0 | 1 | + | | -// | False | 0 | 2 | + | | -// | I64 | 0 | 3 | + i64 | | -// | U64 | 0 | 4 | + u64 | | -// | F64 | 0 | 5 | + f64 | | -// | empty arr | 0 | 6 | | -// | empty obj | 0 | 7 | | -// | static str | 0 | 8 | | string length + *const u8 | excced will fallback | -// | faststr | 1 | 0 | + Box | | -// |rawnum_fastst| 1 | 1 | + Box | | -// | arr_mut | 1 | 2 | + Box> | | -// | obj_mut | 1 | 3 | + Box> | | -// | str_node | 2 | node idx | string length + *const u8 | max len 2^32 | -// | raw_num_node| 3 | node idx | string length + *const u8 | max len 2^32 | -// | arr_node | 4 | node idx | array length + *const Node | max len 2^32 | -// | obj_node | 5 | node idx | object length + *const Pair | max len 2^32 | -// |str_esc_raw | 6 | *const RawStrHeader (in SharedDom, MUST aligned 8) + *const u8 | | -// | root_node | 7 | *const ShardDom (from Arc, MUST aligned 8) + *const Node (head) | | -// -// NB: we will check the JSON length when parsing, if JSON is > 2GB, will return a error, so we will not check the limits when parsing or using dom. #[derive(Debug, Copy, Clone)] #[repr(C, align(8))] pub(crate) struct RawStrHeader { @@ -181,15 +181,15 @@ pub(crate) union Meta { impl Meta { const STAIC_NODE: u64 = 0; - const NULL: u64 = 0 | (0 << Self::KIND_BITS); - const TRUE: u64 = 0 | (1 << Self::KIND_BITS); - const FALSE: u64 = 0 | (2 << Self::KIND_BITS); - const I64: u64 = 0 | (3 << Self::KIND_BITS); - const U64: u64 = 0 | (4 << Self::KIND_BITS); - const F64: u64 = 0 | (5 << Self::KIND_BITS); - const EMPTY_ARR: u64 = 0 | (6 << Self::KIND_BITS); - const EMPTY_OBJ: u64 = 0 | (7 << Self::KIND_BITS); - const STATIC_STR: u64 = 0 | (8 << Self::KIND_BITS); + const NULL: u64 = (0 << Self::KIND_BITS); + const TRUE: u64 = (1 << Self::KIND_BITS); + const FALSE: u64 = (2 << Self::KIND_BITS); + const I64: u64 = (3 << Self::KIND_BITS); + const U64: u64 = (4 << Self::KIND_BITS); + const F64: u64 = (5 << Self::KIND_BITS); + const EMPTY_ARR: u64 = (6 << Self::KIND_BITS); + const EMPTY_OBJ: u64 = (7 << Self::KIND_BITS); + const STATIC_STR: u64 = (8 << Self::KIND_BITS); const OWNED_NODE: u64 = 1; const FASTSTR: u64 = 1 | (0 << Self::KIND_BITS); @@ -256,14 +256,6 @@ impl Meta { val & 0x7 } - fn is_static(&self) -> bool { - self.get_kind() == Self::STAIC_NODE - } - - fn is_owned(&self) -> bool { - self.get_kind() == Self::OWNED_NODE - } - fn get_type(&self) -> u64 { let val = unsafe { self.val }; let typ = val & Self::TYPE_MASK; @@ -340,23 +332,17 @@ struct NodeInDom<'a> { dom: &'a Shared, } -struct UnpackedRawStr<'a> { - str: &'a str, - raw: &'a str, -} - impl<'a> NodeInDom<'a> { fn get_inner(&self) -> ValueRefInner<'a> { let typ = self.node.meta.get_type(); - let inner = match typ { + match typ { Meta::STR_NODE => ValueRefInner::Str(self.unpack_str()), Meta::RAWNUM_NODE => ValueRefInner::RawNum(self.unpack_str()), Meta::ARR_NODE => ValueRefInner::Array(self.unpack_value_slice()), Meta::OBJ_NODE => ValueRefInner::Object(self.unpack_pair_slice()), - Meta::ESC_RAW_NODE => ValueRefInner::Str(self.unpack_rawstr().str), + Meta::ESC_RAW_NODE => ValueRefInner::Str(self.unpack_str_from_raw()), _ => unreachable!("unknown type {typ} in dom"), - }; - inner + } } fn unpack_str(&self) -> &'a str { @@ -377,12 +363,10 @@ impl<'a> NodeInDom<'a> { unsafe { from_raw_parts(pairs, len) } } - fn unpack_rawstr(&self) -> UnpackedRawStr<'a> { - let (rp, hdr) = self.node.meta.unpack_rawstr_node().unpack(); + fn unpack_str_from_raw(&self) -> &'a str { + let (_, hdr) = self.node.meta.unpack_rawstr_node().unpack(); let sp = unsafe { self.node.data.dom_str.as_ptr() }; - let raw = unsafe { str_from_raw_parts(rp, hdr.raw_len as usize) }; - let str = unsafe { str_from_raw_parts(sp, hdr.str_len as usize) }; - UnpackedRawStr { str, raw } + unsafe { str_from_raw_parts(sp, hdr.str_len as usize) } } } @@ -398,7 +382,7 @@ impl<'a> From> for Value { } /// The value borrowed from the SharedDom -pub(crate) enum ValueDetail<'a> { +enum ValueDetail<'a> { Null, Bool(bool), Number(Number), @@ -490,37 +474,16 @@ impl Drop for Value { } pub(crate) enum ValueMut<'a> { - Null(&'a mut Value), - Bool(&'a mut Value), - Number(&'a mut Value), - Str(&'a mut Value), - RawNum(&'a mut Value), + Null, + Bool, + Number, + Str, + RawNum, Array(&'a mut Vec), Object(&'a mut Vec), } -#[derive(Debug)] -pub(crate) enum ValueRef2<'a> { - Null, - Bool(bool), - Number(Number), - Str(&'a str), - RawNum(&'a str), - Array(&'a [Value]), - Object(&'a [Pair]), - OwnArray(&'a Vec), - OwnObject(&'a Vec), -} - impl Value { - fn is_static_kind(&self) -> bool { - self.meta.get_kind() == Meta::STAIC_NODE - } - - fn is_owned_kind(&self) -> bool { - self.meta.get_kind() == Meta::OWNED_NODE - } - fn is_node_kind(&self) -> bool { matches!( self.meta.get_kind(), @@ -532,10 +495,6 @@ impl Value { ) } - fn is_root_kind(&self) -> bool { - self.meta.get_kind() == Meta::ROOT_NODE - } - fn is_raw_str(&self) -> bool { self.meta.get_type() == Meta::ESC_RAW_NODE } @@ -549,15 +508,13 @@ impl Value { pub(crate) fn as_mut(&mut self) -> ValueMut<'_> { let typ = self.meta.get_type(); match typ { - Meta::NULL => ValueMut::Null(self), - Meta::TRUE | Meta::FALSE => ValueMut::Bool(self), - Meta::F64 | Meta::I64 | Meta::U64 => ValueMut::Number(self), - Meta::STATIC_STR | Meta::STR_NODE | Meta::FASTSTR | Meta::ESC_RAW_NODE => { - ValueMut::Str(self) - } - Meta::RAWNUM_FASTSTR | Meta::RAWNUM_NODE => ValueMut::RawNum(self), - Meta::ARR_MUT => ValueMut::Array(unsafe { &mut *self.data.arr_own }), - Meta::OBJ_MUT => ValueMut::Object(unsafe { &mut *self.data.obj_own }), + Meta::NULL => ValueMut::Null, + Meta::TRUE | Meta::FALSE => ValueMut::Bool, + Meta::F64 | Meta::I64 | Meta::U64 => ValueMut::Number, + Meta::STATIC_STR | Meta::STR_NODE | Meta::FASTSTR | Meta::ESC_RAW_NODE => ValueMut::Str, + Meta::RAWNUM_FASTSTR | Meta::RAWNUM_NODE => ValueMut::RawNum, + Meta::ARR_MUT => ValueMut::Array(unsafe { &mut self.data.arr_own }), + Meta::OBJ_MUT => ValueMut::Object(unsafe { &mut self.data.obj_own }), Meta::ROOT_NODE | Meta::EMPTY_ARR | Meta::EMPTY_OBJ => { /* convert to mutable */ self.to_mut(); @@ -651,10 +608,10 @@ impl Value { node: self, dom: self.unpack_shared(), }), - Meta::FASTSTR => ValueDetail::FastStr(&*self.data.str_own), - Meta::RAWNUM_FASTSTR => ValueDetail::RawNumFasStr(&*self.data.str_own), - Meta::ARR_MUT => ValueDetail::Array(&*self.data.arr_own), - Meta::OBJ_MUT => ValueDetail::Object(&*self.data.obj_own), + Meta::FASTSTR => ValueDetail::FastStr(&self.data.str_own), + Meta::RAWNUM_FASTSTR => ValueDetail::RawNumFasStr(&self.data.str_own), + Meta::ARR_MUT => ValueDetail::Array(&self.data.arr_own), + Meta::OBJ_MUT => ValueDetail::Object(&self.data.obj_own), Meta::ROOT_NODE => ValueDetail::Root(NodeInDom { node: self.data.root.as_ref(), dom: &*self.meta.unpack_root(), @@ -687,7 +644,7 @@ impl Clone for Value { ValueDetail::Number(n) => n.into(), ValueDetail::StaticStr(s) => Value::from_static_str(s), ValueDetail::FastStr(s) => s.into(), - ValueDetail::RawNumFasStr(s) => Value::new_raw_num(s), + ValueDetail::RawNumFasStr(s) => Value::new_rawnum_faststr(s), ValueDetail::Array(a) => a.as_slice().into(), ValueDetail::Object(o) => o.as_slice().into(), ValueDetail::EmptyArray => Value::new_array(), @@ -778,9 +735,15 @@ impl super::value_trait::JsonValueTrait for Value { } } - #[cfg(feature = "arbitrary_precision")] fn as_raw_number(&self) -> Option { - todo!(); + match self.unpack_ref() { + ValueDetail::RawNumFasStr(s) => Some(RawNumber::from_faststr(s.clone())), + ValueDetail::NodeInDom(indom) | ValueDetail::Root(indom) => match indom.get_inner() { + ValueRefInner::RawNum(s) => Some(RawNumber::new(s)), + _ => None, + }, + _ => None, + } } #[inline] @@ -1069,22 +1032,6 @@ impl Value { } } - #[inline(always)] - fn at_pointer(&self, p: &PointerNode) -> Option<&Self> { - match p { - PointerNode::Key(key) => self.get_key(key), - PointerNode::Index(index) => self.get_index(*index), - } - } - - #[inline(always)] - fn at_pointer_mut(&mut self, p: &PointerNode) -> Option<&mut Self> { - match p { - PointerNode::Key(key) => self.get_key_mut(key).map(|v| v.0), - PointerNode::Index(index) => self.get_index_mut(*index), - } - } - #[doc(hidden)] #[inline] pub fn new_bool(val: bool) -> Self { @@ -1096,20 +1043,19 @@ impl Value { #[doc(hidden)] #[inline] - pub fn pack_str(idx: usize, val: &str) -> Self { + pub fn pack_str(kind: u64, idx: usize, val: &str) -> Self { let node_idx = idx as u32; // we check the json length when parsing, so val.len() should always be less than u32::MAX Value { - meta: Meta::pack_dom_node(Meta::STR_NODE, node_idx, val.len() as u32), + meta: Meta::pack_dom_node(kind, node_idx, val.len() as u32), data: Data { dom_str: unsafe { NonNull::new_unchecked(val.as_ptr() as *mut _) }, }, } } - #[doc(hidden)] #[inline] - pub fn pack_raw_str(val: &str, raw: RawStr) -> Self { + pub(crate) fn pack_raw_str(val: &str, raw: RawStr) -> Self { Value { meta: Meta::pack_rawstr(raw), data: Data { @@ -1118,7 +1064,8 @@ impl Value { } } - pub fn new_raw_num(num: &FastStr) -> Self { + #[inline] + pub(crate) fn new_rawnum_faststr(num: &FastStr) -> Self { let str_own = ManuallyDrop::new(Box::new(num.clone())); Value { meta: Meta::new(Meta::RAWNUM_FASTSTR), @@ -1126,6 +1073,15 @@ impl Value { } } + #[inline] + pub(crate) fn new_rawnum(num: &str) -> Self { + let str_own = ManuallyDrop::new(Box::new(FastStr::new(num))); + Value { + meta: Meta::new(Meta::RAWNUM_FASTSTR), + data: Data { str_own }, + } + } + pub(crate) fn len(&self) -> usize { match self.as_ref2() { ValueRefInner::Array(arr) => arr.len(), @@ -1135,13 +1091,6 @@ impl Value { } } - pub(crate) fn as_rawnum(&self) -> Option<&str> { - match self.as_ref2() { - ValueRefInner::RawNum(s) => Some(s), - _ => None, - } - } - pub(crate) fn as_value_slice(&self) -> Option<&[Value]> { match self.as_ref2() { ValueRefInner::Array(s) => Some(s), @@ -1158,28 +1107,6 @@ impl Value { } } - #[cfg(feature = "arbitrary_precision")] - #[doc(hidden)] - #[inline] - pub fn new_raw_num(num: &str, share: *const Shared) -> Self { - Value { - meta: Meta::new(RAWNUM, share), - data: Data { rawnum: num }, - } - } - - #[cfg(feature = "arbitrary_precision")] - #[doc(hidden)] - #[inline] - pub fn copy_raw_num(num: &str, share: &Shared) -> Self { - Value { - meta: Meta::new(RAWNUM, share), - data: Data { - rawnum: share.alloc.alloc_str(num), - }, - } - } - #[doc(hidden)] #[inline] pub fn copy_str(val: &str) -> Self { @@ -1192,12 +1119,12 @@ impl Value { #[doc(hidden)] #[inline] - pub fn copy_str_in(val: &str, idx: usize, shared: &mut Shared) -> Self { - let str = shared.alloc.alloc_str(val); + pub fn copy_str_in(kind: u64, val: &str, idx: usize, shared: &mut Shared) -> Self { + let str = shared.get_alloc().alloc_str(val); let node_idx = idx as u32; // we check the json length when parsing, so val.len() should always be less than u32::MAX Value { - meta: Meta::pack_dom_node(Meta::STR_NODE, node_idx, str.len() as u32), + meta: Meta::pack_dom_node(kind, node_idx, str.len() as u32), data: Data { dom_str: unsafe { NonNull::new_unchecked(str.as_ptr() as *mut _) }, }, @@ -1288,16 +1215,6 @@ impl Value { None } - #[inline] - pub(crate) fn insert_value(&mut self, index: usize, element: Value) { - debug_assert!(self.is_array()); - if let ValueMut::Array(arr) = self.as_mut() { - arr.insert(index, element); - } else { - unreachable!(" insert value in non-array ") - } - } - #[inline] fn equal_str(&self, val: &str) -> bool { debug_assert!(self.is_str()); @@ -1313,13 +1230,14 @@ impl Value { match self.unpack_ref() { ValueDetail::Array(arr) => arr.capacity(), ValueDetail::Object(obj) => obj.capacity(), - ValueDetail::NodeInDom(indom) | ValueDetail::NodeInDom(indom) => { + ValueDetail::NodeInDom(indom) | ValueDetail::Root(indom) => { if self.is_object() { indom.unpack_pair_slice().len() } else { indom.unpack_value_slice().len() } } + ValueDetail::EmptyArray | ValueDetail::EmptyObject => 0, _ => unreachable!("value is not array or object"), } } @@ -1442,7 +1360,7 @@ impl Value { } #[inline(never)] - pub(crate) fn parse_with_padding(&mut self, json: &[u8]) -> Result { + pub(crate) fn parse_with_padding(&mut self, json: &[u8], cfg: DeserializeCfg) -> Result { // allocate the padding buffer for the input json let mut shared = Arc::new(Shared::default()); let mut buffer = Vec::with_capacity(json.len() + Self::PADDING_SIZE); @@ -1452,14 +1370,14 @@ impl Value { let smut = Arc::get_mut(&mut shared).unwrap(); let slice = PaddedSliceRead::new(buffer.as_mut_slice()); - let mut parser = Parser::new(slice); + let mut parser = Parser::new(slice).with_config(cfg); let mut vis = DocumentVisitor::new(json.len(), smut); parser.parse_dom(&mut vis)?; let idx = parser.read.index(); // NOTE: root node should is the first node *self = unsafe { vis.root.as_ref().clone() }; - smut.json = buffer; + smut.set_json(buffer); Ok(idx) } @@ -1467,11 +1385,12 @@ impl Value { pub(crate) fn parse_without_padding<'de, R: Reader<'de>>( &mut self, shared: &mut Shared, + strbuf: &mut Vec, parser: &mut Parser, ) -> Result<()> { let remain_len = parser.read.remain(); let mut vis = DocumentVisitor::new(remain_len, shared); - parser.parse_value_without_padding(&mut vis)?; + parser.parse_dom2(&mut vis, strbuf)?; *self = unsafe { vis.root.as_ref().clone() }; Ok(()) } @@ -1523,7 +1442,7 @@ impl MetaNode { let canary = b"SONICRS\0"; MetaNode { shared, - canary: unsafe { transmute(canary) }, + canary: unsafe { transmute::<&[u8; 8], usize>(canary) }, } } } @@ -1563,7 +1482,8 @@ impl<'a> DocumentVisitor<'a> { let visited_children = &vis.nodes()[(parent + 1)..]; let real_count = visited_children.len() + Value::HEAD_NODE_COUNT; let layout = Layout::array::(real_count).unwrap(); - let hdr = vis.shared.alloc.alloc_layout(layout).as_ptr() as *mut ManuallyDrop; + let hdr = + vis.shared.get_alloc().alloc_layout(layout).as_ptr() as *mut ManuallyDrop; // copy visited nodes into document let visited_children = &vis.nodes()[(parent + 1)..]; @@ -1590,10 +1510,11 @@ impl<'a> DocumentVisitor<'a> { // should alloc root node in the bump allocator let start = self.nodes_start; let (rm, ru) = unsafe { (self.nodes()[start].meta, self.nodes()[start].data.uval) }; + let ptr = self.shared as *const _; let (_, root) = self .shared - .alloc - .alloc((MetaNode::new(self.shared as *const _), Value::default())); + .get_alloc() + .alloc((MetaNode::new(ptr), Value::default())); // copy visited nodes into document root.meta = rm; @@ -1616,8 +1537,9 @@ impl<'a> DocumentVisitor<'a> { if self.nodes().len() == self.nodes().capacity() { false } else { - self.nodes() - .push(ManuallyDrop::new(unsafe { transmute(node) })); + self.nodes().push(ManuallyDrop::new(unsafe { + transmute::(node) + })); true } } @@ -1646,16 +1568,17 @@ impl<'de, 'a> JsonVisitor<'de> for DocumentVisitor<'a> { self.push_node(node) } - #[cfg(feature = "arbitrary_precision")] #[inline(always)] fn visit_raw_number(&mut self, val: &str) -> bool { - todo!("raw num") + let idx = self.index(); + let node = Value::copy_str_in(Meta::RAWNUM_NODE, val, idx, self.shared); + self.push_node(node) } - #[cfg(feature = "arbitrary_precision")] #[inline(always)] fn visit_borrowed_raw_number(&mut self, val: &str) -> bool { - todo!("raw num") + let idx = self.index(); + self.push_node(Value::pack_str(Meta::RAWNUM_NODE, idx, val)) } #[inline(always)] @@ -1697,14 +1620,14 @@ impl<'de, 'a> JsonVisitor<'de> for DocumentVisitor<'a> { #[inline(always)] fn visit_str(&mut self, val: &str) -> bool { let idx = self.index(); - let node = Value::copy_str_in(val, idx, self.shared); + let node = Value::copy_str_in(Meta::STR_NODE, val, idx, self.shared); self.push_node(node) } #[inline(always)] fn visit_borrowed_str(&mut self, val: &'de str) -> bool { let idx = self.index(); - self.push_node(Value::pack_str(idx, val)) + self.push_node(Value::pack_str(Meta::STR_NODE, idx, val)) } #[inline(always)] @@ -1717,24 +1640,41 @@ impl<'de, 'a> JsonVisitor<'de> for DocumentVisitor<'a> { self.visit_borrowed_str(key) } - fn visit_raw_str(&mut self, val: &str, raw: &mut [u8]) -> bool { - // update the header - let raw = unsafe { - let mut r = RawStr::from_raw_parts_mut(raw.as_mut_ptr()); - r.set_header(RawStrHeader { - node_idx: self.nodes().len() as u32, - str_len: val.len() as u32, - raw_len: raw.len() as u32, - }); - r - }; - self.push_node(Value::pack_raw_str(val, raw)) + fn visit_raw_str(&mut self, val: &str, mut raw: RawStr) -> bool { + unsafe { + raw.set_index(self.index()); + raw.set_str_len(val.len()); + } + let node = Value::pack_raw_str(val, raw); + self.push_node(node) } fn visit_dom_end(&mut self) -> bool { self.visit_root(); true } + + fn allocator(&mut self) -> Option<&mut Bump> { + Some(self.shared.get_alloc()) + } +} + +impl Value { + pub(crate) const RAW_TOKEN: &str = "_private:sonic_rs:raw"; +} + +#[derive(Debug)] +struct RawKey<'a>(&'a str); + +impl Serialize for RawKey<'_> { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: Serializer, + { + let mut s = serializer.serialize_struct(Value::RAW_TOKEN, 1)?; + s.serialize_field(Value::RAW_TOKEN, &self.0)?; + s.end() + } } impl Serialize for Value { @@ -1747,7 +1687,7 @@ impl Serialize for Value { ValueRefInner::Null => serializer.serialize_unit(), ValueRefInner::Bool(b) => serializer.serialize_bool(b), ValueRefInner::Number(n) => n.serialize(serializer), - ValueRefInner::Str(s) if self.is_raw_str() => { + ValueRefInner::Str(_) if self.is_raw_str() => { use serde::ser::SerializeStruct; use crate::serde::rawnumber::TOKEN; @@ -1770,10 +1710,15 @@ impl Serialize for Value { { // TODO: sort the keys use thread-local buffer let mut kvs: Vec<&(Value, Value)> = o.iter().collect(); - kvs.sort_by(|(k1, _), (k2, _)| k1.str().cmp(k2.str())); + kvs.sort_by(|(k1, _), (k2, _)| k1.as_str().unwrap().cmp(k2.as_str().unwrap())); let mut map = tri!(serializer.serialize_map(Some(kvs.len()))); for (k, v) in kvs { - tri!(map.serialize_entry(k, v)); + if k.is_raw_str() { + tri!(map.serialize_key(&RawKey(k.raw_str()))); + } else { + tri!(map.serialize_key(k.as_str().unwrap())); + } + tri!(map.serialize_value(v)); } map.end() } @@ -1782,7 +1727,12 @@ impl Serialize for Value { let entries = o.iter(); let mut map = tri!(serializer.serialize_map(Some(entries.len()))); for (k, v) in entries { - tri!(map.serialize_entry(k, v)); + if k.is_raw_str() { + tri!(map.serialize_key(&RawKey(k.raw_str()))); + } else { + tri!(map.serialize_key(k.as_str().unwrap())); + } + tri!(map.serialize_value(v)); } map.end() } @@ -1795,7 +1745,6 @@ impl Serialize for Value { tri!(struct_.serialize_field(TOKEN, raw)); struct_.end() } - _ => panic!("unsupported types"), } } } @@ -2051,22 +2000,22 @@ mod test { fn test_invalid_utf8() { use crate::{from_slice, from_slice_unchecked}; - // let data = [b'"', 0x80, 0x90, b'"']; - // let ret: Result = from_slice(&data); - // assert_eq!( - // ret.err().unwrap().to_string(), - // "Invalid UTF-8 characters in json at line 1 column 1\n\n\t\"��\"\n\t.^..\n" - // ); + let data = [b'"', 0x80, 0x90, b'"']; + let ret: Result = from_slice(&data); + assert_eq!( + ret.err().unwrap().to_string(), + "Invalid UTF-8 characters in json at line 1 column 1\n\n\t\"��\"\n\t.^..\n" + ); - // let dom: Result = unsafe { from_slice_unchecked(&data) }; - // assert!(dom.is_ok(), "{}", dom.unwrap_err()); + let dom: Result = unsafe { from_slice_unchecked(&data) }; + assert!(dom.is_ok(), "{}", dom.unwrap_err()); - // let data = [b'"', b'"', 0x80]; - // let dom: Result = from_slice(&data); - // assert_eq!( - // dom.err().unwrap().to_string(), - // "Invalid UTF-8 characters in json at line 1 column 2\n\n\t\"\"�\n\t..^\n" - // ); + let data = [b'"', b'"', 0x80]; + let dom: Result = from_slice(&data); + assert_eq!( + dom.err().unwrap().to_string(), + "Invalid UTF-8 characters in json at line 1 column 2\n\n\t\"\"�\n\t..^\n" + ); let data = [0x80, b'"', b'"']; let dom: Result = unsafe { from_slice_unchecked(&data) }; @@ -2117,21 +2066,41 @@ mod test { .unwrap_err(); } - #[cfg(feature = "arbitrary_precision")] #[test] fn test_arbitrary_precision() { + use crate::{Deserialize, Deserializer}; + let nums = [ "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123", "1.23456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567e89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123", "-0.000000023456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567e+89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123", ]; for num in nums { - let value: Value = crate::from_str(num).unwrap(); + let mut de = Deserializer::from_str(num).use_rawnumber(); + let value: Value = Deserialize::deserialize(&mut de).unwrap(); assert_eq!(value.as_raw_number().unwrap().as_str(), num); assert_eq!(value.to_string(), num); } } + #[test] + fn test_raw_str() { + use crate::{Deserialize, Deserializer}; + let data = [ + r#"{"a":1}"#, + r#"{"a":1,"b":"\\u0001"}"#, + r#"{"a":1,"b":"💎"}"#, + r#"{"\\u0001":1,"b":"\\u0001"}"#, + ]; + + for json in data { + let mut de = Deserializer::from_str(json).use_raw(); + let value: Value = Deserialize::deserialize(&mut de).unwrap(); + let out = crate::to_string(&value).unwrap(); + assert_eq!(json, out); + } + } + #[cfg(feature = "sort_keys")] #[test] fn test_sort_keys() { diff --git a/src/value/object.rs b/src/value/object.rs index f8cd56c..2a15781 100644 --- a/src/value/object.rs +++ b/src/value/object.rs @@ -1,12 +1,9 @@ //! Represents a parsed JSON object. -use std::marker::PhantomData; +use std::{iter::FusedIterator, marker::PhantomData, slice}; use super::{node::ValueMut, value_trait::JsonValueTrait}; -use crate::{ - serde::tri, - util::reborrow::DormantMutRef, - value::node::{Value, ValueRefInner}, -}; +use crate::{serde::tri, util::reborrow::DormantMutRef, value::node::Value}; + /// Represents the JSON object. The inner implement is a key-value array. Its order is as same as /// origin JSON. /// @@ -53,25 +50,14 @@ impl PartialEq for Object { if self.len() != other.len() { return false; } - - for (k, v) in self.iter() { - if let Some(other_v) = other.get(&k) { - if v != other_v { - return false; - } - } else { - return false; - } - } - true + self.iter() + .all(|(k, v)| other.get(&k).map_or(false, |other_v| v == other_v)) } } #[doc(hidden)] pub type Pair = (Value, Value); -pub(crate) const DEFAULT_OBJ_CAP: usize = 4; - impl Object { /// Returns the inner `Value`. #[inline] @@ -298,7 +284,10 @@ impl Object { /// Returns the number of key-value paris in the object. #[inline] pub fn len(&self) -> usize { - self.0.len() + self.0 + .as_pair_slice() + .expect("get len in non-oject type") + .len() } /// Returns true if the object contains no key-value pairs. @@ -783,8 +772,7 @@ impl<'a> Entry<'a> { { match self { Entry::Occupied(entry) => entry.into_mut(), - Entry::Vacant(mut entry) => { - let obj = unsafe { entry.dormant_obj.reborrow() }; + Entry::Vacant(entry) => { let value = default(entry.key()); entry.insert(value) } @@ -792,10 +780,6 @@ impl<'a> Entry<'a> { } } -////////////////////////////////////////////////////////////////////////////// - -use std::{iter::FusedIterator, slice}; - macro_rules! impl_entry_iter { (($name:ident $($generics:tt)*): $item:ty) => { impl $($generics)* Iterator for $name $($generics)* { @@ -934,8 +918,6 @@ impl<'a, Q: AsRef + ?Sized> std::ops::IndexMut<&'a Q> for Object { } } -////////////////////////////////////////////////////////////////////////////// - impl serde::ser::Serialize for Object { #[inline] fn serialize(&self, serializer: S) -> std::result::Result diff --git a/src/value/partial_eq.rs b/src/value/partial_eq.rs index 7be58c3..795cfd7 100644 --- a/src/value/partial_eq.rs +++ b/src/value/partial_eq.rs @@ -15,8 +15,9 @@ impl PartialEq for Value { match self.as_ref2() { ValueRefInner::Null => other.is_null(), ValueRefInner::Bool(a) => other.as_bool().map_or(false, |b| a == b), - ValueRefInner::Number(a) => other.as_number().map_or(false, |b| a == b), - ValueRefInner::RawNum(a) => other.as_rawnum().map_or(false, |b| a == b), + ValueRefInner::Number(_) | ValueRefInner::RawNum(_) => { + other.as_number() == self.as_number() + } ValueRefInner::Str(a) => other.as_str().map_or(false, |b| a == b), ValueRefInner::Array(_) | ValueRefInner::EmptyArray => { other.as_value_slice() == self.as_value_slice() @@ -24,7 +25,6 @@ impl PartialEq for Value { ValueRefInner::Object(_) | ValueRefInner::EmptyObject => { other.as_object() == self.as_object() } - _ => unreachable!(), } } } diff --git a/src/value/ser.rs b/src/value/ser.rs index 4b0f858..4cec817 100644 --- a/src/value/ser.rs +++ b/src/value/ser.rs @@ -653,6 +653,169 @@ impl serde::Serializer for MapKeySerializer { } } +struct RawNumberEmitter; + +impl serde::ser::Serializer for RawNumberEmitter { + type Ok = Value; + type Error = Error; + + type SerializeSeq = Impossible; + type SerializeTuple = Impossible; + type SerializeTupleStruct = Impossible; + type SerializeTupleVariant = Impossible; + type SerializeMap = Impossible; + type SerializeStruct = Impossible; + type SerializeStructVariant = Impossible; + + fn serialize_bool(self, _v: bool) -> Result { + unreachable!() + } + + fn serialize_i8(self, _v: i8) -> Result { + unreachable!() + } + + fn serialize_i16(self, _v: i16) -> Result { + unreachable!() + } + + fn serialize_i32(self, _v: i32) -> Result { + unreachable!() + } + + fn serialize_i64(self, _v: i64) -> Result { + unreachable!() + } + + fn serialize_u8(self, _v: u8) -> Result { + unreachable!() + } + + fn serialize_u16(self, _v: u16) -> Result { + unreachable!() + } + + fn serialize_u32(self, _v: u32) -> Result { + unreachable!() + } + + fn serialize_u64(self, _v: u64) -> Result { + unreachable!() + } + + fn serialize_f32(self, _v: f32) -> Result { + unreachable!() + } + + fn serialize_f64(self, _v: f64) -> Result { + unreachable!() + } + + fn serialize_char(self, _v: char) -> Result { + unreachable!() + } + + fn serialize_str(self, value: &str) -> Result { + Ok(Value::new_rawnum(value)) + } + + fn serialize_bytes(self, _value: &[u8]) -> Result { + unreachable!() + } + + fn serialize_none(self) -> Result { + unreachable!() + } + + fn serialize_some(self, _value: &T) -> Result + where + T: ?Sized + Serialize, + { + unreachable!() + } + + fn serialize_unit(self) -> Result { + unreachable!() + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result { + unreachable!() + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + ) -> Result { + unreachable!() + } + + fn serialize_newtype_struct(self, _name: &'static str, _value: &T) -> Result + where + T: ?Sized + Serialize, + { + unreachable!() + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result + where + T: ?Sized + Serialize, + { + unreachable!() + } + + fn serialize_seq(self, _len: Option) -> Result { + unreachable!() + } + + fn serialize_tuple(self, _len: usize) -> Result { + unreachable!() + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + fn serialize_map(self, _len: Option) -> Result { + unreachable!() + } + + fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { + unreachable!() + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } +} + impl serde::ser::SerializeStruct for SerializeMap { type Ok = Value; type Error = Error; @@ -663,8 +826,13 @@ impl serde::ser::SerializeStruct for SerializeMap { { match &mut self.map { MapInner::Object { .. } => serde::ser::SerializeMap::serialize_entry(self, key, value), - MapInner::RawNumber { out_value: _ } => { - todo!() + MapInner::RawNumber { out_value } => { + if key == crate::serde::rawnumber::TOKEN { + *out_value = Some(tri!(value.serialize(RawNumberEmitter))); + Ok(()) + } else { + unreachable!() + } } } } @@ -760,7 +928,6 @@ mod test { } #[test] - #[cfg(not(feature = "arbitrary_precision"))] fn test_to_value2() { use crate::prelude::*; diff --git a/src/value/shared.rs b/src/value/shared.rs index 990b612..ae229aa 100644 --- a/src/value/shared.rs +++ b/src/value/shared.rs @@ -7,6 +7,20 @@ use bumpalo::Bump; #[repr(C)] #[doc(hidden)] pub struct Shared { - pub(crate) json: Vec, - pub(crate) alloc: Bump, + json: Vec, + alloc: Bump, } + +impl Shared { + pub fn get_alloc(&mut self) -> &mut Bump { + &mut self.alloc + } + + pub fn set_json(&mut self, json: Vec) { + self.json = json; + } +} + +// #safety +// we not export the immutable bump allocator, so `Sync`` is always safe here +unsafe impl Sync for Shared {} diff --git a/src/value/value_trait.rs b/src/value/value_trait.rs index af6ccf7..7e2059f 100644 --- a/src/value/value_trait.rs +++ b/src/value/value_trait.rs @@ -1,6 +1,4 @@ -#[cfg(feature = "arbitrary_precision")] -use crate::RawNumber; -use crate::{index::Index, JsonNumberTrait, Number}; +use crate::{index::Index, JsonNumberTrait, Number, RawNumber}; /// JsonType is an enum that represents the type of a JSON value. /// @@ -317,7 +315,6 @@ pub trait JsonValueTrait { fn as_number(&self) -> Option; /// Returns the [`RawNumber`] without precision loss if `self` is a `Number`. - #[cfg(feature = "arbitrary_precision")] fn as_raw_number(&self) -> Option; /// Returns the str if `self` is a `string`. @@ -507,7 +504,11 @@ pub trait JsonValueMutTrait { } impl JsonValueTrait for Option { - type ValueType<'v> = V::ValueType<'v> where V:'v, Self: 'v; + type ValueType<'v> + = V::ValueType<'v> + where + V: 'v, + Self: 'v; fn as_bool(&self) -> Option { self.as_ref().and_then(|v| v.as_bool()) @@ -529,7 +530,6 @@ impl JsonValueTrait for Option { self.as_ref().and_then(|v| v.as_number()) } - #[cfg(feature = "arbitrary_precision")] fn as_raw_number(&self) -> Option { self.as_ref().and_then(|v| v.as_raw_number()) } @@ -592,7 +592,11 @@ impl JsonValueMutTrait for Option { } impl JsonValueTrait for Result { - type ValueType<'v> = V::ValueType<'v> where V:'v, Self: 'v; + type ValueType<'v> + = V::ValueType<'v> + where + V: 'v, + Self: 'v; fn as_bool(&self) -> Option { self.as_ref().ok().and_then(|v| v.as_bool()) @@ -614,7 +618,6 @@ impl JsonValueTrait for Result { self.as_ref().ok().and_then(|v| v.as_number()) } - #[cfg(feature = "arbitrary_precision")] fn as_raw_number(&self) -> Option { self.as_ref().ok().and_then(|v| v.as_raw_number()) } @@ -677,7 +680,11 @@ impl JsonValueMutTrait for Result { } impl JsonValueTrait for &V { - type ValueType<'v> = V::ValueType<'v> where V:'v, Self: 'v; + type ValueType<'v> + = V::ValueType<'v> + where + V: 'v, + Self: 'v; fn as_bool(&self) -> Option { (*self).as_bool() @@ -699,7 +706,6 @@ impl JsonValueTrait for &V { (*self).as_number() } - #[cfg(feature = "arbitrary_precision")] fn as_raw_number(&self) -> Option { (*self).as_raw_number() } diff --git a/src/value/visitor.rs b/src/value/visitor.rs index 9c1df3e..147966d 100644 --- a/src/value/visitor.rs +++ b/src/value/visitor.rs @@ -1,3 +1,7 @@ +use bumpalo::Bump; + +use super::node::RawStr; + pub(crate) trait JsonVisitor<'de> { fn visit_dom_start(&mut self) -> bool { false @@ -70,11 +74,15 @@ pub(crate) trait JsonVisitor<'de> { false } - fn visit_raw_str(&mut self, _value: &str, _raw: &mut [u8]) -> bool { + fn visit_raw_str(&mut self, _value: &str, _raw: RawStr) -> bool { false } fn visit_dom_end(&mut self) -> bool { false } + + fn allocator(&mut self) -> Option<&mut Bump> { + None + } }