diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..62637fa --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,9 @@ +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f65219d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,97 @@ +name: Rust + +on: + push: + branches: [ "main" ] + pull_request: + +env: + CARGO_TERM_COLOR: always + +jobs: + fmt: + name: Formatting + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v3 + + - name: Setup Rust toolchain and cache + uses: actions-rust-lang/setup-rust-toolchain@v1.5.0 + with: + toolchain: "nightly" + components: "rustfmt" + + - name: cargo fmt + run: cargo fmt --all -- --check + + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v3 + + - name: Setup Rust toolchain and cache + uses: actions-rust-lang/setup-rust-toolchain@v1.5.0 + + - name: Validate documentation + run: RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps --all-features --document-private-items + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v3 + + - name: Setup Rust toolchain and cache + uses: actions-rust-lang/setup-rust-toolchain@v1.5.0 + with: + toolchain: "nightly" + components: "clippy" + + - name: Clippy + run: cargo clippy --workspace --no-deps --all-features --all-targets -- -D warnings + + tests: + name: Tests + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v3 + + - name: Setup Rust toolchain and cache + uses: 
actions-rust-lang/setup-rust-toolchain@v1.5.0 + + - name: All features + run: cargo test --workspace --all-features --all-targets + + - name: No default features + run: cargo test --workspace --no-default-features --all-targets + + miri: + name: Miri Tests + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v3 + + - name: Setup Rust toolchain and cache + uses: actions-rust-lang/setup-rust-toolchain@v1.5.0 + with: + toolchain: "nightly" + components: "miri" + + - name: Run tests + run: MIRIFLAGS='-Zmiri-tree-borrows' cargo miri test --workspace --all-features --all-targets + + typos: + name: Typos + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v3 + + - name: Check for spelling errors + uses: crate-ci/typos@v1.16.5 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2ebc5ea --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/Cargo.lock \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..40be44d --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,116 @@ +[workspace] +members = ["valence_nbt", "java_string"] +resolver = "2" + +[workspace.package] +edition = "2021" +license = "MIT" +repository = "https://github.com/valence-rs/valence_nbt" + +[workspace.lints.rust] +elided_lifetimes_in_paths = "allow" # Warned by `future_incompatible`. 
+future_incompatible = "warn" +missing_debug_implementations = "warn" +# missing_docs = "warn" +nonstandard_style = "warn" +rust_2018_idioms = "warn" +trivial_numeric_casts = "warn" +unreachable_pub = "warn" +unused_import_braces = "warn" +unused_lifetimes = "warn" + +[workspace.lints.clippy] +alloc_instead_of_core = "warn" +as_ptr_cast_mut = "warn" +as_underscore = "warn" +bool_to_int_with_if = "warn" +case_sensitive_file_extension_comparisons = "warn" +cast_lossless = "warn" +checked_conversions = "warn" +cloned_instead_of_copied = "warn" +copy_iterator = "warn" +dbg_macro = "warn" +doc_link_with_quotes = "warn" +doc_markdown = "warn" +empty_enum_variants_with_brackets = "warn" +empty_structs_with_brackets = "warn" +explicit_deref_methods = "warn" +explicit_into_iter_loop = "warn" +explicit_iter_loop = "warn" +filter_map_next = "warn" +flat_map_option = "warn" +format_push_string = "warn" +from_iter_instead_of_collect = "warn" +get_unwrap = "warn" +if_then_some_else_none = "warn" +ignored_unit_patterns = "warn" +impl_trait_in_params = "warn" +implicit_clone = "warn" +inconsistent_struct_constructor = "warn" +inefficient_to_string = "warn" +infinite_loop = "warn" +into_iter_without_iter = "warn" +invalid_upcast_comparisons = "warn" +iter_filter_is_ok = "warn" +iter_filter_is_some = "warn" +iter_not_returning_iterator = "warn" +iter_over_hash_type = "warn" # Requires justification +iter_without_into_iter = "warn" +large_stack_arrays = "warn" +large_types_passed_by_value = "warn" +macro_use_imports = "warn" +manual_assert = "warn" +manual_instant_elapsed = "warn" +manual_is_variant_and = "warn" +manual_let_else = "warn" +manual_ok_or = "warn" +manual_string_new = "warn" +map_unwrap_or = "warn" +match_bool = "warn" +match_wildcard_for_single_variants = "warn" +mismatching_type_param_order = "warn" +missing_fields_in_debug = "warn" +mixed_read_write_in_expression = "warn" +mod_module_files = "warn" +multiple_inherent_impl = "warn" +mut_mut = "warn" +mutex_atomic = 
"warn" +needless_bitwise_bool = "warn" +needless_continue = "warn" +needless_for_each = "warn" +needless_raw_string_hashes = "warn" +needless_raw_strings = "warn" +negative_feature_names = "warn" +no_mangle_with_rust_abi = "warn" +option_as_ref_cloned = "warn" +pub_underscore_fields = "warn" +rc_buffer = "warn" +rc_mutex = "warn" +redundant_else = "warn" +redundant_feature_names = "warn" +ref_patterns = "warn" +rest_pat_in_fully_bound_structs = "warn" +semicolon_outside_block = "warn" +str_to_string = "warn" +string_lit_chars_any = "warn" +string_to_string = "warn" +struct_field_names = "warn" +tests_outside_test_module = "warn" +todo = "warn" +trivially_copy_pass_by_ref = "warn" +try_err = "warn" +# undocumented_unsafe_blocks = "warn" +uninlined_format_args = "warn" +unnecessary_join = "warn" +# unnecessary_safety_doc = "warn" +unnecessary_self_imports = "warn" +unneeded_field_pattern = "warn" +unnested_or_patterns = "warn" +unseparated_literal_suffix = "warn" +unused_self = "warn" +used_underscore_binding = "warn" +wildcard_dependencies = "warn" +zero_sized_map_values = "warn" + +[workspace.lints.rustdoc] +unescaped_backticks = "warn" diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..0933e35 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Ryan Johnson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..b182832 --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +# valence_nbt + +A library for encoding and decoding Minecraft's [Named Binary Tag] (NBT) +format. + +[Named Binary Tag]: https://minecraft.wiki/w/NBT_format + +# Features + +- `binary`: Serialize and deserialize in Java edition's binary format. +- `snbt`: Serialize and deserialize in "stringified" format. +- `preserve_order`: Preserve the order of fields in [`Compound`]s during insertion and deletion. The iterators on `Compound` then implement `DoubleEndedIterator`. +- `serde`: Adds support for [`serde`](https://docs.rs/serde/latest/serde/) +- `java_string`: Adds support for Java-compatible strings via the `java_string` crate. 
diff --git a/java_string/Cargo.toml b/java_string/Cargo.toml new file mode 100644 index 0000000..4babf5d --- /dev/null +++ b/java_string/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "java_string" +version = "0.1.2" +description = "An implementation of Java strings, tolerant of invalid UTF-16 encoding" +keywords = ["java", "string", "utf16"] +edition.workspace = true +repository.workspace = true +license.workspace = true + +[features] +serde = ["dep:serde"] + +[dependencies] +serde = { version = "1.0.200", optional = true } + +[lints] +workspace = true + diff --git a/java_string/README.md b/java_string/README.md new file mode 100644 index 0000000..7135d64 --- /dev/null +++ b/java_string/README.md @@ -0,0 +1,17 @@ +# java_string + +An implementation of Java strings, tolerant of invalid UTF-16 encoding. +This allows for round-trip serialization of all Java strings, including those which contain invalid UTF-16, while still +being able to perform useful operations on those strings. + +These Java strings use the UTF-8 encoding, with the modification that surrogate code points (code points between U+D800 +and U+DFFF inclusive) are allowed. This allows for zero-cost conversion from Rust strings to Java strings. This modified +encoding is known as "semi-UTF-8" throughout the codebase. Similarly, this crate introduces a `JavaCodePoint` type which +is analogous to `char`, except that surrogate code points are allowed. + +This crate is mostly undocumented, because most methods are entirely analogous to those of the same name in Rust's +strings. Please refer to the `std` documentation. 
+ +# Features + +- `serde` Adds support for [`serde`](https://docs.rs/serde/latest/serde/) \ No newline at end of file diff --git a/java_string/src/cesu8.rs b/java_string/src/cesu8.rs new file mode 100644 index 0000000..dc1796c --- /dev/null +++ b/java_string/src/cesu8.rs @@ -0,0 +1,280 @@ +use std::borrow::Cow; + +use crate::validations::{utf8_char_width, CONT_MASK, TAG_CONT}; +use crate::{JavaStr, JavaString, Utf8Error}; + +impl JavaStr { + /// Converts from Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format to a `Cow`. + /// + /// ``` + /// # use std::borrow::Cow; + /// # use java_string::{JavaCodePoint, JavaStr, JavaString}; + /// + /// let result = JavaStr::from_modified_utf8("Hello World!".as_bytes()).unwrap(); + /// assert!(matches!(result, Cow::Borrowed(_))); + /// assert_eq!(JavaStr::from_str("Hello World!"), result); + /// + /// let result = JavaStr::from_modified_utf8(&[ + /// 0x61, 0x62, 0x63, 0xC0, 0x80, 0xE2, 0x84, 0x9D, 0xED, 0xA0, 0xBD, 0xED, 0xB2, 0xA3, 0xED, + /// 0xA0, 0x80, + /// ]) + /// .unwrap(); + /// assert!(matches!(result, Cow::Owned(_))); + /// let mut expected = JavaString::from("abc\0โ„๐Ÿ’ฃ"); + /// expected.push_java(JavaCodePoint::from_u32(0xD800).unwrap()); + /// assert_eq!(expected, result); + /// + /// let result = JavaStr::from_modified_utf8(&[0xED]); + /// assert!(result.is_err()); + /// ``` + #[inline] + pub fn from_modified_utf8(bytes: &[u8]) -> Result, Utf8Error> { + match JavaStr::from_full_utf8(bytes) { + Ok(str) => Ok(Cow::Borrowed(str)), + Err(_) => JavaString::from_modified_utf8_internal(bytes).map(Cow::Owned), + } + } + + /// Converts to Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format. 
+ /// + /// ``` + /// # use std::borrow::Cow; + /// # use java_string::{JavaCodePoint, JavaStr, JavaString}; + /// + /// let result = JavaStr::from_str("Hello World!").to_modified_utf8(); + /// assert!(matches!(result, Cow::Borrowed(_))); + /// assert_eq!(result, &b"Hello World!"[..]); + /// + /// let mut str = JavaString::from("abc\0โ„๐Ÿ’ฃ"); + /// str.push_java(JavaCodePoint::from_u32(0xD800).unwrap()); + /// let result = str.to_modified_utf8(); + /// let expected = [ + /// 0x61, 0x62, 0x63, 0xC0, 0x80, 0xE2, 0x84, 0x9D, 0xED, 0xA0, 0xBD, 0xED, 0xB2, 0xA3, 0xED, + /// 0xA0, 0x80, + /// ]; + /// assert!(matches!(result, Cow::Owned(_))); + /// assert_eq!(result, &expected[..]); + /// ``` + #[inline] + #[must_use] + pub fn to_modified_utf8(&self) -> Cow<[u8]> { + if is_valid_cesu8(self) { + Cow::Borrowed(self.as_bytes()) + } else { + Cow::Owned(self.to_modified_utf8_internal()) + } + } + + #[inline] + fn to_modified_utf8_internal(&self) -> Vec { + let bytes = self.as_bytes(); + let mut encoded = Vec::with_capacity((bytes.len() + bytes.len()) >> 2); + let mut i = 0; + while i < bytes.len() { + let b = bytes[i]; + if b == 0 { + encoded.extend([0xC0, 0x80]); + i += 1; + } else if b < 128 { + // Pass ASCII through quickly. + encoded.push(b); + i += 1; + } else { + // Figure out how many bytes we need for this character. + let w = utf8_char_width(b); + let char_bytes = unsafe { + // SAFETY: input must be valid semi UTF-8, so there must be at least w more + // bytes from i + bytes.get_unchecked(i..i + w) + }; + if w != 4 { + // Pass through short UTF-8 sequences unmodified. 
+ encoded.extend(char_bytes.iter().copied()) + } else { + // Encode 4-byte sequences as 6 bytes + let s = unsafe { + // SAFETY: input is valid semi UTF-8 + JavaStr::from_semi_utf8_unchecked(char_bytes) + }; + let c = unsafe { + // SAFETY: s contains a single char of width 4 + s.chars().next().unwrap_unchecked().as_u32() - 0x10000 + }; + let s = [((c >> 10) as u16) | 0xD800, ((c & 0x3FF) as u16) | 0xDC00]; + encoded.extend(enc_surrogate(s[0])); + encoded.extend(enc_surrogate(s[1])); + } + i += w; + } + } + encoded + } +} + +impl JavaString { + /// Converts from Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format to a `JavaString`. + /// + /// See [`JavaStr::from_modified_utf8`]. + #[inline] + pub fn from_modified_utf8(bytes: Vec) -> Result { + match JavaString::from_full_utf8(bytes) { + Ok(str) => Ok(str), + Err(err) => JavaString::from_modified_utf8_internal(&err.bytes), + } + } + + fn from_modified_utf8_internal(slice: &[u8]) -> Result { + let mut offset = 0; + let mut decoded = Vec::with_capacity(slice.len() + 1); + + while let Some(&first) = slice.get(offset) { + let old_offset = offset; + offset += 1; + + macro_rules! err { + ($error_len:expr) => { + return Err(Utf8Error { + valid_up_to: old_offset, + error_len: $error_len, + }) + }; + } + + macro_rules! next { + () => {{ + if let Some(&b) = slice.get(offset) { + offset += 1; + b + } else { + err!(None) + } + }}; + } + + macro_rules! next_cont { + ($error_len:expr) => {{ + let byte = next!(); + if (byte) & !CONT_MASK == TAG_CONT { + byte + } else { + err!($error_len) + } + }}; + } + + if first == 0 { + // modified UTF-8 should never contain \0 directly. + err!(Some(1)); + } else if first < 128 { + // Pass ASCII through directly. 
+ decoded.push(first); + } else if first == 0xC0 { + // modified UTF-8 encoding of null character + match next!() { + 0x80 => decoded.push(0), + _ => err!(Some(1)), + } + } else { + let w = utf8_char_width(first); + let second = next_cont!(Some(1)); + match w { + // Two-byte sequences can be used directly. + 2 => { + decoded.extend([first, second]); + } + 3 => { + let third = next_cont!(Some(2)); + #[allow(clippy::unnested_or_patterns)] // Justification: readability + match (first, second) { + // These are valid UTF-8, so pass them through. + (0xe0, 0xa0..=0xbf) + | (0xe1..=0xec, 0x80..=0xbf) + | (0xed, 0x80..=0x9f) + | (0xee..=0xef, 0x80..=0xbf) + // Second half of a surrogate pair without a preceding first half, also pass this through. + | (0xed, 0xb0..=0xbf) + => decoded.extend([first, second, third]), + // First half of a surrogate pair + (0xed, 0xa0..=0xaf) => { + // Peek ahead and try to pair the first half of surrogate pair with + // second. + match &slice[offset..] { + [0xed, fifth @ 0xb0..=0xbf, sixth, ..] + if *sixth & !CONT_MASK == TAG_CONT => + { + let s = dec_surrogates(second, third, *fifth, *sixth); + decoded.extend(s); + offset += 3; + } + _ => { + // No second half, append the first half directly. + decoded.extend([first, second, third]); + } + } + } + _ => err!(Some(1)), + } + } + _ => err!(Some(1)), // modified UTF-8 doesn't allow width 4 + } + } + } + + unsafe { + // SAFETY: we built a semi UTF-8 encoded string + Ok(JavaString::from_semi_utf8_unchecked(decoded)) + } + } + + /// Converts to Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format. + /// + /// See [`JavaStr::to_modified_utf8`]. 
+ #[inline] + #[must_use] + pub fn into_modified_utf8(self) -> Vec { + if is_valid_cesu8(&self) { + self.into_bytes() + } else { + self.to_modified_utf8_internal() + } + } +} + +#[inline] +fn dec_surrogate(second: u8, third: u8) -> u32 { + 0xD000 | u32::from(second & CONT_MASK) << 6 | u32::from(third & CONT_MASK) +} + +#[inline] +fn dec_surrogates(second: u8, third: u8, fifth: u8, sixth: u8) -> [u8; 4] { + // Convert to a 32-bit code point. + let s1 = dec_surrogate(second, third); + let s2 = dec_surrogate(fifth, sixth); + let c = 0x10000 + (((s1 - 0xD800) << 10) | (s2 - 0xDC00)); + assert!((0x010000..=0x10FFFF).contains(&c)); + + // Convert to UTF-8. + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + [ + 0b1111_0000_u8 | ((c & 0b1_1100_0000_0000_0000_0000) >> 18) as u8, + TAG_CONT | ((c & 0b0_0011_1111_0000_0000_0000) >> 12) as u8, + TAG_CONT | ((c & 0b0_0000_0000_1111_1100_0000) >> 6) as u8, + TAG_CONT | (c & 0b0_0000_0000_0000_0011_1111) as u8, + ] +} + +#[inline] +fn is_valid_cesu8(text: &JavaStr) -> bool { + text.bytes() + .all(|b| b != 0 && ((b & !CONT_MASK) == TAG_CONT || utf8_char_width(b) <= 3)) +} + +#[inline] +fn enc_surrogate(surrogate: u16) -> [u8; 3] { + // 1110xxxx 10xxxxxx 10xxxxxx + [ + 0b11100000 | ((surrogate & 0b11110000_00000000) >> 12) as u8, + TAG_CONT | ((surrogate & 0b00001111_11000000) >> 6) as u8, + TAG_CONT | (surrogate & 0b00000000_00111111) as u8, + ] +} diff --git a/java_string/src/char.rs b/java_string/src/char.rs new file mode 100644 index 0000000..3dea4f7 --- /dev/null +++ b/java_string/src/char.rs @@ -0,0 +1,1012 @@ +use std::char::ParseCharError; +use std::cmp::Ordering; +use std::fmt; +use std::fmt::{Debug, Display, Formatter, Write}; +use std::hash::{Hash, Hasher}; +use std::iter::{once, FusedIterator, Once}; +use std::ops::Range; +use std::str::FromStr; + +use crate::validations::{TAG_CONT, TAG_FOUR_B, TAG_THREE_B, TAG_TWO_B}; + +// JavaCodePoint is guaranteed to have the same repr as a u32, with valid values +// of between 0 and 
0x10FFFF, the same as a unicode code point. Surrogate code +// points are valid values of this type. +#[derive(Copy, Clone, PartialEq, Eq)] +#[repr(C)] +pub struct JavaCodePoint { + #[cfg(target_endian = "little")] + lower: u16, + upper: SeventeenValues, + #[cfg(target_endian = "big")] + lower: u16, +} + +#[repr(u16)] +#[derive(Copy, Clone, PartialEq, Eq)] +#[allow(unused)] +enum SeventeenValues { + V0, + V1, + V2, + V3, + V4, + V5, + V6, + V7, + V8, + V9, + V10, + V11, + V12, + V13, + V14, + V15, + V16, +} + +impl JavaCodePoint { + pub const MAX: JavaCodePoint = JavaCodePoint::from_char(char::MAX); + pub const REPLACEMENT_CHARACTER: JavaCodePoint = + JavaCodePoint::from_char(char::REPLACEMENT_CHARACTER); + + /// See [`char::from_u32`] + /// + /// ``` + /// # use java_string::JavaCodePoint; + /// let c = JavaCodePoint::from_u32(0x2764); + /// assert_eq!(Some(JavaCodePoint::from_char('โค')), c); + /// + /// assert_eq!(None, JavaCodePoint::from_u32(0x110000)); + /// ``` + #[inline] + #[must_use] + pub const fn from_u32(i: u32) -> Option { + if i <= 0x10FFFF { + unsafe { Some(Self::from_u32_unchecked(i)) } + } else { + None + } + } + + /// # Safety + /// The argument must be within the valid Unicode code point range of 0 to + /// 0x10FFFF inclusive. Surrogate code points are allowed. + #[inline] + #[must_use] + pub const unsafe fn from_u32_unchecked(i: u32) -> JavaCodePoint { + // SAFETY: the caller checks that the argument can be represented by this type + std::mem::transmute(i) + } + + /// Converts a `char` to a code point. + #[inline] + #[must_use] + pub const fn from_char(char: char) -> JavaCodePoint { + unsafe { + // SAFETY: all chars are valid code points + JavaCodePoint::from_u32_unchecked(char as u32) + } + } + + /// Converts this code point to a `u32`. 
+ /// + /// ``` + /// # use java_string::JavaCodePoint; + /// assert_eq!(65, JavaCodePoint::from_char('A').as_u32()); + /// assert_eq!(0xD800, JavaCodePoint::from_u32(0xD800).unwrap().as_u32()); + /// ``` + #[inline] + #[must_use] + pub const fn as_u32(self) -> u32 { + unsafe { + // SAFETY: JavaCodePoint has the same repr as a u32 + let result = std::mem::transmute(self); + + if result > 0x10FFFF { + // SAFETY: JavaCodePoint can never have a value > 0x10FFFF. + // This statement may allow the optimizer to remove branches in the calling code + // associated with out of bounds chars. + std::hint::unreachable_unchecked(); + } + + result + } + } + + /// Converts this code point to a `char`. + /// + /// ``` + /// # use java_string::JavaCodePoint; + /// assert_eq!(Some('a'), JavaCodePoint::from_char('a').as_char()); + /// assert_eq!(None, JavaCodePoint::from_u32(0xD800).unwrap().as_char()); + /// ``` + #[inline] + #[must_use] + pub const fn as_char(self) -> Option { + char::from_u32(self.as_u32()) + } + + /// # Safety + /// The caller must ensure that this code point is not a surrogate code + /// point. 
    #[inline]
    #[must_use]
    pub unsafe fn as_char_unchecked(self) -> char {
        char::from_u32_unchecked(self.as_u32())
    }

    /// See [`char::encode_utf16`]
    ///
    /// A surrogate code point is written as a single UTF-16 code unit.
    ///
    /// ```
    /// # use java_string::JavaCodePoint;
    /// assert_eq!(
    ///     2,
    ///     JavaCodePoint::from_char('𝕊')
    ///         .encode_utf16(&mut [0; 2])
    ///         .len()
    /// );
    /// assert_eq!(
    ///     1,
    ///     JavaCodePoint::from_u32(0xD800)
    ///         .unwrap()
    ///         .encode_utf16(&mut [0; 2])
    ///         .len()
    /// );
    /// ```
    /// ```should_panic
    /// # use java_string::JavaCodePoint;
    /// // Should panic
    /// JavaCodePoint::from_char('𝕊').encode_utf16(&mut [0; 1]);
    /// ```
    #[inline]
    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
        if let Some(char) = self.as_char() {
            char.encode_utf16(dst)
        } else {
            // Surrogates are in 0xD800..=0xDFFF, so the cast to u16 is lossless.
            dst[0] = self.as_u32() as u16;
            &mut dst[..1]
        }
    }

    /// Encodes this `JavaCodePoint` into semi UTF-8, that is, UTF-8 with
    /// surrogate code points. See also [`char::encode_utf8`].
    ///
    /// Returns the written prefix of `dst`.
    ///
    /// # Panics
    /// Panics if `dst` is shorter than [`len_utf8`](Self::len_utf8).
    ///
    /// ```
    /// # use java_string::JavaCodePoint;
    /// assert_eq!(
    ///     2,
    ///     JavaCodePoint::from_char('ß')
    ///         .encode_semi_utf8(&mut [0; 4])
    ///         .len()
    /// );
    /// assert_eq!(
    ///     3,
    ///     JavaCodePoint::from_u32(0xD800)
    ///         .unwrap()
    ///         .encode_semi_utf8(&mut [0; 4])
    ///         .len()
    /// );
    /// ```
    /// ```should_panic
    /// # use java_string::JavaCodePoint;
    /// // Should panic
    /// JavaCodePoint::from_char('ß').encode_semi_utf8(&mut [0; 1]);
    /// ```
    #[inline]
    pub fn encode_semi_utf8(self, dst: &mut [u8]) -> &mut [u8] {
        let len = self.len_utf8();
        let code = self.as_u32();
        // Each arm only matches when `dst` has at least `len` bytes.
        match (len, &mut dst[..]) {
            (1, [a, ..]) => {
                *a = code as u8;
            }
            (2, [a, b, ..]) => {
                *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
                *b = (code & 0x3F) as u8 | TAG_CONT;
            }
            (3, [a, b, c, ..]) => {
                *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
                *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
                *c = (code & 0x3F) as u8 | TAG_CONT;
            }
            (4, [a, b, c, d, ..]) => {
                *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
                *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
                *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
                *d = (code & 0x3F) as u8 | TAG_CONT;
            }
            // NOTE(review): the message says `encode_utf8` although this is
            // `encode_semi_utf8` - presumably mirrors std's wording; confirm.
            _ => panic!(
                "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
                len,
                code,
                dst.len()
            ),
        }
        &mut dst[..len]
    }

    /// See [`char::eq_ignore_ascii_case`].
    ///
    /// Two surrogate code points compare equal only when they are identical; a
    /// surrogate never equals a non-surrogate.
    #[inline]
    pub fn eq_ignore_ascii_case(&self, other: &JavaCodePoint) -> bool {
        match (self.as_char(), other.as_char()) {
            (Some(char1), Some(char2)) => char1.eq_ignore_ascii_case(&char2),
            (None, None) => self == other,
            _ => false,
        }
    }

    /// See [`char::escape_debug`].
    ///
    /// ```
    /// # use java_string::JavaCodePoint;
    /// assert_eq!(
    ///     "a",
    ///     JavaCodePoint::from_char('a').escape_debug().to_string()
    /// );
    /// assert_eq!(
    ///     "\\n",
    ///     JavaCodePoint::from_char('\n').escape_debug().to_string()
    /// );
    /// assert_eq!(
    ///     "\\u{d800}",
    ///     JavaCodePoint::from_u32(0xD800)
    ///         .unwrap()
    ///         .escape_debug()
    ///         .to_string()
    /// );
    /// ```
    #[inline]
    #[must_use]
    pub fn escape_debug(self) -> CharEscapeIter {
        self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
    }

    // Debug-escapes this code point. `args` selects whether single and double
    // quotes are escaped; anything that is not printable falls back to
    // `escape_unicode`.
    #[inline]
    #[must_use]
    pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> CharEscapeIter {
        const NULL: u32 = '\0' as u32;
        const TAB: u32 = '\t' as u32;
        const CARRIAGE_RETURN: u32 = '\r' as u32;
        const LINE_FEED: u32 = '\n' as u32;
        const SINGLE_QUOTE: u32 = '\'' as u32;
        const DOUBLE_QUOTE: u32 = '"' as u32;
        const BACKSLASH: u32 = '\\' as u32;

        unsafe {
            // SAFETY: all characters specified are in ascii range
            match self.as_u32() {
                NULL => CharEscapeIter::new([b'\\', b'0']),
                TAB => CharEscapeIter::new([b'\\', b't']),
                CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
                LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
                SINGLE_QUOTE if args.escape_single_quote => CharEscapeIter::new([b'\\', b'\'']),
                DOUBLE_QUOTE if args.escape_double_quote => CharEscapeIter::new([b'\\', b'"']),
                BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
                _ if self.is_printable() => {
                    // SAFETY: surrogate code points are not printable
                    CharEscapeIter::printable(self.as_char_unchecked())
                }
                _ => self.escape_unicode(),
            }
        }
    }

    // A code point is printable when it is a `char` that `char::escape_debug`
    // would not turn into a backslash escape. Quotes and the backslash are
    // treated as printable here.
    #[inline]
    fn is_printable(self) -> bool {
        let Some(char) = self.as_char() else {
            return false;
        };
        if matches!(char, '\\' | '\'' | '"') {
            return true;
        }
        char.escape_debug().next() != Some('\\')
    }

    /// See [`char::escape_default`].
    ///
    /// ```
    /// # use java_string::JavaCodePoint;
    /// assert_eq!(
    ///     "a",
    ///     JavaCodePoint::from_char('a').escape_default().to_string()
    /// );
    /// assert_eq!(
    ///     "\\n",
    ///     JavaCodePoint::from_char('\n').escape_default().to_string()
    /// );
    /// assert_eq!(
    ///     "\\u{d800}",
    ///     JavaCodePoint::from_u32(0xD800)
    ///         .unwrap()
    ///         .escape_default()
    ///         .to_string()
    /// );
    /// ```
    #[inline]
    #[must_use]
    pub fn escape_default(self) -> CharEscapeIter {
        const TAB: u32 = '\t' as u32;
        const CARRIAGE_RETURN: u32 = '\r' as u32;
        const LINE_FEED: u32 = '\n' as u32;
        const SINGLE_QUOTE: u32 = '\'' as u32;
        const DOUBLE_QUOTE: u32 = '"' as u32;
        const BACKSLASH: u32 = '\\' as u32;

        unsafe {
            // SAFETY: all characters specified are in ascii range
            match self.as_u32() {
                TAB => CharEscapeIter::new([b'\\', b't']),
                CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
                LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
                SINGLE_QUOTE => CharEscapeIter::new([b'\\', b'\'']),
                DOUBLE_QUOTE => CharEscapeIter::new([b'\\', b'"']),
                BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
                // Remaining ASCII from space to tilde is emitted as-is.
                0x20..=0x7E => CharEscapeIter::new([self.as_u32() as u8]),
                _ => self.escape_unicode(),
            }
        }
    }

    /// See [`char::escape_unicode`].
+ /// + /// ``` + /// # use java_string::JavaCodePoint; + /// assert_eq!( + /// "\\u{2764}", + /// JavaCodePoint::from_char('โค').escape_unicode().to_string() + /// ); + /// assert_eq!( + /// "\\u{d800}", + /// JavaCodePoint::from_u32(0xD800) + /// .unwrap() + /// .escape_unicode() + /// .to_string() + /// ); + /// ``` + #[inline] + #[must_use] + pub fn escape_unicode(self) -> CharEscapeIter { + let x = self.as_u32(); + + let mut arr = [0; 10]; + arr[0] = b'\\'; + arr[1] = b'u'; + arr[2] = b'{'; + + let number_len = if x == 0 { + 1 + } else { + ((x.ilog2() >> 2) + 1) as usize + }; + arr[3 + number_len] = b'}'; + for hexit in 0..number_len { + arr[2 + number_len - hexit] = b"0123456789abcdef"[((x >> (hexit << 2)) & 15) as usize]; + } + + CharEscapeIter { + inner: EscapeIterInner::Escaped(EscapeIterEscaped { + bytes: arr, + range: 0..number_len + 4, + }), + } + } + + /// See [`char::is_alphabetic`]. + #[inline] + #[must_use] + pub fn is_alphabetic(self) -> bool { + self.as_char().is_some_and(|char| char.is_alphabetic()) + } + + /// See [`char::is_alphanumeric`]. + #[inline] + #[must_use] + pub fn is_alphanumeric(self) -> bool { + self.as_char().is_some_and(|char| char.is_alphanumeric()) + } + + /// See [`char::is_ascii`]. + #[inline] + #[must_use] + pub fn is_ascii(self) -> bool { + self.as_u32() <= 0x7F + } + + /// See [`char::is_ascii_alphabetic`]. + #[inline] + #[must_use] + pub const fn is_ascii_alphabetic(self) -> bool { + self.is_ascii_lowercase() || self.is_ascii_uppercase() + } + + /// See [`char::is_ascii_alphanumeric`]. + #[inline] + #[must_use] + pub const fn is_ascii_alphanumeric(self) -> bool { + self.is_ascii_alphabetic() || self.is_ascii_digit() + } + + /// See [`char::is_ascii_control`]. + #[inline] + #[must_use] + pub const fn is_ascii_control(self) -> bool { + matches!(self.as_u32(), 0..=0x1F | 0x7F) + } + + /// See [`char::is_ascii_digit`]. 
+ #[inline] + #[must_use] + pub const fn is_ascii_digit(self) -> bool { + const ZERO: u32 = '0' as u32; + const NINE: u32 = '9' as u32; + matches!(self.as_u32(), ZERO..=NINE) + } + + /// See [`char::is_ascii_graphic`]. + #[inline] + #[must_use] + pub const fn is_ascii_graphic(self) -> bool { + matches!(self.as_u32(), 0x21..=0x7E) + } + + /// See [`char::is_ascii_hexdigit`]. + #[inline] + #[must_use] + pub const fn is_ascii_hexdigit(self) -> bool { + const LOWER_A: u32 = 'a' as u32; + const LOWER_F: u32 = 'f' as u32; + const UPPER_A: u32 = 'A' as u32; + const UPPER_F: u32 = 'F' as u32; + self.is_ascii_digit() || matches!(self.as_u32(), (LOWER_A..=LOWER_F) | (UPPER_A..=UPPER_F)) + } + + /// See [`char::is_ascii_lowercase`]. + #[inline] + #[must_use] + pub const fn is_ascii_lowercase(self) -> bool { + const A: u32 = 'a' as u32; + const Z: u32 = 'z' as u32; + matches!(self.as_u32(), A..=Z) + } + + /// See [`char::is_ascii_octdigit`]. + #[inline] + #[must_use] + pub const fn is_ascii_octdigit(self) -> bool { + const ZERO: u32 = '0' as u32; + const SEVEN: u32 = '7' as u32; + matches!(self.as_u32(), ZERO..=SEVEN) + } + + /// See [`char::is_ascii_punctuation`]. + #[inline] + #[must_use] + pub const fn is_ascii_punctuation(self) -> bool { + matches!( + self.as_u32(), + (0x21..=0x2F) | (0x3A..=0x40) | (0x5B..=0x60) | (0x7B..=0x7E) + ) + } + + /// See [`char::is_ascii_uppercase`]. + #[inline] + #[must_use] + pub const fn is_ascii_uppercase(self) -> bool { + const A: u32 = 'A' as u32; + const Z: u32 = 'Z' as u32; + matches!(self.as_u32(), A..=Z) + } + + /// See [`char::is_ascii_whitespace`]. 
+ #[inline] + #[must_use] + pub const fn is_ascii_whitespace(self) -> bool { + const SPACE: u32 = ' ' as u32; + const HORIZONTAL_TAB: u32 = '\t' as u32; + const LINE_FEED: u32 = '\n' as u32; + const FORM_FEED: u32 = 0xC; + const CARRIAGE_RETURN: u32 = '\r' as u32; + matches!( + self.as_u32(), + SPACE | HORIZONTAL_TAB | LINE_FEED | FORM_FEED | CARRIAGE_RETURN + ) + } + + /// See [`char::is_control`]. + #[inline] + #[must_use] + pub fn is_control(self) -> bool { + self.as_char().is_some_and(|char| char.is_control()) + } + + /// See [`char::is_digit`]. + #[inline] + #[must_use] + pub fn is_digit(self, radix: u32) -> bool { + self.to_digit(radix).is_some() + } + + /// See [`char::is_lowercase`]. + #[inline] + #[must_use] + pub fn is_lowercase(self) -> bool { + self.as_char().is_some_and(|char| char.is_lowercase()) + } + + /// See [`char::is_numeric`]. + #[inline] + #[must_use] + pub fn is_numeric(self) -> bool { + self.as_char().is_some_and(|char| char.is_numeric()) + } + + /// See [`char::is_uppercase`]. + #[inline] + #[must_use] + pub fn is_uppercase(self) -> bool { + self.as_char().is_some_and(|char| char.is_uppercase()) + } + + /// See [`char::is_whitespace`]. + #[inline] + #[must_use] + pub fn is_whitespace(self) -> bool { + self.as_char().is_some_and(|char| char.is_whitespace()) + } + + /// See [`char::len_utf16`]. Surrogate code points return 1. + /// + /// ``` + /// # use java_string::JavaCodePoint; + /// + /// let n = JavaCodePoint::from_char('รŸ').len_utf16(); + /// assert_eq!(n, 1); + /// + /// let len = JavaCodePoint::from_char('๐Ÿ’ฃ').len_utf16(); + /// assert_eq!(len, 2); + /// + /// assert_eq!(1, JavaCodePoint::from_u32(0xD800).unwrap().len_utf16()); + /// ``` + #[inline] + #[must_use] + pub const fn len_utf16(self) -> usize { + if let Some(char) = self.as_char() { + char.len_utf16() + } else { + 1 // invalid code points are encoded as 1 utf16 code point anyway + } + } + + /// See [`char::len_utf8`]. Surrogate code points return 3. 
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    ///
+    /// let len = JavaCodePoint::from_char('A').len_utf8();
+    /// assert_eq!(len, 1);
+    ///
+    /// let len = JavaCodePoint::from_char('ß').len_utf8();
+    /// assert_eq!(len, 2);
+    ///
+    /// let len = JavaCodePoint::from_char('ℝ').len_utf8();
+    /// assert_eq!(len, 3);
+    ///
+    /// let len = JavaCodePoint::from_char('💣').len_utf8();
+    /// assert_eq!(len, 4);
+    ///
+    /// let len = JavaCodePoint::from_u32(0xD800).unwrap().len_utf8();
+    /// assert_eq!(len, 3);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn len_utf8(self) -> usize {
+        if let Some(char) = self.as_char() {
+            char.len_utf8()
+        } else {
+            3 // invalid code points are all length 3 in semi-valid utf8
+        }
+    }
+
+    /// See [`char::make_ascii_lowercase`]. Assigns `self.to_ascii_lowercase()` in place.
+    #[inline]
+    pub fn make_ascii_lowercase(&mut self) {
+        *self = self.to_ascii_lowercase();
+    }
+
+    /// See [`char::make_ascii_uppercase`]. Assigns `self.to_ascii_uppercase()` in place.
+    #[inline]
+    pub fn make_ascii_uppercase(&mut self) {
+        *self = self.to_ascii_uppercase();
+    }
+
+    /// See [`char::to_ascii_lowercase`].
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    ///
+    /// let ascii = JavaCodePoint::from_char('A');
+    /// let non_ascii = JavaCodePoint::from_char('❤');
+    ///
+    /// assert_eq!('a', ascii.to_ascii_lowercase());
+    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn to_ascii_lowercase(self) -> JavaCodePoint {
+        if self.is_ascii_uppercase() {
+            unsafe {
+                // SAFETY: all lowercase chars are valid chars
+                Self::from_u32_unchecked(self.as_u32() + 32)
+            }
+        } else {
+            self
+        }
+    }
+
+    /// See [`char::to_ascii_uppercase`].
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    ///
+    /// let ascii = JavaCodePoint::from_char('a');
+    /// let non_ascii = JavaCodePoint::from_char('❤');
+    ///
+    /// assert_eq!('A', ascii.to_ascii_uppercase());
+    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn to_ascii_uppercase(self) -> JavaCodePoint {
+        if self.is_ascii_lowercase() {
+            unsafe {
+                // SAFETY: all uppercase chars are valid chars
+                Self::from_u32_unchecked(self.as_u32() - 32)
+            }
+        } else {
+            self
+        }
+    }
+
+    /// See [`char::to_digit`]. Returns `None` when `as_char()` is `None`.
+    #[inline]
+    #[must_use]
+    pub const fn to_digit(self, radix: u32) -> Option<u32> {
+        if let Some(char) = self.as_char() {
+            char.to_digit(radix)
+        } else {
+            None
+        }
+    }
+
+    /// See [`char::to_lowercase`]. Yields `self` unchanged when `as_char()` is `None`.
+    #[inline]
+    #[must_use]
+    pub fn to_lowercase(self) -> ToLowercase {
+        match self.as_char() {
+            Some(char) => ToLowercase::char(char.to_lowercase()),
+            None => ToLowercase::invalid(self),
+        }
+    }
+
+    /// See [`char::to_uppercase`].
+    #[inline]
+    #[must_use]
+    pub fn to_uppercase(self) -> ToUppercase {
+        match self.as_char() {
+            Some(char) => ToUppercase::char(char.to_uppercase()),
+            None => ToUppercase::invalid(self),
+        }
+    }
+}
+
+impl Debug for JavaCodePoint {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        f.write_char('\'')?;
+        for c in self.escape_debug_ext(EscapeDebugExtArgs {
+            escape_single_quote: true,
+            escape_double_quote: false,
+        }) {
+            f.write_char(c)?;
+        }
+        f.write_char('\'')
+    }
+}
+
+impl Default for JavaCodePoint {
+    #[inline]
+    fn default() -> Self {
+        JavaCodePoint::from_char('\0')
+    }
+}
+
+impl Display for JavaCodePoint {
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        Display::fmt(&self.as_char().unwrap_or(char::REPLACEMENT_CHARACTER), f)
+    }
+}
+
+impl From<JavaCodePoint> for u32 {
+    #[inline]
+    fn from(value: JavaCodePoint) -> Self {
+        value.as_u32()
+    }
+}
+
+impl From<u8> for JavaCodePoint {
+    #[inline]
+    fn from(value: u8) -> Self {
+        JavaCodePoint::from_char(char::from(value))
+    }
+}
+
+impl FromStr for JavaCodePoint {
+    type Err = ParseCharError;
+
+    #[inline]
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        char::from_str(s).map(JavaCodePoint::from_char)
+    }
+}
+
+impl Hash for JavaCodePoint {
+    #[inline]
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.as_u32().hash(state)
+    }
+}
+
+impl Ord for JavaCodePoint {
+    #[inline]
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.as_u32().cmp(&other.as_u32())
+    }
+}
+
+impl PartialOrd for JavaCodePoint {
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl PartialOrd<char> for JavaCodePoint {
+    #[inline]
+    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
+        self.partial_cmp(&JavaCodePoint::from_char(*other))
+    }
+}
+
+impl PartialOrd<JavaCodePoint> for char {
+    #[inline]
+    fn partial_cmp(&self, other: &JavaCodePoint) -> Option<Ordering> {
+        JavaCodePoint::from_char(*self).partial_cmp(other)
+    }
+}
+
+impl PartialEq<char> for JavaCodePoint {
+    #[inline]
+    fn eq(&self, other: &char) -> bool {
+        self == &JavaCodePoint::from_char(*other)
+    }
+}
+
+impl PartialEq<JavaCodePoint> for char {
+    #[inline]
+    fn eq(&self, other: &JavaCodePoint) -> bool {
+        &JavaCodePoint::from_char(*self) == other
+    }
+}
+
+pub(crate) struct EscapeDebugExtArgs {
+    pub(crate) escape_single_quote: bool,
+    pub(crate) escape_double_quote: bool,
+}
+
+impl EscapeDebugExtArgs {
+    pub(crate) const ESCAPE_ALL: Self = Self {
+        escape_single_quote: true,
+        escape_double_quote: true,
+    };
+}
+
+#[derive(Clone, Debug)]
+pub struct CharEscapeIter {
+    inner: EscapeIterInner,
+}
+
+#[derive(Clone, Debug)]
+enum EscapeIterInner {
+    Printable(Once<char>),
+    Escaped(EscapeIterEscaped),
+}
+
+impl Display for EscapeIterInner {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        match self {
+            EscapeIterInner::Printable(char) => char.clone().try_for_each(|ch| f.write_char(ch)),
+            EscapeIterInner::Escaped(escaped) => Display::fmt(escaped, f),
+        }
+    }
+}
+
+impl CharEscapeIter {
+    #[inline]
+    fn printable(char: char) -> Self {
+        CharEscapeIter {
+            inner: EscapeIterInner::Printable(once(char)),
+        }
+    }
+
+    /// # Safety
+    /// Assumes that the input byte array is ASCII
+    #[inline]
+    unsafe fn new<const N: usize>(bytes: [u8; N]) -> Self {
+        assert!(N <= 10, "Too many bytes in escape iter");
+        let mut ten_bytes = [0; 10];
+        ten_bytes[..N].copy_from_slice(&bytes);
+        CharEscapeIter {
+            inner: EscapeIterInner::Escaped(EscapeIterEscaped {
+                bytes: ten_bytes,
+                range: 0..N,
+            }),
+        }
+    }
+}
+
+impl Iterator for CharEscapeIter {
+    type Item = char;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        match &mut self.inner {
+            EscapeIterInner::Printable(printable) => printable.next(),
+            EscapeIterInner::Escaped(escaped) => escaped.next(),
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        match &self.inner {
+            EscapeIterInner::Printable(printable) => printable.size_hint(),
+            EscapeIterInner::Escaped(escaped) => escaped.size_hint(),
+        }
+    }
+}
+
+impl ExactSizeIterator for CharEscapeIter {
+    #[inline]
+    fn len(&self) -> usize {
+        match &self.inner {
+            EscapeIterInner::Printable(printable) => printable.len(),
+            EscapeIterInner::Escaped(escaped) => escaped.len(),
+        }
+    }
+}
+
+impl FusedIterator for CharEscapeIter {}
+
+impl Display for CharEscapeIter {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        Display::fmt(&self.inner, f)
+    }
+}
+
+#[derive(Clone, Debug)]
+struct EscapeIterEscaped {
+    // SAFETY: all values must be in the ASCII range
+    bytes: [u8; 10],
+    // SAFETY: range must not be out of bounds for length 10
+    range: Range<usize>,
+}
+
+impl Iterator for EscapeIterEscaped {
+    type Item = char;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.range.next().map(|index| unsafe {
+            // SAFETY: the range is never out of bounds for length 10
+            char::from(*self.bytes.get_unchecked(index))
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.range.size_hint()
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.range.len()
+    }
+}
+
+impl ExactSizeIterator for EscapeIterEscaped {
+    #[inline]
+    fn len(&self) -> usize {
+        self.range.len()
+    }
+}
+
+impl FusedIterator for EscapeIterEscaped {}
+
+impl Display for EscapeIterEscaped {
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let str = unsafe {
+            // SAFETY: all bytes are in ASCII range, and range is in bounds for length 10
+            std::str::from_utf8_unchecked(self.bytes.get_unchecked(self.range.clone()))
+        };
+        f.write_str(str)
+    }
+}
+
+pub type ToLowercase = CharIterDelegate<std::char::ToLowercase>;
+pub type ToUppercase = CharIterDelegate<std::char::ToUppercase>;
+
+#[derive(Debug, Clone)]
+pub struct CharIterDelegate<I>(CharIterDelegateInner<I>);
+
+impl<I> CharIterDelegate<I> {
+    #[inline]
+    fn char(iter: I) -> CharIterDelegate<I> {
+        CharIterDelegate(CharIterDelegateInner::Char(iter))
+    }
+
+    #[inline]
+    fn invalid(code_point: JavaCodePoint) -> CharIterDelegate<I> {
+        CharIterDelegate(CharIterDelegateInner::Invalid(Some(code_point).into_iter()))
+    }
+}
+
+#[derive(Debug, Clone)]
+enum CharIterDelegateInner<I> {
+    Char(I),
+    Invalid(std::option::IntoIter<JavaCodePoint>),
+}
+
+impl<I> Iterator for CharIterDelegate<I>
+where
+    I: Iterator<Item = char>,
+{
+    type Item = JavaCodePoint;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        match &mut self.0 {
+            CharIterDelegateInner::Char(char_iter) => {
+                char_iter.next().map(JavaCodePoint::from_char)
+            }
+            CharIterDelegateInner::Invalid(code_point) => code_point.next(),
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        match &self.0 {
+            CharIterDelegateInner::Char(char_iter) => char_iter.size_hint(),
+            CharIterDelegateInner::Invalid(code_point) => code_point.size_hint(),
+        }
+    }
+}
+
+impl<I> DoubleEndedIterator for CharIterDelegate<I>
+where
+    I: Iterator<Item = char> + DoubleEndedIterator,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        match &mut self.0 {
+            CharIterDelegateInner::Char(char_iter) => {
+                char_iter.next_back().map(JavaCodePoint::from_char)
+            }
+            CharIterDelegateInner::Invalid(code_point) => code_point.next_back(),
+        }
+    }
+}
+
+impl<I> ExactSizeIterator for CharIterDelegate<I> where I: Iterator<Item = char> + ExactSizeIterator {}
+
+impl<I> FusedIterator for CharIterDelegate<I> where I: Iterator<Item = char> + FusedIterator {}
diff --git a/java_string/src/error.rs b/java_string/src/error.rs
new file mode 100644
index 0000000..09742d0
--- /dev/null
+++ b/java_string/src/error.rs
@@ -0,0 +1,126 @@
+use std::error::Error;
+use std::fmt;
+use std::fmt::{Display, Formatter};
+
+#[derive(Copy, Eq, PartialEq, Clone, Debug)]
+pub struct Utf8Error {
+    pub(crate) valid_up_to: usize,
+    pub(crate) error_len: Option<u8>,
+}
+
+impl Utf8Error {
+    #[must_use]
+    #[inline]
+    pub const fn valid_up_to(&self) -> usize {
+        self.valid_up_to
+    }
+
+    #[must_use]
+    #[inline]
+    pub const fn error_len(&self) -> Option<usize> {
+        // Manual implementation of Option::map since it's not const
+        match self.error_len {
+            Some(len) => Some(len as usize),
+            None => None,
+        }
+    }
+
+    #[must_use]
+    #[inline]
+    pub(crate) const fn from_std(value: std::str::Utf8Error) -> Self {
+        Self {
+            valid_up_to: value.valid_up_to(),
+            // Manual implementation of Option::map since it's not const
+            error_len: match value.error_len() {
+                Some(error_len) => Some(error_len as u8),
+                None => None,
+            },
+        }
+    }
+}
+
+impl Display for Utf8Error {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        if let Some(error_len) = self.error_len {
+            write!(
+                f,
+                "invalid utf-8 sequence of {} bytes from index {}",
+                error_len, self.valid_up_to
+            )
+        } else {
+            write!(
+                f,
+                "incomplete utf-8 byte sequence from index {}",
+                self.valid_up_to
+            )
+        }
+    }
+}
+
+impl From<std::str::Utf8Error> for Utf8Error {
+    #[inline]
+    fn from(value: std::str::Utf8Error) -> Self {
+        Self::from_std(value)
+    }
+}
+
+impl Error for Utf8Error {}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct FromUtf8Error {
+    pub(crate) bytes: Vec<u8>,
+    pub(crate) error: Utf8Error,
+}
+
+impl FromUtf8Error {
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.bytes[..]
+    }
+
+    #[must_use]
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.bytes
+    }
+
+    pub fn utf8_error(&self) -> Utf8Error {
+        self.error
+    }
+}
+
+impl Display for FromUtf8Error {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        Display::fmt(&self.error, f)
+    }
+}
+
+impl Error for FromUtf8Error {}
+
+#[derive(Copy, Eq, PartialEq, Clone, Debug)]
+pub enum ParseError<E> {
+    InvalidUtf8(Utf8Error),
+    Err(E),
+}
+
+impl<E> Display for ParseError<E>
+where
+    E: Display,
+{
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        match self {
+            ParseError::InvalidUtf8(err) => Display::fmt(err, f),
+            ParseError::Err(err) => Display::fmt(err, f),
+        }
+    }
+}
+
+impl<E> Error for ParseError<E>
+where
+    E: Error + 'static,
+{
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        match self {
+            ParseError::InvalidUtf8(err) => Some(err),
+            ParseError::Err(err) => Some(err),
+        }
+    }
+}
diff --git a/java_string/src/iter.rs b/java_string/src/iter.rs
new file mode 100644
index 0000000..f936053
--- /dev/null
+++ b/java_string/src/iter.rs
@@ -0,0 +1,977 @@
+use std::fmt::{Debug, Display, Formatter, Write};
+use std::iter::{Chain, Copied, Filter, FlatMap, Flatten, FusedIterator, Map};
+use std::{option, slice};
+
+use crate::validations::{next_code_point, next_code_point_reverse};
+use crate::{CharEscapeIter, JavaCodePoint, JavaStr, JavaStrPattern};
+macro_rules! delegate {
+    (Iterator for $ty:ident $(<$($lt:lifetime),+>)? => $item:ty $(, DoubleEnded = $double_ended:ty)?) => {
+        impl$(<$($lt),+>)? Iterator for $ty$(<$($lt),+>)? {
+            type Item = $item;
+
+            #[inline]
+            fn next(&mut self) -> Option<Self::Item> {
+                self.inner.next()
+            }
+
+            #[inline]
+            fn size_hint(&self) -> (usize, Option<usize>) {
+                self.inner.size_hint()
+            }
+
+            #[inline]
+            fn count(self) -> usize {
+                self.inner.count()
+            }
+
+            #[inline]
+            fn last(self) -> Option<Self::Item> {
+                self.inner.last()
+            }
+
+            #[inline]
+            fn nth(&mut self, n: usize) -> Option<Self::Item> {
+                self.inner.nth(n)
+            }
+
+            #[inline]
+            fn all<F>(&mut self, f: F) -> bool
+            where
+                F: FnMut(Self::Item) -> bool,
+            {
+                self.inner.all(f)
+            }
+
+            #[inline]
+            fn any<F>(&mut self, f: F) -> bool
+            where
+                F: FnMut(Self::Item) -> bool,
+            {
+                self.inner.any(f)
+            }
+
+            #[inline]
+            fn find<P>(&mut self, predicate: P) -> Option<Self::Item>
+            where
+                P: FnMut(&Self::Item) -> bool,
+            {
+                self.inner.find(predicate)
+            }
+
+            #[inline]
+            fn position<P>(&mut self, predicate: P) -> Option<usize>
+            where
+                P: FnMut(Self::Item) -> bool,
+            {
+                self.inner.position(predicate)
+            }
+
+            $(
+            #[inline]
+            fn rposition<P>(&mut self, predicate: P) -> Option<usize>
+            where
+                P: FnMut(Self::Item) -> bool,
+            {
+                let _test: $double_ended = ();
+                self.inner.rposition(predicate)
+            }
+            )?
+        }
+    };
+
+    (DoubleEndedIterator for $ty:ident $(<$($lt:lifetime),+>)?) => {
+        impl$(<$($lt),+>)? DoubleEndedIterator for $ty$(<$($lt),+>)? {
+            #[inline]
+            fn next_back(&mut self) -> Option<Self::Item> {
+                self.inner.next_back()
+            }
+
+            #[inline]
+            fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
+                self.inner.nth_back(n)
+            }
+
+            #[inline]
+            fn rfind<P>(&mut self, predicate: P) -> Option<Self::Item>
+            where
+                P: FnMut(&Self::Item) -> bool,
+            {
+                self.inner.rfind(predicate)
+            }
+        }
+    };
+
+    (ExactSizeIterator for $ty:ident $(<$($lt:lifetime),+>)?) => {
+        impl$(<$($lt),+>)? ExactSizeIterator for $ty$(<$($lt),+>)? {
+            #[inline]
+            fn len(&self) -> usize {
+                self.inner.len()
+            }
+        }
+    };
+
+    (FusedIterator for $ty:ident $(<$($lt:lifetime),+>)?) => {
+        impl$(<$($lt),+>)? FusedIterator for $ty$(<$($lt),+>)? {}
+    };
+
+    (Iterator, DoubleEndedIterator, ExactSizeIterator, FusedIterator for $ty:ident $(<$($lt:lifetime),+>)? => $item:ty) => {
+        delegate!(Iterator for $ty$(<$($lt),+>)? => $item, DoubleEnded = ());
+        delegate!(DoubleEndedIterator for $ty$(<$($lt),+>)?);
+        delegate!(ExactSizeIterator for $ty$(<$($lt),+>)?);
+        delegate!(FusedIterator for $ty$(<$($lt),+>)?);
+    };
+}
+
+#[must_use]
+#[derive(Clone, Debug)]
+pub struct Bytes<'a> {
+    pub(crate) inner: Copied<slice::Iter<'a, u8>>,
+}
+delegate!(Iterator, DoubleEndedIterator, ExactSizeIterator, FusedIterator for Bytes<'a> => u8);
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct EscapeDebug<'a> {
+    #[allow(clippy::type_complexity)]
+    pub(crate) inner: Chain<
+        Flatten<option::IntoIter<CharEscapeIter>>,
+        FlatMap<Chars<'a>, CharEscapeIter, fn(JavaCodePoint) -> CharEscapeIter>,
+    >,
+}
+delegate!(Iterator for EscapeDebug<'a> => char);
+delegate!(FusedIterator for EscapeDebug<'a>);
+impl<'a> Display for EscapeDebug<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        self.clone().try_for_each(|c| f.write_char(c))
+    }
+}
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct EscapeDefault<'a> {
+    pub(crate) inner: FlatMap<Chars<'a>, CharEscapeIter, fn(JavaCodePoint) -> CharEscapeIter>,
+}
+delegate!(Iterator for EscapeDefault<'a> => char);
+delegate!(FusedIterator for EscapeDefault<'a>);
+impl<'a> Display for EscapeDefault<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        self.clone().try_for_each(|c| f.write_char(c))
+    }
+}
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct EscapeUnicode<'a> {
+    pub(crate) inner: FlatMap<Chars<'a>, CharEscapeIter, fn(JavaCodePoint) -> CharEscapeIter>,
+}
+delegate!(Iterator for EscapeUnicode<'a> => char);
+delegate!(FusedIterator for EscapeUnicode<'a>);
+impl<'a> Display for EscapeUnicode<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        self.clone().try_for_each(|c| f.write_char(c))
+    }
+}
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct Lines<'a> {
+    pub(crate) inner: Map<SplitInclusive<'a, char>, fn(&JavaStr) -> &JavaStr>,
+}
+delegate!(Iterator for Lines<'a> => &'a JavaStr);
+delegate!(DoubleEndedIterator for Lines<'a>);
+delegate!(FusedIterator for Lines<'a>);
+
+#[derive(Clone)]
+#[must_use]
+pub struct Chars<'a> {
+    pub(crate) inner: slice::Iter<'a, u8>,
+}
+
+impl<'a> Iterator for Chars<'a> {
+    type Item = JavaCodePoint;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        // SAFETY: `JavaStr` invariant says `self.inner` is a semi-valid UTF-8 string
+        // and the resulting `ch` is a valid Unicode Scalar Value or surrogate
+        // code point.
+        unsafe { next_code_point(&mut self.inner).map(|ch| JavaCodePoint::from_u32_unchecked(ch)) }
+    }
+
+    // TODO: std has an optimized count impl
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let len = self.inner.len();
+        // `(len + 3)` can't overflow, because we know that the `slice::Iter`
+        // belongs to a slice in memory which has a maximum length of
+        // `isize::MAX` (that's well below `usize::MAX`).
+        ((len + 3) / 4, Some(len))
+    }
+
+    #[inline]
+    fn last(mut self) -> Option<Self::Item> {
+        // No need to go through the entire string.
+        self.next_back()
+    }
+}
+
+impl Debug for Chars<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Chars(")?;
+        f.debug_list().entries(self.clone()).finish()?;
+        write!(f, ")")?;
+        Ok(())
+    }
+}
+
+impl<'a> DoubleEndedIterator for Chars<'a> {
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        // SAFETY: `JavaStr` invariant says `self.inner` is a semi-valid UTF-8 string
+        // and the resulting `ch` is a valid Unicode Scalar Value or surrogate
+        // code point.
+        unsafe {
+            next_code_point_reverse(&mut self.inner).map(|ch| JavaCodePoint::from_u32_unchecked(ch))
+        }
+    }
+}
+
+impl FusedIterator for Chars<'_> {}
+
+impl<'a> Chars<'a> {
+    #[inline]
+    #[must_use]
+    pub fn as_str(&self) -> &'a JavaStr {
+        // SAFETY: `Chars` is only made from a JavaStr, which guarantees the iter is
+        // semi-valid UTF-8.
+        unsafe { JavaStr::from_semi_utf8_unchecked(self.inner.as_slice()) }
+    }
+}
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct CharIndices<'a> {
+    pub(crate) front_offset: usize,
+    pub(crate) inner: Chars<'a>,
+}
+
+impl<'a> Iterator for CharIndices<'a> {
+    type Item = (usize, JavaCodePoint);
+
+    #[inline]
+    fn next(&mut self) -> Option<(usize, JavaCodePoint)> {
+        let pre_len = self.inner.inner.len();
+        match self.inner.next() {
+            None => None,
+            Some(ch) => {
+                let index = self.front_offset;
+                let len = self.inner.inner.len();
+                self.front_offset += pre_len - len;
+                Some((index, ch))
+            }
+        }
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.inner.count()
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+
+    #[inline]
+    fn last(mut self) -> Option<(usize, JavaCodePoint)> {
+        // No need to go through the entire string.
+        self.next_back()
+    }
+}
+
+impl<'a> DoubleEndedIterator for CharIndices<'a> {
+    #[inline]
+    fn next_back(&mut self) -> Option<(usize, JavaCodePoint)> {
+        self.inner.next_back().map(|ch| {
+            let index = self.front_offset + self.inner.inner.len();
+            (index, ch)
+        })
+    }
+}
+
+impl FusedIterator for CharIndices<'_> {}
+
+impl<'a> CharIndices<'a> {
+    #[inline]
+    #[must_use]
+    pub fn as_str(&self) -> &'a JavaStr {
+        self.inner.as_str()
+    }
+}
+
+#[must_use]
+#[derive(Debug, Clone)]
+pub struct Matches<'a, P> {
+    pub(crate) str: &'a JavaStr,
+    pub(crate) pat: P,
+}
+
+impl<'a, P> Iterator for Matches<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some((index, len)) = self.pat.find_in(self.str) {
+            // SAFETY: pattern returns valid indices
+            let ret = unsafe { self.str.get_unchecked(index..index + len) };
+            self.str = unsafe { self.str.get_unchecked(index + len..) };
+            Some(ret)
+        } else {
+            self.str = Default::default();
+            None
+        }
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for Matches<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if let Some((index, len)) = self.pat.rfind_in(self.str) {
+            // SAFETY: pattern returns valid indices
+            let ret = unsafe { self.str.get_unchecked(index..index + len) };
+            self.str = unsafe { self.str.get_unchecked(..index) };
+            Some(ret)
+        } else {
+            self.str = Default::default();
+            None
+        }
+    }
+}
+
+#[must_use]
+#[derive(Clone, Debug)]
+pub struct RMatches<'a, P> {
+    pub(crate) inner: Matches<'a, P>,
+}
+
+impl<'a, P> Iterator for RMatches<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for RMatches<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+#[must_use]
+#[derive(Clone, Debug)]
+pub struct MatchIndices<'a, P> {
+    pub(crate) str: &'a JavaStr,
+    pub(crate) start: usize,
+    pub(crate) pat: P,
+}
+
+impl<'a, P> Iterator for MatchIndices<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = (usize, &'a JavaStr);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some((index, len)) = self.pat.find_in(self.str) {
+            let full_index = self.start + index;
+            self.start = full_index + len;
+            // SAFETY: pattern returns valid indices
+            let ret = unsafe { self.str.get_unchecked(index..index + len) };
+            self.str = unsafe { self.str.get_unchecked(index + len..) };
+            Some((full_index, ret))
+        } else {
+            self.start += self.str.len();
+            self.str = Default::default();
+            None
+        }
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for MatchIndices<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if let Some((index, len)) = self.pat.rfind_in(self.str) {
+            // SAFETY: pattern returns valid indices
+            let ret = unsafe { self.str.get_unchecked(index..index + len) };
+            self.str = unsafe { self.str.get_unchecked(..index) };
+            Some((self.start + index, ret))
+        } else {
+            self.str = Default::default();
+            None
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RMatchIndices<'a, P> {
+    pub(crate) inner: MatchIndices<'a, P>,
+}
+
+impl<'a, P> Iterator for RMatchIndices<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = (usize, &'a JavaStr);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for RMatchIndices<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+#[derive(Clone, Debug)]
+struct SplitHelper<'a, P> {
+    start: usize,
+    end: usize,
+    haystack: &'a JavaStr,
+    pat: P,
+    allow_trailing_empty: bool,
+    finished: bool,
+    had_empty_match: bool,
+}
+
+impl<'a, P> SplitHelper<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn new(haystack: &'a JavaStr, pat: P, allow_trailing_empty: bool) -> Self {
+        Self {
+            start: 0,
+            end: haystack.len(),
+            haystack,
+            pat,
+            allow_trailing_empty,
+            finished: false,
+            had_empty_match: false,
+        }
+    }
+
+    #[inline]
+    fn get_end(&mut self) -> Option<&'a JavaStr> {
+        if !self.finished {
+            self.finished = true;
+
+            if self.allow_trailing_empty || self.end - self.start > 0 {
+                // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
+                let string = unsafe { self.haystack.get_unchecked(self.start..self.end) };
+                return Some(string);
+            }
+        }
+
+        None
+    }
+
+    #[inline]
+    fn next_match(&mut self) -> Option<(usize, usize)> {
+        // SAFETY: `self.start` always lies on a unicode boundary.
+        let substr = unsafe { self.haystack.get_unchecked(self.start..) };
+
+        let result = if self.had_empty_match {
+            // if we had an empty match before, we are going to find the empty match again.
+            // don't do that, search from the next index along.
+
+            if substr.is_empty() {
+                None
+            } else {
+                // SAFETY: we can pop the string because we already checked if the string is
+                // empty above
+                let first_char_len = unsafe { substr.chars().next().unwrap_unchecked().len_utf8() };
+                let popped_str = unsafe { substr.get_unchecked(first_char_len..) };
+
+                self.pat
+                    .find_in(popped_str)
+                    .map(|(index, len)| (index + first_char_len + self.start, len))
+            }
+        } else {
+            self.pat
+                .find_in(substr)
+                .map(|(index, len)| (index + self.start, len))
+        };
+
+        self.had_empty_match = result.is_some_and(|(_, len)| len == 0);
+
+        result
+    }
+
+    #[inline]
+    fn next(&mut self) -> Option<&'a JavaStr> {
+        if self.finished {
+            return None;
+        }
+
+        match self.next_match() {
+            Some((index, len)) => unsafe {
+                // SAFETY: pattern guarantees valid indices
+                let elt = self.haystack.get_unchecked(self.start..index);
+                self.start = index + len;
+                Some(elt)
+            },
+            None => self.get_end(),
+        }
+    }
+
+    #[inline]
+    fn next_inclusive(&mut self) -> Option<&'a JavaStr> {
+        if self.finished {
+            return None;
+        }
+
+        match self.next_match() {
+            Some((index, len)) => unsafe {
+                // SAFETY: pattern guarantees valid indices
+                let elt = self.haystack.get_unchecked(self.start..index + len);
+                self.start = index + len;
+                Some(elt)
+            },
+            None => self.get_end(),
+        }
+    }
+
+    #[inline]
+    fn next_match_back(&mut self) -> Option<(usize, usize)> {
+        // SAFETY: `self.end` always lies on a unicode boundary.
+        let substr = unsafe { self.haystack.get_unchecked(..self.end) };
+
+        let result = if self.had_empty_match {
+            // if we had an empty match before, we are going to find the empty match again.
+            // don't do that, search from the next index along.
+
+            if substr.is_empty() {
+                None
+            } else {
+                // SAFETY: we can pop the string because we already checked if the string is
+                // empty above
+                let last_char_len =
+                    unsafe { substr.chars().next_back().unwrap_unchecked().len_utf8() };
+                let popped_str = unsafe { substr.get_unchecked(..substr.len() - last_char_len) };
+
+                self.pat.rfind_in(popped_str)
+            }
+        } else {
+            self.pat.rfind_in(substr)
+        };
+
+        self.had_empty_match = result.is_some_and(|(_, len)| len == 0);
+
+        result
+    }
+
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a JavaStr> {
+        if self.finished {
+            return None;
+        }
+
+        if !self.allow_trailing_empty {
+            self.allow_trailing_empty = true;
+            match self.next_back() {
+                Some(elt) if !elt.is_empty() => return Some(elt),
+                _ => {
+                    if self.finished {
+                        return None;
+                    }
+                }
+            }
+        }
+
+        match self.next_match_back() {
+            Some((index, len)) => unsafe {
+                // SAFETY: pattern guarantees valid indices
+                let elt = self.haystack.get_unchecked(index + len..self.end);
+                self.end = index;
+                Some(elt)
+            },
+            None => unsafe {
+                // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
+                self.finished = true;
+                Some(self.haystack.get_unchecked(self.start..self.end))
+            },
+        }
+    }
+
+    #[inline]
+    fn next_back_inclusive(&mut self) -> Option<&'a JavaStr> {
+        if self.finished {
+            return None;
+        }
+
+        if !self.allow_trailing_empty {
+            self.allow_trailing_empty = true;
+            match self.next_back_inclusive() {
+                Some(elt) if !elt.is_empty() => return Some(elt),
+                _ => {
+                    if self.finished {
+                        return None;
+                    }
+                }
+            }
+        }
+
+        match self.next_match_back() {
+            Some((index, len)) => {
+                // SAFETY: pattern guarantees valid indices
+                let elt = unsafe { self.haystack.get_unchecked(index + len..self.end) };
+                self.end = index + len;
+                Some(elt)
+            }
+            None => {
+                self.finished = true;
+                // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
+                Some(unsafe { self.haystack.get_unchecked(self.start..self.end) })
+            }
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct Split<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P> Split<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        Split {
+            inner: SplitHelper::new(haystack, pat, true),
+        }
+    }
+}
+
+impl<'a, P> Iterator for Split<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for Split<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P> FusedIterator for Split<'a, P> where P: JavaStrPattern {}
+
+#[derive(Clone, Debug)]
+pub struct RSplit<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P> RSplit<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        RSplit {
+            inner: SplitHelper::new(haystack, pat, true),
+        }
+    }
+}
+
+impl<'a, P> Iterator for RSplit<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for RSplit<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+impl<'a, P> FusedIterator for RSplit<'a, P> where P: JavaStrPattern {}
+
+#[derive(Clone, Debug)]
+pub struct SplitTerminator<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P> SplitTerminator<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        SplitTerminator {
+            inner: SplitHelper::new(haystack, pat, false),
+        }
+    }
+}
+
+impl<'a, P> Iterator for SplitTerminator<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for SplitTerminator<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P> FusedIterator for SplitTerminator<'a, P> where P: JavaStrPattern {}
+
+#[derive(Clone, Debug)]
+pub struct RSplitTerminator<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P> RSplitTerminator<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        RSplitTerminator {
+            inner: SplitHelper::new(haystack, pat, false),
+        }
+    }
+}
+
+impl<'a, P> Iterator for RSplitTerminator<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for RSplitTerminator<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+impl<'a, P> FusedIterator for RSplitTerminator<'a, P> where P: JavaStrPattern {}
+
+#[derive(Clone, Debug)]
+pub struct SplitInclusive<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P> SplitInclusive<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        SplitInclusive {
+            inner: SplitHelper::new(haystack, pat, false),
+        }
+    }
+}
+
+impl<'a, P> Iterator for SplitInclusive<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next_inclusive()
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for SplitInclusive<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.inner.next_back_inclusive()
+    }
+}
+
+impl<'a, P> FusedIterator for SplitInclusive<'a, P> where P: JavaStrPattern {}
+
+#[derive(Clone, Debug)]
+pub struct SplitN<'a, P> {
+    inner: SplitHelper<'a, P>,
+    count: usize,
+}
+
+impl<'a, P> SplitN<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P, count: usize) -> Self {
+        SplitN {
+            inner: SplitHelper::new(haystack, pat, true),
+            count,
+        }
+    }
+}
+
+impl<'a, P> Iterator for SplitN<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.count {
+            0 => None,
+            1 => {
+                self.count = 0;
+                self.inner.get_end()
+            }
+            _ => {
+                self.count -= 1;
+                self.inner.next()
+            }
+        }
+    }
+}
+
+impl<'a, P> FusedIterator for SplitN<'a, P> where P: JavaStrPattern {}
+
+#[derive(Clone, Debug)]
+pub struct RSplitN<'a, P> {
+    inner: SplitHelper<'a, P>,
+    count: usize,
+}
+
+impl<'a, P> RSplitN<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P, count: usize) -> Self {
+        RSplitN {
+            inner: SplitHelper::new(haystack, pat, true),
+            count,
+        }
+    }
+}
+
+impl<'a, P> Iterator for RSplitN<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.count {
+            0 => None,
+            1 => {
+                self.count = 0;
+                self.inner.get_end()
+            }
+            _ => {
+                self.count -= 1;
+                self.inner.next_back()
+            }
+        }
+    }
+}
+
+impl<'a, P> FusedIterator for RSplitN<'a, P> where P: JavaStrPattern {}
+
+#[derive(Clone, Debug)]
+pub struct SplitAsciiWhitespace<'a> {
+    #[allow(clippy::type_complexity)]
+    pub(crate) inner: Map<
+        Filter<slice::Split<'a, u8, fn(&u8) -> bool>, fn(&&[u8]) -> bool>,
+        fn(&[u8]) -> &JavaStr,
+    >,
+}
+delegate!(Iterator for SplitAsciiWhitespace<'a> => &'a JavaStr);
+delegate!(DoubleEndedIterator for SplitAsciiWhitespace<'a>);
+delegate!(FusedIterator for SplitAsciiWhitespace<'a>);
+
+#[derive(Clone, Debug)]
+pub struct SplitWhitespace<'a> {
+    #[allow(clippy::type_complexity)]
+    pub(crate) inner: Filter<Split<'a, fn(JavaCodePoint) -> bool>, fn(&&JavaStr) -> bool>,
+}
+delegate!(Iterator for SplitWhitespace<'a> => &'a JavaStr);
+delegate!(DoubleEndedIterator for SplitWhitespace<'a>);
+delegate!(FusedIterator for SplitWhitespace<'a>);
diff --git a/java_string/src/lib.rs b/java_string/src/lib.rs
new file mode 100644
index 
0000000..18e658a --- /dev/null +++ b/java_string/src/lib.rs @@ -0,0 +1,26 @@ +#![doc = include_str!("../README.md")] + +mod cesu8; +mod char; +mod error; +mod iter; +mod owned; +mod pattern; +#[cfg(feature = "serde")] +mod serde; +mod slice; +pub(crate) mod validations; + +pub use char::*; +pub use error::*; +pub use iter::*; +pub use owned::*; +pub use pattern::*; +pub use slice::*; + +#[macro_export] +macro_rules! format_java { + ($($arg:tt)*) => { + $crate::JavaString::from(::std::format!($($arg)*)) + } +} diff --git a/java_string/src/owned.rs b/java_string/src/owned.rs new file mode 100644 index 0000000..74aebee --- /dev/null +++ b/java_string/src/owned.rs @@ -0,0 +1,1400 @@ +use std::borrow::{Borrow, BorrowMut, Cow}; +use std::collections::{Bound, TryReserveError}; +use std::convert::Infallible; +use std::fmt::{Debug, Display, Formatter, Write}; +use std::hash::{Hash, Hasher}; +use std::iter::FusedIterator; +use std::ops::{ + Add, AddAssign, Deref, DerefMut, Index, IndexMut, Range, RangeBounds, RangeFrom, RangeFull, + RangeInclusive, RangeTo, RangeToInclusive, +}; +use std::rc::Rc; +use std::str::FromStr; +use std::sync::Arc; +use std::{ptr, slice}; + +use crate::validations::{ + run_utf8_full_validation_from_semi, run_utf8_semi_validation, to_range_checked, +}; +use crate::{Chars, FromUtf8Error, JavaCodePoint, JavaStr, Utf8Error}; + +#[derive(Default, PartialEq, PartialOrd, Eq, Ord)] +pub struct JavaString { + vec: Vec, +} + +impl JavaString { + #[inline] + #[must_use] + pub const fn new() -> JavaString { + JavaString { vec: Vec::new() } + } + + #[inline] + #[must_use] + pub fn with_capacity(capacity: usize) -> JavaString { + JavaString { + vec: Vec::with_capacity(capacity), + } + } + + /// Converts `vec` to a `JavaString` if it is fully-valid UTF-8, i.e. UTF-8 + /// without surrogate code points. See [`String::from_utf8`]. + #[inline] + pub fn from_full_utf8(vec: Vec) -> Result { + match std::str::from_utf8(&vec) { + Ok(..) 
=> Ok(JavaString { vec }), + Err(e) => Err(FromUtf8Error { + bytes: vec, + error: e.into(), + }), + } + } + + /// Converts `vec` to a `JavaString` if it is semi-valid UTF-8, i.e. UTF-8 + /// with surrogate code points. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaString}; + /// + /// assert_eq!( + /// JavaString::from_semi_utf8(b"Hello World!".to_vec()).unwrap(), + /// "Hello World!" + /// ); + /// assert_eq!( + /// JavaString::from_semi_utf8(vec![0xF0, 0x9F, 0x92, 0x96]).unwrap(), + /// "๐Ÿ’–" + /// ); + /// assert_eq!( + /// JavaString::from_semi_utf8(vec![0xED, 0xA0, 0x80]).unwrap(), + /// JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()) + /// ); + /// assert!(JavaString::from_semi_utf8(vec![0xED]).is_err()); + /// ``` + pub fn from_semi_utf8(vec: Vec) -> Result { + match run_utf8_semi_validation(&vec) { + Ok(..) => Ok(JavaString { vec }), + Err(err) => Err(FromUtf8Error { + bytes: vec, + error: err, + }), + } + } + + /// Converts `v` to a `Cow`, replacing invalid semi-UTF-8 with the + /// replacement character ๏ฟฝ. 
+ /// + /// ``` + /// # use std::borrow::Cow; + /// # use java_string::{JavaStr, JavaString}; + /// + /// let sparkle_heart = [0xF0, 0x9F, 0x92, 0x96]; + /// let result = JavaString::from_semi_utf8_lossy(&sparkle_heart); + /// assert!(matches!(result, Cow::Borrowed(_))); + /// assert_eq!(result, JavaStr::from_str("๐Ÿ’–")); + /// + /// let foobar_with_error = [b'f', b'o', b'o', 0xED, b'b', b'a', b'r']; + /// let result = JavaString::from_semi_utf8_lossy(&foobar_with_error); + /// assert!(matches!(result, Cow::Owned(_))); + /// assert_eq!(result, JavaStr::from_str("foo๏ฟฝbar")); + /// ``` + #[must_use] + pub fn from_semi_utf8_lossy(v: &[u8]) -> Cow<'_, JavaStr> { + const REPLACEMENT: &str = "\u{FFFD}"; + + match run_utf8_semi_validation(v) { + Ok(()) => unsafe { + // SAFETY: validation succeeded + Cow::Borrowed(JavaStr::from_semi_utf8_unchecked(v)) + }, + Err(error) => { + let mut result = unsafe { + // SAFETY: validation succeeded up to this index + JavaString::from_semi_utf8_unchecked( + v.get_unchecked(..error.valid_up_to).to_vec(), + ) + }; + result.push_str(REPLACEMENT); + let mut index = error.valid_up_to + error.error_len.unwrap_or(1) as usize; + loop { + match run_utf8_semi_validation(&v[index..]) { + Ok(()) => { + unsafe { + // SAFETY: validation succeeded + result + .push_java_str(JavaStr::from_semi_utf8_unchecked(&v[index..])); + } + return Cow::Owned(result); + } + Err(error) => { + unsafe { + // SAFETY: validation succeeded up to this index + result.push_java_str(JavaStr::from_semi_utf8_unchecked( + v.get_unchecked(index..index + error.valid_up_to), + )); + } + result.push_str(REPLACEMENT); + index += error.valid_up_to + error.error_len.unwrap_or(1) as usize; + } + } + } + } + } + } + + /// # Safety + /// + /// The parameter must be in semi-valid UTF-8 format, that is, UTF-8 plus + /// surrogate code points. 
+ #[inline] + #[must_use] + pub unsafe fn from_semi_utf8_unchecked(bytes: Vec) -> JavaString { + JavaString { vec: bytes } + } + + /// See [`String::into_bytes`]. + #[inline] + #[must_use] + pub fn into_bytes(self) -> Vec { + self.vec + } + + /// See [`String::as_str`]. + #[inline] + #[must_use] + pub fn as_java_str(&self) -> &JavaStr { + unsafe { + // SAFETY: this str has semi-valid UTF-8 + JavaStr::from_semi_utf8_unchecked(&self.vec) + } + } + + /// See [`String::as_mut_str`]. + #[inline] + #[must_use] + pub fn as_mut_java_str(&mut self) -> &mut JavaStr { + unsafe { + // SAFETY: this str has semi-valid UTF-8 + JavaStr::from_semi_utf8_unchecked_mut(&mut self.vec) + } + } + + /// Tries to convert this `JavaString` to a `String`, returning an error if + /// it is not fully valid UTF-8, i.e. has no surrogate code points. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaString}; + /// + /// assert_eq!( + /// JavaString::from("Hello World!").into_string().unwrap(), + /// "Hello World!" + /// ); + /// assert_eq!( + /// JavaString::from("abc\0ℝ💣").into_string().unwrap(), + /// "abc\0ℝ💣" + /// ); + /// + /// let string_with_error = JavaString::from("abc") + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str(); + /// assert!(string_with_error.into_string().is_err()); + /// ``` + pub fn into_string(self) -> Result { + run_utf8_full_validation_from_semi(self.as_bytes()).map(|()| unsafe { + // SAFETY: validation succeeded + self.into_string_unchecked() + }) + } + + /// # Safety + /// + /// This string must be fully valid UTF-8, i.e. have no surrogate code + /// points. + #[inline] + #[must_use] + pub unsafe fn into_string_unchecked(self) -> String { + // SAFETY: preconditions checked by caller + String::from_utf8_unchecked(self.vec) + } + + /// See [`String::push_str`]. + #[inline] + pub fn push_java_str(&mut self, string: &JavaStr) { + self.vec.extend_from_slice(string.as_bytes()) + } + + /// See [`String::push_str`]. 
+ #[inline] + pub fn push_str(&mut self, string: &str) { + self.vec.extend_from_slice(string.as_bytes()) + } + + /// See [`String::capacity`]. + #[inline] + #[must_use] + pub fn capacity(&self) -> usize { + self.vec.capacity() + } + + /// See [`String::reserve`]. + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.vec.reserve(additional) + } + + /// See [`String::reserve_exact`]. + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + self.vec.reserve_exact(additional) + } + + /// See [`String::try_reserve`]. + #[inline] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.vec.try_reserve(additional) + } + + /// See [`String::try_reserve_exact`]. + #[inline] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.vec.try_reserve_exact(additional) + } + + /// See [`String::shrink_to_fit`]. + #[inline] + pub fn shrink_to_fit(&mut self) { + self.vec.shrink_to_fit() + } + + /// See [`String::shrink_to`]. + #[inline] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.vec.shrink_to(min_capacity) + } + + /// See [`String::push`]. + #[inline] + pub fn push(&mut self, ch: char) { + match ch.len_utf8() { + 1 => self.vec.push(ch as u8), + _ => self + .vec + .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()), + } + } + + /// See [`String::push`]. + #[inline] + pub fn push_java(&mut self, ch: JavaCodePoint) { + match ch.len_utf8() { + 1 => self.vec.push(ch.as_u32() as u8), + _ => self.vec.extend_from_slice(ch.encode_semi_utf8(&mut [0; 4])), + } + } + + /// See [`String::as_bytes`]. + #[inline] + #[must_use] + pub fn as_bytes(&self) -> &[u8] { + &self.vec + } + + /// See [`String::truncate`]. + #[inline] + pub fn truncate(&mut self, new_len: usize) { + if new_len <= self.len() { + assert!(self.is_char_boundary(new_len)); + self.vec.truncate(new_len) + } + } + + /// See [`String::pop`]. 
+ /// + /// ``` + /// # use java_string::JavaString; + /// + /// let mut str = JavaString::from("Hello World!"); + /// assert_eq!(str.pop().unwrap(), '!'); + /// assert_eq!(str, "Hello World"); + /// + /// let mut str = JavaString::from("東京"); + /// assert_eq!(str.pop().unwrap(), '京'); + /// assert_eq!(str, "東"); + /// + /// assert!(JavaString::new().pop().is_none()); + /// ``` + #[inline] + pub fn pop(&mut self) -> Option { + let ch = self.chars().next_back()?; + let newlen = self.len() - ch.len_utf8(); + unsafe { + self.vec.set_len(newlen); + } + Some(ch) + } + + /// See [`String::remove`]. + /// + /// ``` + /// # use java_string::JavaString; + /// + /// let mut str = JavaString::from("Hello World!"); + /// assert_eq!(str.remove(5), ' '); + /// assert_eq!(str, "HelloWorld!"); + /// + /// let mut str = JavaString::from("Hello 🦀 World!"); + /// assert_eq!(str.remove(6), '🦀'); + /// assert_eq!(str, "Hello  World!"); + /// ``` + /// ```should_panic + /// # use java_string::JavaString; + /// // Should panic + /// JavaString::new().remove(0); + /// ``` + /// ```should_panic + /// # use java_string::JavaString; + /// // Should panic + /// JavaString::from("🦀").remove(1); + /// ``` + #[inline] + pub fn remove(&mut self, idx: usize) -> JavaCodePoint { + let Some(ch) = self[idx..].chars().next() else { + panic!("cannot remove a char from the end of a string") + }; + + let next = idx + ch.len_utf8(); + let len = self.len(); + unsafe { + ptr::copy( + self.vec.as_ptr().add(next), + self.vec.as_mut_ptr().add(idx), + len - next, + ); + self.vec.set_len(len - (next - idx)); + } + ch + } + + /// See [`String::retain`]. 
+ /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaString}; + /// + /// let mut str = JavaString::from("Hello 🦀 World!"); + /// str.retain(|ch| !ch.is_ascii_uppercase()); + /// assert_eq!(str, "ello 🦀 orld!"); + /// str.retain(JavaCodePoint::is_ascii); + /// assert_eq!(str, "ello  orld!"); + /// ``` + #[inline] + pub fn retain(&mut self, mut f: F) + where + F: FnMut(JavaCodePoint) -> bool, + { + struct SetLenOnDrop<'a> { + s: &'a mut JavaString, + idx: usize, + del_bytes: usize, + } + + impl<'a> Drop for SetLenOnDrop<'a> { + #[inline] + fn drop(&mut self) { + let new_len = self.idx - self.del_bytes; + debug_assert!(new_len <= self.s.len()); + unsafe { self.s.vec.set_len(new_len) }; + } + } + + let len = self.len(); + let mut guard = SetLenOnDrop { + s: self, + idx: 0, + del_bytes: 0, + }; + + while guard.idx < len { + // SAFETY: `guard.idx` is positive-or-zero and less than len so the + // `get_unchecked` is in bound. `self` is valid UTF-8 like string + // and the returned slice starts at a unicode code point so the + // `Chars` always return one character. + let ch = unsafe { + guard + .s + .get_unchecked(guard.idx..len) + .chars() + .next() + .unwrap_unchecked() + }; + let ch_len = ch.len_utf8(); + + if !f(ch) { + guard.del_bytes += ch_len; + } else if guard.del_bytes > 0 { + // SAFETY: `guard.idx` is in bound and `guard.del_bytes` represent the number of + // bytes that are erased from the string so the resulting `guard.idx - + // guard.del_bytes` always represent a valid unicode code point. + // + // `guard.del_bytes` >= `ch.len_utf8()`, so taking a slice with `ch.len_utf8()` + // len is safe. + ch.encode_semi_utf8(unsafe { + slice::from_raw_parts_mut( + guard.s.as_mut_ptr().add(guard.idx - guard.del_bytes), + ch.len_utf8(), + ) + }); + } + + // Point idx to the next char + guard.idx += ch_len; + } + + drop(guard); + } + + /// See [`String::insert`]. 
+ /// + /// ``` + /// # use java_string::JavaString; + /// let mut s = JavaString::from("foo"); + /// s.insert(3, 'a'); + /// s.insert(4, 'r'); + /// s.insert(3, 'b'); + /// assert_eq!(s, "foobar"); + /// ``` + #[inline] + pub fn insert(&mut self, idx: usize, ch: char) { + assert!(self.is_char_boundary(idx)); + let mut bits = [0; 4]; + let bits = ch.encode_utf8(&mut bits).as_bytes(); + + unsafe { + self.insert_bytes(idx, bits); + } + } + + /// See [`String::insert`]. + #[inline] + pub fn insert_java(&mut self, idx: usize, ch: JavaCodePoint) { + assert!(self.is_char_boundary(idx)); + let mut bits = [0; 4]; + let bits = ch.encode_semi_utf8(&mut bits); + + unsafe { + self.insert_bytes(idx, bits); + } + } + + #[inline] + unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) { + let len = self.len(); + let amt = bytes.len(); + self.vec.reserve(amt); + + unsafe { + ptr::copy( + self.vec.as_ptr().add(idx), + self.vec.as_mut_ptr().add(idx + amt), + len - idx, + ); + ptr::copy_nonoverlapping(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt); + self.vec.set_len(len + amt); + } + } + + /// See [`String::insert_str`]. + /// + /// ``` + /// # use java_string::JavaString; + /// let mut s = JavaString::from("bar"); + /// s.insert_str(0, "foo"); + /// assert_eq!(s, "foobar"); + /// ``` + #[inline] + pub fn insert_str(&mut self, idx: usize, string: &str) { + assert!(self.is_char_boundary(idx)); + + unsafe { + self.insert_bytes(idx, string.as_bytes()); + } + } + + /// See [`String::insert_str`]. + pub fn insert_java_str(&mut self, idx: usize, string: &JavaStr) { + assert!(self.is_char_boundary(idx)); + + unsafe { + self.insert_bytes(idx, string.as_bytes()); + } + } + + /// See [`String::as_mut_vec`]. + /// + /// # Safety + /// + /// The returned `Vec` must not have invalid UTF-8 written to it, besides + /// surrogate pairs. + #[inline] + pub unsafe fn as_mut_vec(&mut self) -> &mut Vec { + &mut self.vec + } + + /// See [`String::len`]. 
+ #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.vec.len() + } + + /// See [`String::is_empty`]. + #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// See [`String::split_off`]. + /// + /// ``` + /// # use java_string::JavaString; + /// let mut hello = JavaString::from("Hello World!"); + /// let world = hello.split_off(6); + /// assert_eq!(hello, "Hello "); + /// assert_eq!(world, "World!"); + /// ``` + /// ```should_panic + /// # use java_string::JavaString; + /// let mut s = JavaString::from("🦀"); + /// // Should panic + /// let _ = s.split_off(1); + /// ``` + #[inline] + #[must_use] + pub fn split_off(&mut self, at: usize) -> JavaString { + assert!(self.is_char_boundary(at)); + let other = self.vec.split_off(at); + unsafe { JavaString::from_semi_utf8_unchecked(other) } + } + + /// See [`String::clear`]. + #[inline] + pub fn clear(&mut self) { + self.vec.clear(); + } + + /// See [`String::drain`]. + /// + /// ``` + /// # use java_string::JavaString; + /// + /// let mut s = JavaString::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Remove the range up until the β from the string + /// let t: JavaString = s.drain(..beta_offset).collect(); + /// assert_eq!(t, "α is alpha, "); + /// assert_eq!(s, "β is beta"); + /// + /// // A full range clears the string, like `clear()` does + /// s.drain(..); + /// assert_eq!(s, ""); + /// ``` + #[inline] + pub fn drain(&mut self, range: R) -> Drain<'_> + where + R: RangeBounds, + { + // Memory safety: see String::drain + let Range { start, end } = to_range_checked(range, ..self.len()); + assert!(self.is_char_boundary(start)); + assert!(self.is_char_boundary(end)); + + // Take out two simultaneous borrows. The &mut String won't be accessed + // until iteration is over, in Drop. + let self_ptr = self as *mut _; + // SAFETY: `to_range_checked` and `is_char_boundary` do the appropriate bounds + // checks. 
+ let chars_iter = unsafe { self.get_unchecked(start..end) }.chars(); + + Drain { + start, + end, + iter: chars_iter, + string: self_ptr, + } + } + + /// See [`String::replace_range`]. + /// + /// ``` + /// # use java_string::JavaString; + /// + /// let mut s = JavaString::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Replace the range up until the β from the string + /// s.replace_range(..beta_offset, "Α is capital alpha; "); + /// assert_eq!(s, "Α is capital alpha; β is beta"); + /// ``` + /// ```should_panic + /// # use java_string::JavaString; + /// let mut s = JavaString::from("α is alpha, β is beta"); + /// // Should panic + /// s.replace_range(..1, "Α is capital alpha; "); + /// ``` + pub fn replace_range(&mut self, range: R, replace_with: &str) + where + R: RangeBounds, + { + self.replace_range_java(range, JavaStr::from_str(replace_with)) + } + + /// See [`String::replace_range`]. + pub fn replace_range_java(&mut self, range: R, replace_with: &JavaStr) + where + R: RangeBounds, + { + let start = range.start_bound(); + match start { + Bound::Included(&n) => assert!(self.is_char_boundary(n)), + Bound::Excluded(&n) => assert!(self.is_char_boundary(n + 1)), + Bound::Unbounded => {} + }; + let end = range.end_bound(); + match end { + Bound::Included(&n) => assert!(self.is_char_boundary(n + 1)), + Bound::Excluded(&n) => assert!(self.is_char_boundary(n)), + Bound::Unbounded => {} + }; + + unsafe { self.as_mut_vec() }.splice((start, end), replace_with.bytes()); + } + + /// See [`String::into_boxed_str`]. + #[inline] + #[must_use] + pub fn into_boxed_str(self) -> Box { + let slice = self.vec.into_boxed_slice(); + unsafe { JavaStr::from_boxed_semi_utf8_unchecked(slice) } + } + + /// See [`String::leak`]. 
+ #[inline] + pub fn leak<'a>(self) -> &'a mut JavaStr { + let slice = self.vec.leak(); + unsafe { JavaStr::from_semi_utf8_unchecked_mut(slice) } + } +} + +impl Add<&str> for JavaString { + type Output = JavaString; + + #[inline] + fn add(mut self, rhs: &str) -> Self::Output { + self.push_str(rhs); + self + } +} + +impl Add<&JavaStr> for JavaString { + type Output = JavaString; + + #[inline] + fn add(mut self, rhs: &JavaStr) -> Self::Output { + self.push_java_str(rhs); + self + } +} + +impl AddAssign<&str> for JavaString { + #[inline] + fn add_assign(&mut self, rhs: &str) { + self.push_str(rhs); + } +} + +impl AddAssign<&JavaStr> for JavaString { + #[inline] + fn add_assign(&mut self, rhs: &JavaStr) { + self.push_java_str(rhs); + } +} + +impl AsMut for JavaString { + #[inline] + fn as_mut(&mut self) -> &mut JavaStr { + self.as_mut_java_str() + } +} + +impl AsRef<[u8]> for JavaString { + #[inline] + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +impl AsRef for JavaString { + #[inline] + fn as_ref(&self) -> &JavaStr { + self.as_java_str() + } +} + +impl Borrow for JavaString { + #[inline] + fn borrow(&self) -> &JavaStr { + self.as_java_str() + } +} + +impl BorrowMut for JavaString { + #[inline] + fn borrow_mut(&mut self) -> &mut JavaStr { + self.as_mut_java_str() + } +} + +impl Clone for JavaString { + #[inline] + fn clone(&self) -> Self { + JavaString { + vec: self.vec.clone(), + } + } + + #[inline] + fn clone_from(&mut self, source: &Self) { + self.vec.clone_from(&source.vec) + } +} + +impl Debug for JavaString { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + Debug::fmt(&**self, f) + } +} + +impl Deref for JavaString { + type Target = JavaStr; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_java_str() + } +} + +impl DerefMut for JavaString { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + self.as_mut_java_str() + } +} + +impl Display for JavaString { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + 
Display::fmt(&**self, f) + } +} + +impl Extend for JavaString { + fn extend>(&mut self, iter: T) { + let iterator = iter.into_iter(); + let (lower_bound, _) = iterator.size_hint(); + self.reserve(lower_bound); + iterator.for_each(move |c| self.push(c)); + } +} + +impl Extend for JavaString { + fn extend>(&mut self, iter: T) { + let iterator = iter.into_iter(); + let (lower_bound, _) = iterator.size_hint(); + self.reserve(lower_bound); + iterator.for_each(move |c| self.push_java(c)); + } +} + +impl Extend for JavaString { + fn extend>(&mut self, iter: T) { + iter.into_iter().for_each(move |s| self.push_str(&s)); + } +} + +impl Extend for JavaString { + fn extend>(&mut self, iter: T) { + iter.into_iter().for_each(move |s| self.push_java_str(&s)); + } +} + +impl<'a> Extend<&'a char> for JavaString { + fn extend>(&mut self, iter: T) { + self.extend(iter.into_iter().copied()) + } +} + +impl<'a> Extend<&'a JavaCodePoint> for JavaString { + fn extend>(&mut self, iter: T) { + self.extend(iter.into_iter().copied()) + } +} + +impl<'a> Extend<&'a str> for JavaString { + fn extend>(&mut self, iter: T) { + iter.into_iter().for_each(move |s| self.push_str(s)); + } +} + +impl<'a> Extend<&'a JavaStr> for JavaString { + fn extend>(&mut self, iter: T) { + iter.into_iter().for_each(move |s| self.push_java_str(s)); + } +} + +impl Extend> for JavaString { + fn extend>>(&mut self, iter: T) { + iter.into_iter().for_each(move |s| self.push_str(&s)); + } +} + +impl Extend> for JavaString { + fn extend>>(&mut self, iter: T) { + iter.into_iter().for_each(move |s| self.push_java_str(&s)); + } +} + +impl<'a> Extend> for JavaString { + fn extend>>(&mut self, iter: T) { + iter.into_iter().for_each(move |s| self.push_str(&s)); + } +} + +impl<'a> Extend> for JavaString { + fn extend>>(&mut self, iter: T) { + iter.into_iter().for_each(move |s| self.push_java_str(&s)); + } +} + +impl From for JavaString { + #[inline] + fn from(value: String) -> Self { + unsafe { + // SAFETY: value is valid UTF-8 + 
JavaString::from_semi_utf8_unchecked(value.into_bytes()) + } + } +} + +impl From<&String> for JavaString { + #[inline] + fn from(value: &String) -> Self { + Self::from(value.clone()) + } +} + +impl From<&JavaString> for JavaString { + #[inline] + fn from(value: &JavaString) -> Self { + value.clone() + } +} + +impl From<&mut str> for JavaString { + #[inline] + fn from(value: &mut str) -> Self { + Self::from(&*value) + } +} + +impl From<&str> for JavaString { + #[inline] + fn from(value: &str) -> Self { + Self::from(value.to_owned()) + } +} + +impl From<&mut JavaStr> for JavaString { + #[inline] + fn from(value: &mut JavaStr) -> Self { + Self::from(&*value) + } +} + +impl From<&JavaStr> for JavaString { + #[inline] + fn from(value: &JavaStr) -> Self { + value.to_owned() + } +} + +impl From> for JavaString { + #[inline] + fn from(value: Box) -> Self { + Self::from(value.into_string()) + } +} + +impl From> for JavaString { + #[inline] + fn from(value: Box) -> Self { + value.into_string() + } +} + +impl<'a> From> for JavaString { + #[inline] + fn from(value: Cow<'a, str>) -> Self { + Self::from(value.into_owned()) + } +} + +impl<'a> From> for JavaString { + #[inline] + fn from(value: Cow<'a, JavaStr>) -> Self { + value.into_owned() + } +} + +impl From for Arc { + #[inline] + fn from(value: JavaString) -> Self { + Arc::from(&value[..]) + } +} + +impl<'a> From for Cow<'a, JavaStr> { + #[inline] + fn from(value: JavaString) -> Self { + Cow::Owned(value) + } +} + +impl From for Rc { + #[inline] + fn from(value: JavaString) -> Self { + Rc::from(&value[..]) + } +} + +impl From for Vec { + #[inline] + fn from(value: JavaString) -> Self { + value.into_bytes() + } +} + +impl From for JavaString { + #[inline] + fn from(value: char) -> Self { + Self::from(value.encode_utf8(&mut [0; 4])) + } +} + +impl From for JavaString { + #[inline] + fn from(value: JavaCodePoint) -> Self { + unsafe { + // SAFETY: we're encoding into semi-valid UTF-8 + 
JavaString::from_semi_utf8_unchecked(value.encode_semi_utf8(&mut [0; 4]).to_vec()) + } + } +} + +impl FromIterator for JavaString { + #[inline] + fn from_iter>(iter: T) -> Self { + let mut buf = JavaString::new(); + buf.extend(iter); + buf + } +} + +impl<'a> FromIterator<&'a char> for JavaString { + #[inline] + fn from_iter>(iter: T) -> Self { + let mut buf = JavaString::new(); + buf.extend(iter); + buf + } +} + +impl FromIterator for JavaString { + #[inline] + fn from_iter>(iter: T) -> Self { + let mut buf = JavaString::new(); + buf.extend(iter); + buf + } +} + +impl<'a> FromIterator<&'a JavaCodePoint> for JavaString { + #[inline] + fn from_iter>(iter: T) -> Self { + let mut buf = JavaString::new(); + buf.extend(iter); + buf + } +} + +impl<'a> FromIterator<&'a str> for JavaString { + #[inline] + fn from_iter>(iter: T) -> Self { + let mut buf = JavaString::new(); + buf.extend(iter); + buf + } +} + +impl FromIterator for JavaString { + fn from_iter>(iter: T) -> Self { + let mut iterator = iter.into_iter(); + + match iterator.next() { + None => JavaString::new(), + Some(buf) => { + let mut buf = JavaString::from(buf); + buf.extend(iterator); + buf + } + } + } +} + +impl FromIterator for JavaString { + fn from_iter>(iter: T) -> Self { + let mut iterator = iter.into_iter(); + + match iterator.next() { + None => JavaString::new(), + Some(mut buf) => { + buf.extend(iterator); + buf + } + } + } +} + +impl FromIterator> for JavaString { + #[inline] + fn from_iter>>(iter: T) -> Self { + let mut buf = JavaString::new(); + buf.extend(iter); + buf + } +} + +impl FromIterator> for JavaString { + #[inline] + fn from_iter>>(iter: T) -> Self { + let mut buf = JavaString::new(); + buf.extend(iter); + buf + } +} + +impl<'a> FromIterator> for JavaString { + #[inline] + fn from_iter>>(iter: T) -> Self { + let mut buf = JavaString::new(); + buf.extend(iter); + buf + } +} + +impl<'a> FromIterator> for JavaString { + #[inline] + fn from_iter>>(iter: T) -> Self { + let mut buf = 
JavaString::new(); + buf.extend(iter); + buf + } +} + +impl FromStr for JavaString { + type Err = Infallible; + + #[inline] + fn from_str(s: &str) -> Result { + Ok(Self::from(s)) + } +} + +impl Hash for JavaString { + #[inline] + fn hash(&self, state: &mut H) { + (**self).hash(state) + } +} + +impl Index> for JavaString { + type Output = JavaStr; + + #[inline] + fn index(&self, index: Range) -> &Self::Output { + &self[..][index] + } +} + +impl Index> for JavaString { + type Output = JavaStr; + + #[inline] + fn index(&self, index: RangeFrom) -> &Self::Output { + &self[..][index] + } +} + +impl Index for JavaString { + type Output = JavaStr; + + #[inline] + fn index(&self, _index: RangeFull) -> &Self::Output { + self.as_java_str() + } +} + +impl Index> for JavaString { + type Output = JavaStr; + + #[inline] + fn index(&self, index: RangeInclusive) -> &Self::Output { + &self[..][index] + } +} + +impl Index> for JavaString { + type Output = JavaStr; + + #[inline] + fn index(&self, index: RangeTo) -> &Self::Output { + &self[..][index] + } +} + +impl Index> for JavaString { + type Output = JavaStr; + + #[inline] + fn index(&self, index: RangeToInclusive) -> &Self::Output { + &self[..][index] + } +} + +impl IndexMut> for JavaString { + #[inline] + fn index_mut(&mut self, index: Range) -> &mut Self::Output { + &mut self[..][index] + } +} + +impl IndexMut> for JavaString { + #[inline] + fn index_mut(&mut self, index: RangeFrom) -> &mut Self::Output { + &mut self[..][index] + } +} + +impl IndexMut for JavaString { + #[inline] + fn index_mut(&mut self, _index: RangeFull) -> &mut Self::Output { + self.as_mut_java_str() + } +} + +impl IndexMut> for JavaString { + #[inline] + fn index_mut(&mut self, index: RangeInclusive) -> &mut Self::Output { + &mut self[..][index] + } +} + +impl IndexMut> for JavaString { + #[inline] + fn index_mut(&mut self, index: RangeTo) -> &mut Self::Output { + &mut self[..][index] + } +} + +impl IndexMut> for JavaString { + #[inline] + fn index_mut(&mut 
self, index: RangeToInclusive) -> &mut Self::Output { + &mut self[..][index] + } +} + +impl PartialEq for JavaString { + #[inline] + fn eq(&self, other: &str) -> bool { + self[..] == other + } +} + +impl PartialEq for str { + #[inline] + fn eq(&self, other: &JavaString) -> bool { + self == other[..] + } +} + +impl<'a> PartialEq<&'a str> for JavaString { + #[inline] + fn eq(&self, other: &&'a str) -> bool { + self == *other + } +} + +impl<'a> PartialEq for &'a str { + #[inline] + fn eq(&self, other: &JavaString) -> bool { + *self == other + } +} + +impl PartialEq for JavaString { + #[inline] + fn eq(&self, other: &String) -> bool { + &self[..] == other + } +} + +impl PartialEq for String { + #[inline] + fn eq(&self, other: &JavaString) -> bool { + self == &other[..] + } +} + +impl PartialEq for JavaString { + #[inline] + fn eq(&self, other: &JavaStr) -> bool { + self[..] == other + } +} + +impl<'a> PartialEq<&'a JavaStr> for JavaString { + #[inline] + fn eq(&self, other: &&'a JavaStr) -> bool { + self == *other + } +} + +impl<'a> PartialEq> for JavaString { + #[inline] + fn eq(&self, other: &Cow<'a, str>) -> bool { + &self[..] == other + } +} + +impl<'a> PartialEq for Cow<'a, str> { + #[inline] + fn eq(&self, other: &JavaString) -> bool { + self == &other[..] + } +} + +impl<'a> PartialEq> for JavaString { + #[inline] + fn eq(&self, other: &Cow<'a, JavaStr>) -> bool { + &self[..] == other + } +} + +impl<'a> PartialEq for Cow<'a, JavaStr> { + #[inline] + fn eq(&self, other: &JavaString) -> bool { + self == &other[..] 
+ } +} + +impl Write for JavaString { + #[inline] + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.push_str(s); + Ok(()) + } + + #[inline] + fn write_char(&mut self, c: char) -> std::fmt::Result { + self.push(c); + Ok(()) + } +} + +pub struct Drain<'a> { + string: *mut JavaString, + start: usize, + end: usize, + iter: Chars<'a>, +} + +impl Debug for Drain<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("Drain").field(&self.as_str()).finish() + } +} + +unsafe impl Sync for Drain<'_> {} +unsafe impl Send for Drain<'_> {} + +impl Drop for Drain<'_> { + #[inline] + fn drop(&mut self) { + unsafe { + // Use Vec::drain. "Reaffirm" the bounds checks to avoid + // panic code being inserted again. + let self_vec = (*self.string).as_mut_vec(); + if self.start <= self.end && self.end <= self_vec.len() { + self_vec.drain(self.start..self.end); + } + } + } +} + +impl AsRef for Drain<'_> { + #[inline] + fn as_ref(&self) -> &JavaStr { + self.as_str() + } +} + +impl AsRef<[u8]> for Drain<'_> { + #[inline] + fn as_ref(&self) -> &[u8] { + self.as_str().as_bytes() + } +} + +impl Drain<'_> { + #[inline] + #[must_use] + pub fn as_str(&self) -> &JavaStr { + self.iter.as_str() + } +} + +impl Iterator for Drain<'_> { + type Item = JavaCodePoint; + + #[inline] + fn next(&mut self) -> Option { + self.iter.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } + + #[inline] + fn last(mut self) -> Option { + self.next_back() + } +} + +impl DoubleEndedIterator for Drain<'_> { + #[inline] + fn next_back(&mut self) -> Option { + self.iter.next_back() + } +} + +impl FusedIterator for Drain<'_> {} diff --git a/java_string/src/pattern.rs b/java_string/src/pattern.rs new file mode 100644 index 0000000..a17d374 --- /dev/null +++ b/java_string/src/pattern.rs @@ -0,0 +1,358 @@ +use crate::{JavaCodePoint, JavaStr}; + +mod private_pattern { + use crate::{JavaCodePoint, JavaStr}; + + pub trait Sealed {} + + impl 
Sealed for char {} + impl Sealed for JavaCodePoint {} + impl Sealed for &str {} + impl Sealed for &JavaStr {} + impl Sealed for F where F: FnMut(JavaCodePoint) -> bool {} + impl Sealed for &[char] {} + impl Sealed for &[JavaCodePoint] {} + impl Sealed for &char {} + impl Sealed for &JavaCodePoint {} + impl Sealed for &&str {} + impl Sealed for &&JavaStr {} +} + +/// # Safety +/// +/// Methods in this trait must only return indexes that are on char boundaries +pub unsafe trait JavaStrPattern: private_pattern::Sealed { + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option; + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option; + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)>; + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)>; +} + +unsafe impl JavaStrPattern for char { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next()?; + (ch == *self).then(|| ch.len_utf8()) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next_back()?; + (ch == *self).then(|| ch.len_utf8()) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut encoded = [0; 4]; + let encoded = self.encode_utf8(&mut encoded).as_bytes(); + find(haystack.as_bytes(), encoded).map(|index| (index, encoded.len())) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut encoded = [0; 4]; + let encoded = self.encode_utf8(&mut encoded).as_bytes(); + rfind(haystack.as_bytes(), encoded).map(|index| (index, encoded.len())) + } +} + +unsafe impl JavaStrPattern for JavaCodePoint { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next()?; + (ch == *self).then(|| ch.len_utf8()) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next_back()?; + (ch == *self).then(|| 
ch.len_utf8()) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut encoded = [0; 4]; + let encoded = self.encode_semi_utf8(&mut encoded); + find(haystack.as_bytes(), encoded).map(|index| (index, encoded.len())) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut encoded = [0; 4]; + let encoded = self.encode_semi_utf8(&mut encoded); + rfind(haystack.as_bytes(), encoded).map(|index| (index, encoded.len())) + } +} + +unsafe impl JavaStrPattern for &str { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + haystack + .as_bytes() + .starts_with(self.as_bytes()) + .then_some(self.len()) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + haystack + .as_bytes() + .ends_with(self.as_bytes()) + .then_some(self.len()) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + find(haystack.as_bytes(), self.as_bytes()).map(|index| (index, self.len())) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + rfind(haystack.as_bytes(), self.as_bytes()).map(|index| (index, self.len())) + } +} + +unsafe impl JavaStrPattern for &JavaStr { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + haystack + .as_bytes() + .starts_with(self.as_bytes()) + .then(|| self.len()) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + haystack + .as_bytes() + .ends_with(self.as_bytes()) + .then(|| self.len()) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + find(haystack.as_bytes(), self.as_bytes()).map(|index| (index, self.len())) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + rfind(haystack.as_bytes(), self.as_bytes()).map(|index| (index, self.len())) + } +} + +unsafe impl JavaStrPattern for F +where + F: FnMut(JavaCodePoint) -> bool, +{ + 
#[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next()?; + self(ch).then(|| ch.len_utf8()) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next_back()?; + self(ch).then(|| ch.len_utf8()) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + haystack + .char_indices() + .find(|(_, ch)| self(*ch)) + .map(|(index, ch)| (index, ch.len_utf8())) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + haystack + .char_indices() + .rfind(|(_, ch)| self(*ch)) + .map(|(index, ch)| (index, ch.len_utf8())) + } +} + +unsafe impl JavaStrPattern for &[char] { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next()?; + self.iter().any(|c| ch == *c).then(|| ch.len_utf8()) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next_back()?; + self.iter().any(|c| ch == *c).then(|| ch.len_utf8()) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + haystack + .char_indices() + .find(|(_, ch)| self.iter().any(|c| *ch == *c)) + .map(|(index, ch)| (index, ch.len_utf8())) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + haystack + .char_indices() + .rfind(|(_, ch)| self.iter().any(|c| *ch == *c)) + .map(|(index, ch)| (index, ch.len_utf8())) + } +} + +unsafe impl JavaStrPattern for &[JavaCodePoint] { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next()?; + self.contains(&ch).then(|| ch.len_utf8()) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + let ch = haystack.chars().next_back()?; + self.contains(&ch).then(|| ch.len_utf8()) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + haystack + 
.char_indices() + .find(|(_, ch)| self.contains(ch)) + .map(|(index, ch)| (index, ch.len_utf8())) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + haystack + .char_indices() + .rfind(|(_, ch)| self.contains(ch)) + .map(|(index, ch)| (index, ch.len_utf8())) + } +} + +unsafe impl JavaStrPattern for &char { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + let mut ch = **self; + ch.prefix_len_in(haystack) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + let mut ch = **self; + ch.suffix_len_in(haystack) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut ch = **self; + ch.find_in(haystack) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut ch = **self; + ch.rfind_in(haystack) + } +} + +unsafe impl JavaStrPattern for &JavaCodePoint { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + let mut ch = **self; + ch.prefix_len_in(haystack) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + let mut ch = **self; + ch.suffix_len_in(haystack) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut ch = **self; + ch.find_in(haystack) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut ch = **self; + ch.rfind_in(haystack) + } +} + +unsafe impl JavaStrPattern for &&str { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + let mut str = **self; + str.prefix_len_in(haystack) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + let mut str = **self; + str.suffix_len_in(haystack) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut str = **self; + str.find_in(haystack) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> 
Option<(usize, usize)> { + let mut str = **self; + str.rfind_in(haystack) + } +} + +unsafe impl JavaStrPattern for &&JavaStr { + #[inline] + fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { + let mut str = **self; + str.prefix_len_in(haystack) + } + + #[inline] + fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { + let mut str = **self; + str.suffix_len_in(haystack) + } + + #[inline] + fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut str = **self; + str.find_in(haystack) + } + + #[inline] + fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> { + let mut str = **self; + str.rfind_in(haystack) + } +} + +#[inline] +fn find(haystack: &[u8], needle: &[u8]) -> Option { + if needle.is_empty() { + return Some(0); + } + haystack + .windows(needle.len()) + .position(|window| window == needle) +} + +#[inline] +fn rfind(haystack: &[u8], needle: &[u8]) -> Option { + if needle.is_empty() { + return Some(haystack.len()); + } + haystack + .windows(needle.len()) + .rposition(|window| window == needle) +} diff --git a/java_string/src/serde.rs b/java_string/src/serde.rs new file mode 100644 index 0000000..5743313 --- /dev/null +++ b/java_string/src/serde.rs @@ -0,0 +1,262 @@ +use std::fmt::Formatter; + +use serde::de::value::SeqAccessDeserializer; +use serde::de::{Error, SeqAccess, Unexpected, Visitor}; +use serde::ser::SerializeSeq; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +use crate::{JavaCodePoint, JavaStr, JavaString}; + +impl Serialize for JavaString { + #[inline] + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self.as_str() { + Ok(str) => str.serialize(serializer), + Err(_) => { + let mut seq = serializer.serialize_seq(None)?; + for ch in self.chars() { + seq.serialize_element(&ch.as_u32())?; + } + seq.end() + } + } + } +} + +impl<'de> Deserialize<'de> for JavaString { + #[inline] + fn deserialize(deserializer: D) -> Result + where + D: 
Deserializer<'de>, + { + deserializer.deserialize_any(JavaStringVisitor) + } +} + +struct JavaStringVisitor; + +impl<'de> Visitor<'de> for JavaStringVisitor { + type Value = JavaString; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("a JavaString") + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(JavaString::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(JavaString::from(v)) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + match JavaStr::from_semi_utf8(v) { + Ok(str) => Ok(str.to_owned()), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + JavaString::from_semi_utf8(v) + .map_err(|err| Error::invalid_value(Unexpected::Bytes(&err.into_bytes()), &self)) + } + + fn visit_seq(self, seq: A) -> Result + where + A: SeqAccess<'de>, + { + let vec = Vec::::deserialize(SeqAccessDeserializer::new(seq))?; + JavaString::from_semi_utf8(vec).map_err(|_| Error::invalid_value(Unexpected::Seq, &self)) + } +} + +impl Serialize for JavaStr { + #[inline] + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self.as_str() { + Ok(str) => str.serialize(serializer), + Err(_) => { + let mut seq = serializer.serialize_seq(None)?; + for ch in self.chars() { + seq.serialize_element(&ch.as_u32())?; + } + seq.end() + } + } + } +} + +impl<'de: 'a, 'a> Deserialize<'de> for &'a JavaStr { + #[inline] + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_any(JavaStrVisitor) + } +} + +struct JavaStrVisitor; + +impl<'de> Visitor<'de> for JavaStrVisitor { + type Value = &'de JavaStr; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("a borrowed JavaStr") + } + + fn visit_borrowed_str(self, v: &'de str) -> Result + where + E: Error, + { + 
Ok(JavaStr::from_str(v)) + } + + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result + where + E: Error, + { + JavaStr::from_semi_utf8(v).map_err(|_| Error::invalid_value(Unexpected::Bytes(v), &self)) + } +} + +impl Serialize for JavaCodePoint { + #[inline] + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self.as_char() { + Some(ch) => ch.serialize(serializer), + None => self.as_u32().serialize(serializer), + } + } +} + +impl<'de> Deserialize<'de> for JavaCodePoint { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_any(JavaCodePointVisitor) + } +} + +struct JavaCodePointVisitor; + +impl<'de> Visitor<'de> for JavaCodePointVisitor { + type Value = JavaCodePoint; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("a character") + } + + #[inline] + fn visit_i8(self, v: i8) -> Result + where + E: Error, + { + self.visit_i32(v.into()) + } + + #[inline] + fn visit_i16(self, v: i16) -> Result + where + E: Error, + { + self.visit_i32(v.into()) + } + + fn visit_i32(self, v: i32) -> Result + where + E: Error, + { + if v < 0 { + Err(Error::invalid_value(Unexpected::Signed(v.into()), &self)) + } else { + self.visit_u32(v as u32) + } + } + + fn visit_i64(self, v: i64) -> Result + where + E: Error, + { + if v < 0 { + Err(Error::invalid_value(Unexpected::Signed(v), &self)) + } else { + self.visit_u64(v as u64) + } + } + + #[inline] + fn visit_u8(self, v: u8) -> Result + where + E: Error, + { + self.visit_u32(v.into()) + } + + #[inline] + fn visit_u16(self, v: u16) -> Result + where + E: Error, + { + self.visit_u32(v.into()) + } + + fn visit_u32(self, v: u32) -> Result + where + E: Error, + { + JavaCodePoint::from_u32(v) + .ok_or_else(|| Error::invalid_value(Unexpected::Unsigned(v.into()), &self)) + } + + fn visit_u64(self, v: u64) -> Result + where + E: Error, + { + match u32::try_from(v) { + Ok(v) => self.visit_u32(v), + Err(_) => 
Err(Error::invalid_value(Unexpected::Unsigned(v), &self)), + } + } + + fn visit_char(self, v: char) -> Result + where + E: Error, + { + Ok(JavaCodePoint::from_char(v)) + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + let mut iter = v.chars(); + match (iter.next(), iter.next()) { + (Some(c), None) => Ok(JavaCodePoint::from_char(c)), + _ => Err(Error::invalid_value(Unexpected::Str(v), &self)), + } + } +} diff --git a/java_string/src/slice.rs b/java_string/src/slice.rs new file mode 100644 index 0000000..0548ae6 --- /dev/null +++ b/java_string/src/slice.rs @@ -0,0 +1,2261 @@ +use std::borrow::Cow; +use std::collections::Bound; +use std::fmt::{Debug, Display, Formatter, Write}; +use std::hash::{Hash, Hasher}; +use std::ops::{ + Add, AddAssign, Index, IndexMut, Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, + RangeTo, RangeToInclusive, +}; +use std::rc::Rc; +use std::str::FromStr; +use std::sync::Arc; +use std::{ptr, slice}; + +use crate::char::EscapeDebugExtArgs; +use crate::validations::{ + run_utf8_full_validation_from_semi, run_utf8_semi_validation, slice_error_fail, + str_end_index_overflow_fail, +}; +use crate::{ + Bytes, CharEscapeIter, CharIndices, Chars, EscapeDebug, EscapeDefault, EscapeUnicode, + JavaCodePoint, JavaStrPattern, JavaString, Lines, MatchIndices, Matches, ParseError, + RMatchIndices, RMatches, RSplit, RSplitN, RSplitTerminator, Split, SplitAsciiWhitespace, + SplitInclusive, SplitN, SplitTerminator, SplitWhitespace, Utf8Error, +}; + +#[derive(PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct JavaStr { + inner: [u8], +} + +impl JavaStr { + /// Converts `v` to a `&JavaStr` if it is fully-valid UTF-8, i.e. UTF-8 + /// without surrogate code points. See [`std::str::from_utf8`]. 
+    #[inline]
+    pub const fn from_full_utf8(v: &[u8]) -> Result<&JavaStr, Utf8Error> {
+        // Spelled out as a `match` because the `Result` combinators cannot be
+        // called from a `const fn`.
+        match std::str::from_utf8(v) {
+            Err(std_err) => Err(Utf8Error::from_std(std_err)),
+            Ok(valid) => Ok(JavaStr::from_str(valid)),
+        }
+    }
+
+    /// Mutable counterpart of [`JavaStr::from_full_utf8`]: reinterprets `v` as
+    /// a `&mut JavaStr` when it is fully-valid UTF-8 (no surrogate code
+    /// points) and returns the validation error otherwise. See
+    /// [`std::str::from_utf8_mut`].
+    #[inline]
+    pub fn from_full_utf8_mut(v: &mut [u8]) -> Result<&mut JavaStr, Utf8Error> {
+        std::str::from_utf8_mut(v)
+            .map(JavaStr::from_mut_str)
+            .map_err(Utf8Error::from_std)
+    }
+
+    /// Reinterprets `v` as a `&JavaStr` when it passes semi-UTF-8 validation,
+    /// i.e. UTF-8 in which surrogate code points are also accepted. The
+    /// validation error is returned unchanged on failure.
+    pub fn from_semi_utf8(v: &[u8]) -> Result<&JavaStr, Utf8Error> {
+        run_utf8_semi_validation(v)?;
+        // SAFETY: the semi-UTF-8 validation above has just accepted `v`.
+        Ok(unsafe { JavaStr::from_semi_utf8_unchecked(v) })
+    }
+
+    /// Reinterprets `v` as a `&mut JavaStr` when it passes semi-UTF-8
+    /// validation, i.e. UTF-8 in which surrogate code points are also
+    /// accepted. The validation error is returned unchanged on failure.
+    pub fn from_semi_utf8_mut(v: &mut [u8]) -> Result<&mut JavaStr, Utf8Error> {
+        run_utf8_semi_validation(v)?;
+        // SAFETY: the semi-UTF-8 validation above has just accepted `v`.
+        Ok(unsafe { JavaStr::from_semi_utf8_unchecked_mut(v) })
+    }
+
+    /// Reinterprets `v` as a `&JavaStr` without validating it.
+    ///
+    /// # Safety
+    ///
+    /// The parameter must be in semi-valid UTF-8 format, that is, UTF-8 plus
+    /// surrogate code points.
+    #[inline]
+    #[must_use]
+    pub const unsafe fn from_semi_utf8_unchecked(v: &[u8]) -> &JavaStr {
+        // SAFETY: the caller promises that `v` is UTF-8 in which surrogate code
+        // points may additionally appear. The conversion itself relies on
+        // `&JavaStr` and `&[u8]` sharing one layout.
+        unsafe { std::mem::transmute::<&[u8], &JavaStr>(v) }
+    }
+
+    /// # Safety
+    ///
+    /// The parameter must be in semi-valid UTF-8 format, that is, UTF-8 plus
+    /// surrogate code points.
+ #[inline] + #[must_use] + pub unsafe fn from_semi_utf8_unchecked_mut(v: &mut [u8]) -> &mut JavaStr { + // SAFETY: see from_semi_utf8_unchecked + std::mem::transmute(v) + } + + #[inline] + #[must_use] + pub const fn from_str(str: &str) -> &JavaStr { + unsafe { + // SAFETY: the input str is guaranteed to have valid UTF-8. + JavaStr::from_semi_utf8_unchecked(str.as_bytes()) + } + } + + #[inline] + #[must_use] + pub fn from_mut_str(str: &mut str) -> &mut JavaStr { + unsafe { + // SAFETY: the input str is guaranteed to have valid UTF-8. + JavaStr::from_semi_utf8_unchecked_mut(str.as_bytes_mut()) + } + } + + #[inline] + #[must_use] + pub fn from_boxed_str(v: Box) -> Box { + unsafe { JavaStr::from_boxed_semi_utf8_unchecked(v.into_boxed_bytes()) } + } + + /// # Safety + /// + /// The parameter must be in semi-valid UTF-8 format, that is, UTF-8 plus + /// surrogate code points. + #[inline] + #[must_use] + pub unsafe fn from_boxed_semi_utf8_unchecked(v: Box<[u8]>) -> Box { + unsafe { Box::from_raw(Box::into_raw(v) as *mut JavaStr) } + } + + /// See [`str::as_bytes`]. + #[inline] + #[must_use] + pub const fn as_bytes(&self) -> &[u8] { + &self.inner + } + + /// See [`str::as_bytes_mut`]. + /// + /// # Safety + /// + /// The returned slice must not have invalid UTF-8 written to it, besides + /// surrogate pairs. + #[inline] + #[must_use] + pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] { + &mut self.inner + } + + /// See [`str::as_mut_ptr`]. + #[inline] + #[must_use] + pub fn as_mut_ptr(&mut self) -> *mut u8 { + self.inner.as_mut_ptr() + } + + /// See [`str::as_ptr`]. + #[inline] + #[must_use] + pub const fn as_ptr(&self) -> *const u8 { + self.inner.as_ptr() + } + + /// Tries to convert this `&JavaStr` to a `&str`, returning an error if + /// it is not fully valid UTF-8, i.e. has no surrogate code points. 
+ pub const fn as_str(&self) -> Result<&str, Utf8Error> { + // Manual implementation of Option::map since it's not const + match run_utf8_full_validation_from_semi(self.as_bytes()) { + Ok(..) => unsafe { + // SAFETY: we were already semi-valid, and full validation just succeeded. + Ok(self.as_str_unchecked()) + }, + Err(err) => Err(err), + } + } + + /// # Safety + /// + /// This string must be fully valid UTF-8, i.e. have no surrogate code + /// points. + #[inline] + #[must_use] + pub const unsafe fn as_str_unchecked(&self) -> &str { + std::str::from_utf8_unchecked(self.as_bytes()) + } + + /// Converts this `&JavaStr` to a `Cow`, replacing surrogate code + /// points with the replacement character ๏ฟฝ. + /// + /// ``` + /// # use std::borrow::Cow; + /// # use java_string::{JavaCodePoint, JavaStr, JavaString}; + /// let s = JavaStr::from_str("Hello ๐Ÿฆ€ World!"); + /// let result = s.as_str_lossy(); + /// assert!(matches!(result, Cow::Borrowed(_))); + /// assert_eq!(result, "Hello ๐Ÿฆ€ World!"); + /// + /// let s = JavaString::from("Hello ") + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() + /// + JavaStr::from_str(" World!"); + /// let result = s.as_str_lossy(); + /// assert!(matches!(result, Cow::Owned(_))); + /// assert_eq!(result, "Hello ๏ฟฝ World!"); + /// ``` + #[must_use] + pub fn as_str_lossy(&self) -> Cow<'_, str> { + match run_utf8_full_validation_from_semi(self.as_bytes()) { + Ok(()) => unsafe { + // SAFETY: validation succeeded + Cow::Borrowed(self.as_str_unchecked()) + }, + Err(error) => unsafe { + // SAFETY: invalid parts of string are converted to replacement char + Cow::Owned( + self.transform_invalid_string(error, str::to_owned, |_| { + JavaStr::from_str("\u{FFFD}") + }) + .into_string_unchecked(), + ) + }, + } + } + + /// See [`str::bytes`]. + #[inline] + pub fn bytes(&self) -> Bytes<'_> { + Bytes { + inner: self.inner.iter().copied(), + } + } + + /// See [`str::char_indices`]. 
+    #[inline]
+    pub fn char_indices(&self) -> CharIndices<'_> {
+        CharIndices {
+            front_offset: 0,
+            inner: self.chars(),
+        }
+    }
+
+    /// See [`str::chars`].
+    #[inline]
+    pub fn chars(&self) -> Chars<'_> {
+        Chars {
+            inner: self.inner.iter(),
+        }
+    }
+
+    /// See [`str::contains`].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let bananas = JavaStr::from_str("bananas");
+    ///
+    /// assert!(bananas.contains("nana"));
+    /// assert!(!bananas.contains("apples"));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn contains<P>(&self, mut pat: P) -> bool
+    where
+        P: JavaStrPattern,
+    {
+        pat.find_in(self).is_some()
+    }
+
+    /// See [`str::ends_with`].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let bananas = JavaStr::from_str("bananas");
+    ///
+    /// assert!(bananas.ends_with("anas"));
+    /// assert!(!bananas.ends_with("nana"));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn ends_with<P>(&self, mut pat: P) -> bool
+    where
+        P: JavaStrPattern,
+    {
+        pat.suffix_len_in(self).is_some()
+    }
+
+    /// See [`str::eq_ignore_ascii_case`].
+    #[inline]
+    #[must_use]
+    pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
+        self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
+    }
+
+    /// Same as [`JavaStr::eq_ignore_ascii_case`], but compares against another
+    /// `JavaStr`. See [`str::eq_ignore_ascii_case`].
+    #[inline]
+    #[must_use]
+    pub fn eq_java_ignore_ascii_case(&self, other: &JavaStr) -> bool {
+        self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
+    }
+
+    /// See [`str::escape_debug`].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(
+    ///     JavaStr::from_str("❤\n!").escape_debug().to_string(),
+    ///     "❤\\n!"
+    /// );
+    /// ```
+    #[inline]
+    pub fn escape_debug(&self) -> EscapeDebug<'_> {
+        // The first code point is escaped with `ESCAPE_ALL`; every later one
+        // uses the explicit argument set below. Both helpers are cast to plain
+        // `fn` pointers when handed to the iterator adapters.
+        #[inline]
+        fn escape_first(first: JavaCodePoint) -> CharEscapeIter {
+            first.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
+        }
+        #[inline]
+        fn escape_rest(char: JavaCodePoint) -> CharEscapeIter {
+            char.escape_debug_ext(EscapeDebugExtArgs {
+                escape_single_quote: true,
+                escape_double_quote: true,
+            })
+        }
+
+        let mut chars = self.chars();
+        EscapeDebug {
+            inner: chars
+                .next()
+                .map(escape_first as fn(JavaCodePoint) -> CharEscapeIter)
+                .into_iter()
+                .flatten()
+                .chain(chars.flat_map(escape_rest as fn(JavaCodePoint) -> CharEscapeIter)),
+        }
+    }
+
+    /// See [`str::escape_default`].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(
+    ///     JavaStr::from_str("❤\n!").escape_default().to_string(),
+    ///     "\\u{2764}\\n!"
+    /// );
+    /// ```
+    #[inline]
+    pub fn escape_default(&self) -> EscapeDefault<'_> {
+        EscapeDefault {
+            inner: self.chars().flat_map(JavaCodePoint::escape_default),
+        }
+    }
+
+    /// See [`str::escape_unicode`].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(
+    ///     JavaStr::from_str("❤\n!").escape_unicode().to_string(),
+    ///     "\\u{2764}\\u{a}\\u{21}"
+    /// );
+    /// ```
+    #[inline]
+    pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
+        EscapeUnicode {
+            inner: self.chars().flat_map(JavaCodePoint::escape_unicode),
+        }
+    }
+
+    /// See [`str::find`].
+    ///
+    /// Returns the byte index at which the first match of `pat` starts.
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let s = JavaStr::from_str("Löwe 老虎 Léopard Gepardi");
+    ///
+    /// assert_eq!(s.find('L'), Some(0));
+    /// assert_eq!(s.find('é'), Some(14));
+    /// assert_eq!(s.find("pard"), Some(17));
+    ///
+    /// let x: &[_] = &['1', '2'];
+    /// assert_eq!(s.find(x), None);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn find<P>(&self, mut pat: P) -> Option<usize>
+    where
+        P: JavaStrPattern,
+    {
+        pat.find_in(self).map(|(index, _)| index)
+    }
+
+    /// See [`str::get`].
+    ///
+    /// ```
+    /// # use java_string::{JavaStr, JavaString};
+    /// let v = JavaString::from("🗻∈🌏");
+    ///
+    /// assert_eq!(Some(JavaStr::from_str("🗻")), v.get(0..4));
+    ///
+    /// // indices not on UTF-8 sequence boundaries
+    /// assert!(v.get(1..).is_none());
+    /// assert!(v.get(..8).is_none());
+    ///
+    /// // out of bounds
+    /// assert!(v.get(..42).is_none());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn get<I>(&self, i: I) -> Option<&JavaStr>
+    where
+        I: JavaStrSliceIndex,
+    {
+        i.get(self)
+    }
+
+    /// See [`str::get_mut`].
+    #[inline]
+    #[must_use]
+    pub fn get_mut<I>(&mut self, i: I) -> Option<&mut JavaStr>
+    where
+        I: JavaStrSliceIndex,
+    {
+        i.get_mut(self)
+    }
+
+    /// See [`str::get_unchecked`].
+    ///
+    /// # Safety
+    ///
+    /// - The starting index must not exceed the ending index
+    /// - Indexes must be within bounds of the original slice
+    /// - Indexes must lie on UTF-8 sequence boundaries
+    #[inline]
+    #[must_use]
+    pub unsafe fn get_unchecked<I>(&self, i: I) -> &JavaStr
+    where
+        I: JavaStrSliceIndex,
+    {
+        // SAFETY: the index requirements are forwarded to the caller, see the
+        // `# Safety` section above.
+        unsafe { &*i.get_unchecked(self) }
+    }
+
+    /// See [`str::get_unchecked_mut`].
+    ///
+    /// # Safety
+    ///
+    /// - The starting index must not exceed the ending index
+    /// - Indexes must be within bounds of the original slice
+    /// - Indexes must lie on UTF-8 sequence boundaries
+    #[inline]
+    #[must_use]
+    pub unsafe fn get_unchecked_mut<I>(&mut self, i: I) -> &mut JavaStr
+    where
+        I: JavaStrSliceIndex,
+    {
+        // SAFETY: the index requirements are forwarded to the caller, see the
+        // `# Safety` section above.
+        unsafe { &mut *i.get_unchecked_mut(self) }
+    }
+
+    /// See [`str::into_boxed_bytes`].
+    #[inline]
+    #[must_use]
+    pub fn into_boxed_bytes(self: Box<JavaStr>) -> Box<[u8]> {
+        // SAFETY: `JavaStr` is a `#[repr(transparent)]` wrapper around `[u8]`,
+        // so the raw pointer can be cast and re-boxed as the byte slice.
+        unsafe { Box::from_raw(Box::into_raw(self) as *mut [u8]) }
+    }
+
+    /// See [`str::into_string`].
+    #[inline]
+    #[must_use]
+    pub fn into_string(self: Box<JavaStr>) -> JavaString {
+        let slice = self.into_boxed_bytes();
+        // SAFETY: the bytes come straight out of a `JavaStr`.
+        unsafe { JavaString::from_semi_utf8_unchecked(slice.into_vec()) }
+    }
+
+    /// See [`str::is_ascii`].
+    #[inline]
+    #[must_use]
+    pub fn is_ascii(&self) -> bool {
+        self.as_bytes().is_ascii()
+    }
+
+    /// See [`str::is_char_boundary`].
+    #[inline]
+    #[must_use]
+    pub fn is_char_boundary(&self, index: usize) -> bool {
+        // 0 is always ok.
+        // Test for 0 explicitly so that it can optimize out the check
+        // easily and skip reading string data for that case.
+        // Note that optimizing `self.get(..index)` relies on this.
+        if index == 0 {
+            return true;
+        }
+
+        match self.as_bytes().get(index) {
+            // For `None` we have two options:
+            //
+            // - index == self.len() Empty strings are valid, so return true
+            // - index > self.len() In this case return false
+            //
+            // The check is placed exactly here, because it improves generated
+            // code on higher opt-levels. See https://github.com/rust-lang/rust/pull/84751 for more details.
+            None => index == self.len(),
+
+            Some(&b) => {
+                // This is bit magic equivalent to: b < 128 || b >= 192
+                (b as i8) >= -0x40
+            }
+        }
+    }
+
+    /// Returns `self.len()` when `index >= self.len()`. Otherwise returns the
+    /// position of the last byte in `index.saturating_sub(3)..=index` that is
+    /// not a UTF-8 continuation byte, i.e. the closest char boundary at or
+    /// before `index`.
+    pub(crate) fn floor_char_boundary(&self, index: usize) -> usize {
+        if index >= self.len() {
+            self.len()
+        } else {
+            let lower_bound = index.saturating_sub(3);
+            let new_index = self.as_bytes()[lower_bound..=index].iter().rposition(|b| {
+                // This is bit magic equivalent to: b < 128 || b >= 192
+                (*b as i8) >= -0x40
+            });
+
+            // SAFETY: we know that the character boundary will be within four bytes
+            unsafe { lower_bound + new_index.unwrap_unchecked() }
+        }
+    }
+
+    /// See [`str::is_empty`].
+    #[inline]
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// See [`str::len`].
+    #[inline]
+    #[must_use]
+    pub fn len(&self) -> usize {
+        self.inner.len()
+    }
+
+    /// See [`str::lines`].
+ #[inline] + pub fn lines(&self) -> Lines<'_> { + Lines { + inner: self.split_inclusive('\n').map(|line| { + let Some(line) = line.strip_suffix('\n') else { + return line; + }; + let Some(line) = line.strip_suffix('\r') else { + return line; + }; + line + }), + } + } + + /// See [`str::make_ascii_lowercase`]. + #[inline] + pub fn make_ascii_lowercase(&mut self) { + // SAFETY: changing ASCII letters only does not invalidate UTF-8. + let me = unsafe { self.as_bytes_mut() }; + me.make_ascii_lowercase() + } + + /// See [`str::make_ascii_uppercase`]. + #[inline] + pub fn make_ascii_uppercase(&mut self) { + // SAFETY: changing ASCII letters only does not invalidate UTF-8. + let me = unsafe { self.as_bytes_mut() }; + me.make_ascii_uppercase() + } + + /// See [`str::match_indices`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let v: Vec<_> = JavaStr::from_str("abcXXXabcYYYabc") + /// .match_indices("abc") + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// (0, JavaStr::from_str("abc")), + /// (6, JavaStr::from_str("abc")), + /// (12, JavaStr::from_str("abc")) + /// ] + /// ); + /// + /// let v: Vec<_> = JavaStr::from_str("1abcabc2").match_indices("abc").collect(); + /// assert_eq!( + /// v, + /// [(1, JavaStr::from_str("abc")), (4, JavaStr::from_str("abc"))] + /// ); + /// + /// let v: Vec<_> = JavaStr::from_str("ababa").match_indices("aba").collect(); + /// assert_eq!(v, [(0, JavaStr::from_str("aba"))]); // only the first `aba` + /// ``` + #[inline] + pub fn match_indices

(&self, pat: P) -> MatchIndices

+ where + P: JavaStrPattern, + { + MatchIndices { + str: self, + start: 0, + pat, + } + } + + /// See [`str::matches`]. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaStr}; + /// let v: Vec<&JavaStr> = JavaStr::from_str("abcXXXabcYYYabc") + /// .matches("abc") + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("abc"), + /// JavaStr::from_str("abc"), + /// JavaStr::from_str("abc") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("1abc2abc3") + /// .matches(JavaCodePoint::is_numeric) + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("1"), + /// JavaStr::from_str("2"), + /// JavaStr::from_str("3") + /// ] + /// ); + /// ``` + #[inline] + pub fn matches

(&self, pat: P) -> Matches

+ where + P: JavaStrPattern, + { + Matches { str: self, pat } + } + + /// See [`str::parse`]. + #[inline] + pub fn parse(&self) -> Result::Err>> + where + F: FromStr, + { + match self.as_str() { + Ok(str) => str.parse().map_err(ParseError::Err), + Err(err) => Err(ParseError::InvalidUtf8(err)), + } + } + + /// See [`str::repeat`]. + #[inline] + #[must_use] + pub fn repeat(&self, n: usize) -> JavaString { + unsafe { JavaString::from_semi_utf8_unchecked(self.as_bytes().repeat(n)) } + } + + /// See [`str::replace`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let s = JavaStr::from_str("this is old"); + /// + /// assert_eq!("this is new", s.replace("old", "new")); + /// assert_eq!("than an old", s.replace("is", "an")); + /// ``` + #[inline] + #[must_use] + pub fn replace

(&self, from: P, to: &str) -> JavaString + where + P: JavaStrPattern, + { + self.replace_java(from, JavaStr::from_str(to)) + } + + /// See [`str::replace`]. + #[inline] + #[must_use] + pub fn replace_java

(&self, from: P, to: &JavaStr) -> JavaString + where + P: JavaStrPattern, + { + let mut result = JavaString::new(); + let mut last_end = 0; + for (start, part) in self.match_indices(from) { + result.push_java_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_java_str(to); + last_end = start + part.len(); + } + result.push_java_str(unsafe { self.get_unchecked(last_end..self.len()) }); + result + } + + /// See [`str::replacen`]. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaStr}; + /// let s = JavaStr::from_str("foo foo 123 foo"); + /// assert_eq!("new new 123 foo", s.replacen("foo", "new", 2)); + /// assert_eq!("faa fao 123 foo", s.replacen('o', "a", 3)); + /// assert_eq!( + /// "foo foo new23 foo", + /// s.replacen(JavaCodePoint::is_numeric, "new", 1) + /// ); + /// ``` + #[inline] + #[must_use] + pub fn replacen

(&self, from: P, to: &str, count: usize) -> JavaString + where + P: JavaStrPattern, + { + self.replacen_java(from, JavaStr::from_str(to), count) + } + + /// See [`str::replacen`]. + #[inline] + #[must_use] + pub fn replacen_java

(&self, from: P, to: &JavaStr, count: usize) -> JavaString + where + P: JavaStrPattern, + { + // Hope to reduce the times of re-allocation + let mut result = JavaString::with_capacity(32); + let mut last_end = 0; + for (start, part) in self.match_indices(from).take(count) { + result.push_java_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_java_str(to); + last_end = start + part.len(); + } + result.push_java_str(unsafe { self.get_unchecked(last_end..self.len()) }); + result + } + + /// See [`str::rfind`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let s = JavaStr::from_str("Löwe 老虎 Léopard Gepardi"); + /// + /// assert_eq!(s.rfind('L'), Some(13)); + /// assert_eq!(s.rfind('é'), Some(14)); + /// assert_eq!(s.rfind("pard"), Some(24)); + /// + /// let x: &[_] = &['1', '2']; + /// assert_eq!(s.rfind(x), None); + /// ``` + #[inline] + #[must_use] + pub fn rfind

(&self, mut pat: P) -> Option + where + P: JavaStrPattern, + { + pat.rfind_in(self).map(|(index, _)| index) + } + + /// See [`str::rmatch_indices`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let v: Vec<_> = JavaStr::from_str("abcXXXabcYYYabc") + /// .rmatch_indices("abc") + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// (12, JavaStr::from_str("abc")), + /// (6, JavaStr::from_str("abc")), + /// (0, JavaStr::from_str("abc")) + /// ] + /// ); + /// + /// let v: Vec<_> = JavaStr::from_str("1abcabc2") + /// .rmatch_indices("abc") + /// .collect(); + /// assert_eq!( + /// v, + /// [(4, JavaStr::from_str("abc")), (1, JavaStr::from_str("abc"))] + /// ); + /// + /// let v: Vec<_> = JavaStr::from_str("ababa").rmatch_indices("aba").collect(); + /// assert_eq!(v, [(2, JavaStr::from_str("aba"))]); // only the last `aba` + /// ``` + #[inline] + pub fn rmatch_indices

(&self, pat: P) -> RMatchIndices

+ where + P: JavaStrPattern, + { + RMatchIndices { + inner: self.match_indices(pat), + } + } + + /// See [`str::rmatches`]. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaStr}; + /// let v: Vec<&JavaStr> = JavaStr::from_str("abcXXXabcYYYabc") + /// .rmatches("abc") + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("abc"), + /// JavaStr::from_str("abc"), + /// JavaStr::from_str("abc") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("1abc2abc3") + /// .rmatches(JavaCodePoint::is_numeric) + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("3"), + /// JavaStr::from_str("2"), + /// JavaStr::from_str("1") + /// ] + /// ); + /// ``` + #[inline] + pub fn rmatches

(&self, pat: P) -> RMatches

+ where + P: JavaStrPattern, + { + RMatches { + inner: self.matches(pat), + } + } + + /// See [`str::rsplit`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lamb") + /// .rsplit(' ') + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("lamb"), + /// JavaStr::from_str("little"), + /// JavaStr::from_str("a"), + /// JavaStr::from_str("had"), + /// JavaStr::from_str("Mary") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("").rsplit('X').collect(); + /// assert_eq!(v, [JavaStr::from_str("")]); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXXtigerXleopard") + /// .rsplit('X') + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("leopard"), + /// JavaStr::from_str("tiger"), + /// JavaStr::from_str(""), + /// JavaStr::from_str("lion") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("lion::tiger::leopard") + /// .rsplit("::") + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("leopard"), + /// JavaStr::from_str("tiger"), + /// JavaStr::from_str("lion") + /// ] + /// ); + /// ``` + #[inline] + pub fn rsplit

(&self, pat: P) -> RSplit

+ where + P: JavaStrPattern, + { + RSplit::new(self, pat) + } + + /// See [`str::rsplit_once`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// assert_eq!(JavaStr::from_str("cfg").rsplit_once('='), None); + /// assert_eq!( + /// JavaStr::from_str("cfg=foo").rsplit_once('='), + /// Some((JavaStr::from_str("cfg"), JavaStr::from_str("foo"))) + /// ); + /// assert_eq!( + /// JavaStr::from_str("cfg=foo=bar").rsplit_once('='), + /// Some((JavaStr::from_str("cfg=foo"), JavaStr::from_str("bar"))) + /// ); + /// ``` + #[inline] + #[must_use] + pub fn rsplit_once

(&self, mut delimiter: P) -> Option<(&JavaStr, &JavaStr)> + where + P: JavaStrPattern, + { + let (index, len) = delimiter.rfind_in(self)?; + // SAFETY: pattern is known to return valid indices. + unsafe { + Some(( + self.get_unchecked(..index), + self.get_unchecked(index + len..), + )) + } + } + + /// See [`str::rsplit_terminator`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let v: Vec<&JavaStr> = JavaStr::from_str("A.B.").rsplit_terminator('.').collect(); + /// assert_eq!(v, [JavaStr::from_str("B"), JavaStr::from_str("A")]); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("A..B..").rsplit_terminator(".").collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str(""), + /// JavaStr::from_str("B"), + /// JavaStr::from_str(""), + /// JavaStr::from_str("A") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("A.B:C.D") + /// .rsplit_terminator(&['.', ':'][..]) + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("D"), + /// JavaStr::from_str("C"), + /// JavaStr::from_str("B"), + /// JavaStr::from_str("A") + /// ] + /// ); + /// ``` + #[inline] + pub fn rsplit_terminator

(&self, pat: P) -> RSplitTerminator

+ where + P: JavaStrPattern, + { + RSplitTerminator::new(self, pat) + } + + /// See [`str::rsplitn`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lamb") + /// .rsplitn(3, ' ') + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("lamb"), + /// JavaStr::from_str("little"), + /// JavaStr::from_str("Mary had a") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXXtigerXleopard") + /// .rsplitn(3, 'X') + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("leopard"), + /// JavaStr::from_str("tiger"), + /// JavaStr::from_str("lionX") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("lion::tiger::leopard") + /// .rsplitn(2, "::") + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("leopard"), + /// JavaStr::from_str("lion::tiger") + /// ] + /// ); + /// ``` + #[inline] + pub fn rsplitn

(&self, n: usize, pat: P) -> RSplitN

+ where + P: JavaStrPattern, + { + RSplitN::new(self, pat, n) + } + + /// See [`str::split`]. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaStr}; + /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lamb") + /// .split(' ') + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("Mary"), + /// JavaStr::from_str("had"), + /// JavaStr::from_str("a"), + /// JavaStr::from_str("little"), + /// JavaStr::from_str("lamb") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("").split('X').collect(); + /// assert_eq!(v, [JavaStr::from_str("")]); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXXtigerXleopard") + /// .split('X') + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("lion"), + /// JavaStr::from_str(""), + /// JavaStr::from_str("tiger"), + /// JavaStr::from_str("leopard") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("lion::tiger::leopard") + /// .split("::") + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("lion"), + /// JavaStr::from_str("tiger"), + /// JavaStr::from_str("leopard") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("abc1def2ghi") + /// .split(JavaCodePoint::is_numeric) + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("abc"), + /// JavaStr::from_str("def"), + /// JavaStr::from_str("ghi") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXtigerXleopard") + /// .split(JavaCodePoint::is_uppercase) + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("lion"), + /// JavaStr::from_str("tiger"), + /// JavaStr::from_str("leopard") + /// ] + /// ); + /// ``` + #[inline] + pub fn split

(&self, pat: P) -> Split

+ where + P: JavaStrPattern, + { + Split::new(self, pat) + } + + /// See [`str::split_ascii_whitespace`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let mut iter = JavaStr::from_str(" Mary had\ta little \n\t lamb").split_ascii_whitespace(); + /// assert_eq!(Some(JavaStr::from_str("Mary")), iter.next()); + /// assert_eq!(Some(JavaStr::from_str("had")), iter.next()); + /// assert_eq!(Some(JavaStr::from_str("a")), iter.next()); + /// assert_eq!(Some(JavaStr::from_str("little")), iter.next()); + /// assert_eq!(Some(JavaStr::from_str("lamb")), iter.next()); + /// + /// assert_eq!(None, iter.next()); + /// ``` + #[inline] + pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> { + #[inline] + fn is_non_empty(bytes: &&[u8]) -> bool { + !bytes.is_empty() + } + + SplitAsciiWhitespace { + inner: self + .as_bytes() + .split(u8::is_ascii_whitespace as fn(&u8) -> bool) + .filter(is_non_empty as fn(&&[u8]) -> bool) + .map(|bytes| unsafe { JavaStr::from_semi_utf8_unchecked(bytes) }), + } + } + + /// See [`str::split_at`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let s = JavaStr::from_str("Per Martin-Löf"); + /// + /// let (first, last) = s.split_at(3); + /// + /// assert_eq!("Per", first); + /// assert_eq!(" Martin-Löf", last); + /// ``` + /// ```should_panic + /// # use java_string::JavaStr; + /// let s = JavaStr::from_str("Per Martin-Löf"); + /// // Should panic + /// let _ = s.split_at(13); + /// ``` + #[inline] + #[must_use] + pub fn split_at(&self, mid: usize) -> (&JavaStr, &JavaStr) { + // is_char_boundary checks that the index is in [0, .len()] + if self.is_char_boundary(mid) { + // SAFETY: just checked that `mid` is on a char boundary. + unsafe { + ( + self.get_unchecked(0..mid), + self.get_unchecked(mid..self.len()), + ) + } + } else { + slice_error_fail(self, 0, mid) + } + } + + /// See [`str::split_at_mut`]. 
+ /// + /// ``` + /// # use java_string::{JavaStr, JavaString}; + /// let mut s = JavaString::from("Per Martin-Löf"); + /// let s = s.as_mut_java_str(); + /// + /// let (first, last) = s.split_at_mut(3); + /// + /// assert_eq!("Per", first); + /// assert_eq!(" Martin-Löf", last); + /// ``` + /// ```should_panic + /// # use java_string::{JavaStr, JavaString}; + /// let mut s = JavaString::from("Per Martin-Löf"); + /// let s = s.as_mut_java_str(); + /// // Should panic + /// let _ = s.split_at_mut(13); + /// ``` + #[inline] + #[must_use] + pub fn split_at_mut(&mut self, mid: usize) -> (&mut JavaStr, &mut JavaStr) { + // is_char_boundary checks that the index is in [0, .len()] + if self.is_char_boundary(mid) { + let len = self.len(); + let ptr = self.as_mut_ptr(); + // SAFETY: just checked that `mid` is on a char boundary. + unsafe { + ( + JavaStr::from_semi_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)), + JavaStr::from_semi_utf8_unchecked_mut(slice::from_raw_parts_mut( + ptr.add(mid), + len - mid, + )), + ) + } + } else { + slice_error_fail(self, 0, mid) + } + } + + /// See [`str::split_inclusive`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lamb\nlittle lamb\nlittle lamb.\n") + /// .split_inclusive('\n') + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("Mary had a little lamb\n"), + /// JavaStr::from_str("little lamb\n"), + /// JavaStr::from_str("little lamb.\n") + /// ] + /// ); + /// ``` + #[inline] + pub fn split_inclusive

(&self, pat: P) -> SplitInclusive

+ where + P: JavaStrPattern, + { + SplitInclusive::new(self, pat) + } + + /// See [`str::split_once`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// assert_eq!(JavaStr::from_str("cfg").split_once('='), None); + /// assert_eq!( + /// JavaStr::from_str("cfg=").split_once('='), + /// Some((JavaStr::from_str("cfg"), JavaStr::from_str(""))) + /// ); + /// assert_eq!( + /// JavaStr::from_str("cfg=foo").split_once('='), + /// Some((JavaStr::from_str("cfg"), JavaStr::from_str("foo"))) + /// ); + /// assert_eq!( + /// JavaStr::from_str("cfg=foo=bar").split_once('='), + /// Some((JavaStr::from_str("cfg"), JavaStr::from_str("foo=bar"))) + /// ); + /// ``` + #[inline] + #[must_use] + pub fn split_once

(&self, mut delimiter: P) -> Option<(&JavaStr, &JavaStr)> + where + P: JavaStrPattern, + { + let (index, len) = delimiter.find_in(self)?; + // SAFETY: pattern is known to return valid indices. + unsafe { + Some(( + self.get_unchecked(..index), + self.get_unchecked(index + len..), + )) + } + } + + /// See [`str::split_terminator`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let v: Vec<&JavaStr> = JavaStr::from_str("A.B.").split_terminator('.').collect(); + /// assert_eq!(v, [JavaStr::from_str("A"), JavaStr::from_str("B")]); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("A..B..").split_terminator(".").collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("A"), + /// JavaStr::from_str(""), + /// JavaStr::from_str("B"), + /// JavaStr::from_str("") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("A.B:C.D") + /// .split_terminator(&['.', ':'][..]) + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("A"), + /// JavaStr::from_str("B"), + /// JavaStr::from_str("C"), + /// JavaStr::from_str("D") + /// ] + /// ); + /// ``` + #[inline] + pub fn split_terminator

(&self, pat: P) -> SplitTerminator

+ where + P: JavaStrPattern, + { + SplitTerminator::new(self, pat) + } + + /// See [`str::split_whitespace`]. + #[inline] + pub fn split_whitespace(&self) -> SplitWhitespace<'_> { + SplitWhitespace { + inner: self + .split(JavaCodePoint::is_whitespace as fn(JavaCodePoint) -> bool) + .filter(|str| !str.is_empty()), + } + } + + /// See [`str::splitn`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lambda") + /// .splitn(3, ' ') + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("Mary"), + /// JavaStr::from_str("had"), + /// JavaStr::from_str("a little lambda") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXXtigerXleopard") + /// .splitn(3, "X") + /// .collect(); + /// assert_eq!( + /// v, + /// [ + /// JavaStr::from_str("lion"), + /// JavaStr::from_str(""), + /// JavaStr::from_str("tigerXleopard") + /// ] + /// ); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("abcXdef").splitn(1, 'X').collect(); + /// assert_eq!(v, [JavaStr::from_str("abcXdef")]); + /// + /// let v: Vec<&JavaStr> = JavaStr::from_str("").splitn(1, 'X').collect(); + /// assert_eq!(v, [JavaStr::from_str("")]); + /// ``` + #[inline] + pub fn splitn

(&self, n: usize, pat: P) -> SplitN

+ where + P: JavaStrPattern, + { + SplitN::new(self, pat, n) + } + + /// See [`str::starts_with`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// let bananas = JavaStr::from_str("bananas"); + /// + /// assert!(bananas.starts_with("bana")); + /// assert!(!bananas.starts_with("nana")); + /// ``` + #[inline] + #[must_use] + pub fn starts_with

(&self, mut pat: P) -> bool + where + P: JavaStrPattern, + { + pat.prefix_len_in(self).is_some() + } + + /// See [`str::strip_prefix`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// assert_eq!( + /// JavaStr::from_str("foo:bar").strip_prefix("foo:"), + /// Some(JavaStr::from_str("bar")) + /// ); + /// assert_eq!(JavaStr::from_str("foo:bar").strip_prefix("bar"), None); + /// assert_eq!( + /// JavaStr::from_str("foofoo").strip_prefix("foo"), + /// Some(JavaStr::from_str("foo")) + /// ); + /// ``` + #[inline] + #[must_use] + pub fn strip_prefix

(&self, mut prefix: P) -> Option<&JavaStr> + where + P: JavaStrPattern, + { + let len = prefix.prefix_len_in(self)?; + // SAFETY: pattern is known to return valid indices. + unsafe { Some(self.get_unchecked(len..)) } + } + + /// See [`str::strip_suffix`]. + /// + /// ``` + /// # use java_string::JavaStr; + /// assert_eq!( + /// JavaStr::from_str("bar:foo").strip_suffix(":foo"), + /// Some(JavaStr::from_str("bar")) + /// ); + /// assert_eq!(JavaStr::from_str("bar:foo").strip_suffix("bar"), None); + /// assert_eq!( + /// JavaStr::from_str("foofoo").strip_suffix("foo"), + /// Some(JavaStr::from_str("foo")) + /// ); + /// ``` + #[inline] + #[must_use] + pub fn strip_suffix

(&self, mut suffix: P) -> Option<&JavaStr> + where + P: JavaStrPattern, + { + let len = suffix.suffix_len_in(self)?; + // SAFETY: pattern is known to return valid indices. + unsafe { Some(self.get_unchecked(..self.len() - len)) } + } + + /// See [`str::to_ascii_lowercase`]. + #[inline] + #[must_use] + pub fn to_ascii_lowercase(&self) -> JavaString { + let mut s = self.to_owned(); + s.make_ascii_lowercase(); + s + } + + /// See [`str::to_ascii_uppercase`]. + #[inline] + #[must_use] + pub fn to_ascii_uppercase(&self) -> JavaString { + let mut s = self.to_owned(); + s.make_ascii_uppercase(); + s + } + + /// See [`str::to_lowercase`]. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaStr, JavaString}; + /// let s = JavaStr::from_str("HELLO"); + /// assert_eq!("hello", s.to_lowercase()); + /// + /// let odysseus = JavaStr::from_str("ὈΔΥΣΣΕΎΣ"); + /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase()); + /// + /// let s = JavaString::from("Hello ") + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() + /// + JavaStr::from_str(" World!"); + /// let expected = JavaString::from("hello ") + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() + /// + JavaStr::from_str(" world!"); + /// assert_eq!(expected, s.to_lowercase()); + /// ``` + #[inline] + #[must_use] + pub fn to_lowercase(&self) -> JavaString { + self.transform_string(str::to_lowercase, |ch| ch) + } + + /// See [`str::to_uppercase`]. 
+ /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaStr, JavaString}; + /// let s = JavaStr::from_str("hello"); + /// assert_eq!("HELLO", s.to_uppercase()); + /// + /// let s = JavaStr::from_str("tschüß"); + /// assert_eq!("TSCHÜSS", s.to_uppercase()); + /// + /// let s = JavaString::from("Hello ") + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() + /// + JavaStr::from_str(" World!"); + /// let expected = JavaString::from("HELLO ") + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() + /// + JavaStr::from_str(" WORLD!"); + /// assert_eq!(expected, s.to_uppercase()); + /// ``` + #[inline] + #[must_use] + pub fn to_uppercase(&self) -> JavaString { + self.transform_string(str::to_uppercase, |ch| ch) + } + + /// See [`str::trim`]. + #[inline] + #[must_use] + pub fn trim(&self) -> &JavaStr { + self.trim_matches(|c: JavaCodePoint| c.is_whitespace()) + } + + /// See [`str::trim_end`]. + #[inline] + #[must_use] + pub fn trim_end(&self) -> &JavaStr { + self.trim_end_matches(|c: JavaCodePoint| c.is_whitespace()) + } + + /// See [`str::trim_end_matches`]. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaStr}; + /// assert_eq!( + /// JavaStr::from_str("11foo1bar11").trim_end_matches('1'), + /// "11foo1bar" + /// ); + /// assert_eq!( + /// JavaStr::from_str("123foo1bar123").trim_end_matches(JavaCodePoint::is_numeric), + /// "123foo1bar" + /// ); + /// + /// let x: &[_] = &['1', '2']; + /// assert_eq!( + /// JavaStr::from_str("12foo1bar12").trim_end_matches(x), + /// "12foo1bar" + /// ); + /// ``` + #[inline] + #[must_use] + pub fn trim_end_matches

(&self, mut pat: P) -> &JavaStr + where + P: JavaStrPattern, + { + let mut str = self; + while let Some(suffix_len) = pat.suffix_len_in(str) { + if suffix_len == 0 { + break; + } + // SAFETY: pattern is known to return valid indices. + str = unsafe { str.get_unchecked(..str.len() - suffix_len) }; + } + str + } + + /// See [`str::trim_matches`]. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaStr}; + /// assert_eq!( + /// JavaStr::from_str("11foo1bar11").trim_matches('1'), + /// "foo1bar" + /// ); + /// assert_eq!( + /// JavaStr::from_str("123foo1bar123").trim_matches(JavaCodePoint::is_numeric), + /// "foo1bar" + /// ); + /// + /// let x: &[_] = &['1', '2']; + /// assert_eq!(JavaStr::from_str("12foo1bar12").trim_matches(x), "foo1bar"); + /// ``` + #[inline] + #[must_use] + pub fn trim_matches

(&self, mut pat: P) -> &JavaStr + where + P: JavaStrPattern, + { + let mut str = self; + while let Some(prefix_len) = pat.prefix_len_in(str) { + if prefix_len == 0 { + break; + } + // SAFETY: pattern is known to return valid indices. + str = unsafe { str.get_unchecked(prefix_len..) }; + } + while let Some(suffix_len) = pat.suffix_len_in(str) { + if suffix_len == 0 { + break; + } + // SAFETY: pattern is known to return valid indices. + str = unsafe { str.get_unchecked(..str.len() - suffix_len) }; + } + str + } + + /// See [`str::trim_start`]. + #[inline] + #[must_use] + pub fn trim_start(&self) -> &JavaStr { + self.trim_start_matches(|c: JavaCodePoint| c.is_whitespace()) + } + + /// See [`str::trim_start_matches`]. + /// + /// ``` + /// # use java_string::{JavaCodePoint, JavaStr}; + /// assert_eq!( + /// JavaStr::from_str("11foo1bar11").trim_start_matches('1'), + /// "foo1bar11" + /// ); + /// assert_eq!( + /// JavaStr::from_str("123foo1bar123").trim_start_matches(JavaCodePoint::is_numeric), + /// "foo1bar123" + /// ); + /// + /// let x: &[_] = &['1', '2']; + /// assert_eq!( + /// JavaStr::from_str("12foo1bar12").trim_start_matches(x), + /// "foo1bar12" + /// ); + /// ``` + #[inline] + #[must_use] + pub fn trim_start_matches

(&self, mut pat: P) -> &JavaStr + where + P: JavaStrPattern, + { + let mut str = self; + while let Some(prefix_len) = pat.prefix_len_in(str) { + if prefix_len == 0 { + break; + } + // SAFETY: pattern is known to return valid indices. + str = unsafe { str.get_unchecked(prefix_len..) }; + } + str + } + + #[inline] + fn transform_string( + &self, + mut string_transformer: SF, + invalid_char_transformer: ICF, + ) -> JavaString + where + SF: FnMut(&str) -> String, + ICF: FnMut(&JavaStr) -> &JavaStr, + { + let bytes = self.as_bytes(); + match run_utf8_full_validation_from_semi(bytes) { + Ok(()) => JavaString::from(string_transformer(unsafe { + // SAFETY: validation succeeded + std::str::from_utf8_unchecked(bytes) + })), + Err(error) => { + self.transform_invalid_string(error, string_transformer, invalid_char_transformer) + } + } + } + + #[inline] + fn transform_invalid_string( + &self, + error: Utf8Error, + mut string_transformer: SF, + mut invalid_char_transformer: ICF, + ) -> JavaString + where + SF: FnMut(&str) -> String, + ICF: FnMut(&JavaStr) -> &JavaStr, + { + let bytes = self.as_bytes(); + let mut result = JavaString::from(string_transformer(unsafe { + // SAFETY: validation succeeded up to this index + std::str::from_utf8_unchecked(bytes.get_unchecked(..error.valid_up_to)) + })); + result.push_java_str(invalid_char_transformer(unsafe { + // SAFETY: any UTF-8 error in semi-valid UTF-8 is a 3 byte long sequence + // representing a surrogate code point. We're pushing that sequence now + JavaStr::from_semi_utf8_unchecked( + bytes.get_unchecked(error.valid_up_to..error.valid_up_to + 3), + ) + })); + let mut index = error.valid_up_to + 3; + loop { + let remainder = unsafe { bytes.get_unchecked(index..) 
}; + match run_utf8_full_validation_from_semi(remainder) { + Ok(()) => { + result.push_str(&string_transformer(unsafe { + // SAFETY: validation succeeded + std::str::from_utf8_unchecked(remainder) + })); + return result; + } + Err(error) => { + result.push_str(&string_transformer(unsafe { + // SAFETY: validation succeeded up to this index + std::str::from_utf8_unchecked( + bytes.get_unchecked(index..index + error.valid_up_to), + ) + })); + result.push_java_str(invalid_char_transformer(unsafe { + // SAFETY: see comment above + JavaStr::from_semi_utf8_unchecked(bytes.get_unchecked( + index + error.valid_up_to..index + error.valid_up_to + 3, + )) + })); + index += error.valid_up_to + 3; + } + } + } + } +} + +impl<'a> Add<&JavaStr> for Cow<'a, JavaStr> { + type Output = Cow<'a, JavaStr>; + + #[inline] + fn add(mut self, rhs: &JavaStr) -> Self::Output { + self += rhs; + self + } +} + +impl<'a> AddAssign<&JavaStr> for Cow<'a, JavaStr> { + #[inline] + fn add_assign(&mut self, rhs: &JavaStr) { + if !rhs.is_empty() { + match self { + Cow::Borrowed(lhs) => { + let mut result = lhs.to_owned(); + result.push_java_str(rhs); + *self = Cow::Owned(result); + } + Cow::Owned(lhs) => { + lhs.push_java_str(rhs); + } + } + } + } +} + +impl AsRef<[u8]> for JavaStr { + #[inline] + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +impl AsRef for str { + #[inline] + fn as_ref(&self) -> &JavaStr { + JavaStr::from_str(self) + } +} + +impl AsRef for String { + #[inline] + fn as_ref(&self) -> &JavaStr { + JavaStr::from_str(self) + } +} + +impl AsRef for JavaStr { + #[inline] + fn as_ref(&self) -> &JavaStr { + self + } +} + +impl Clone for Box { + #[inline] + fn clone(&self) -> Self { + let buf: Box<[u8]> = self.as_bytes().into(); + unsafe { JavaStr::from_boxed_semi_utf8_unchecked(buf) } + } +} + +impl Debug for JavaStr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_char('"')?; + let mut from = 0; + for (i, c) in self.char_indices() { + let esc = 
c.escape_debug_ext(EscapeDebugExtArgs { + escape_single_quote: false, + escape_double_quote: true, + }); + // If char needs escaping, flush backlog so far and write, else skip. + // Also handle invalid UTF-8 here + if esc.len() != 1 || c.as_char().is_none() { + unsafe { + // SAFETY: any invalid UTF-8 should have been caught by a previous iteration + f.write_str(self[from..i].as_str_unchecked())?; + } + for c in esc { + f.write_char(c)?; + } + from = i + c.len_utf8(); + } + } + unsafe { + // SAFETY: any invalid UTF-8 should have been caught by the loop above + f.write_str(self[from..].as_str_unchecked())?; + } + f.write_char('"') + } +} + +impl Default for &JavaStr { + #[inline] + fn default() -> Self { + JavaStr::from_str("") + } +} + +impl Default for Box { + #[inline] + fn default() -> Self { + JavaStr::from_boxed_str(Box::::default()) + } +} + +impl Display for JavaStr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.as_str_lossy(), f) + } +} + +impl<'a> From<&'a JavaStr> for Cow<'a, JavaStr> { + #[inline] + fn from(value: &'a JavaStr) -> Self { + Cow::Borrowed(value) + } +} + +impl From<&JavaStr> for Arc { + #[inline] + fn from(value: &JavaStr) -> Self { + let arc = Arc::<[u8]>::from(value.as_bytes()); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const JavaStr) } + } +} + +impl From<&JavaStr> for Box { + #[inline] + fn from(value: &JavaStr) -> Self { + unsafe { JavaStr::from_boxed_semi_utf8_unchecked(Box::from(value.as_bytes())) } + } +} + +impl From<&JavaStr> for Rc { + #[inline] + fn from(value: &JavaStr) -> Self { + let rc = Rc::<[u8]>::from(value.as_bytes()); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const JavaStr) } + } +} + +impl From<&JavaStr> for Vec { + #[inline] + fn from(value: &JavaStr) -> Self { + From::from(value.as_bytes()) + } +} + +impl From> for Box { + #[inline] + fn from(value: Cow<'_, JavaStr>) -> Self { + match value { + Cow::Borrowed(s) => Box::from(s), + Cow::Owned(s) => Box::from(s), + } + } +} + 
+impl From for Box { + #[inline] + fn from(value: JavaString) -> Self { + value.into_boxed_str() + } +} + +impl<'a> From<&'a str> for &'a JavaStr { + #[inline] + fn from(value: &'a str) -> Self { + JavaStr::from_str(value) + } +} + +impl<'a> From<&'a String> for &'a JavaStr { + #[inline] + fn from(value: &'a String) -> Self { + JavaStr::from_str(value) + } +} + +impl Hash for JavaStr { + #[inline] + fn hash(&self, state: &mut H) { + state.write(self.as_bytes()); + state.write_u8(0xFF); + } +} + +impl Index for JavaStr +where + I: JavaStrSliceIndex, +{ + type Output = JavaStr; + + #[inline] + fn index(&self, index: I) -> &Self::Output { + index.index(self) + } +} + +impl IndexMut for JavaStr +where + I: JavaStrSliceIndex, +{ + #[inline] + fn index_mut(&mut self, index: I) -> &mut Self::Output { + index.index_mut(self) + } +} + +impl<'a, 'b> PartialEq<&'b JavaStr> for Cow<'a, str> { + #[inline] + fn eq(&self, other: &&'b JavaStr) -> bool { + self == *other + } +} + +impl<'a, 'b> PartialEq<&'b JavaStr> for Cow<'a, JavaStr> { + #[inline] + fn eq(&self, other: &&'b JavaStr) -> bool { + self == *other + } +} + +impl<'a, 'b> PartialEq> for &'b JavaStr { + #[inline] + fn eq(&self, other: &Cow<'a, str>) -> bool { + *self == other + } +} + +impl<'a> PartialEq> for JavaStr { + #[inline] + fn eq(&self, other: &Cow<'a, str>) -> bool { + other == self + } +} + +impl<'a, 'b> PartialEq> for &'b JavaStr { + #[inline] + fn eq(&self, other: &Cow<'a, JavaStr>) -> bool { + *self == other + } +} + +impl<'a> PartialEq> for JavaStr { + #[inline] + fn eq(&self, other: &Cow<'a, JavaStr>) -> bool { + other == self + } +} + +impl<'a> PartialEq for &'a JavaStr { + #[inline] + fn eq(&self, other: &String) -> bool { + *self == other + } +} + +impl PartialEq for JavaStr { + #[inline] + fn eq(&self, other: &String) -> bool { + self == &other[..] + } +} + +impl PartialEq for String { + #[inline] + fn eq(&self, other: &JavaStr) -> bool { + &self[..] 
== other + } +} + +impl<'a> PartialEq for &'a JavaStr { + #[inline] + fn eq(&self, other: &JavaString) -> bool { + *self == other + } +} + +impl PartialEq for JavaStr { + #[inline] + fn eq(&self, other: &JavaString) -> bool { + self == other[..] + } +} + +impl<'a> PartialEq for Cow<'a, str> { + #[inline] + fn eq(&self, other: &JavaStr) -> bool { + match self { + Cow::Borrowed(this) => this == other, + Cow::Owned(this) => this == other, + } + } +} + +impl<'a> PartialEq for Cow<'a, JavaStr> { + #[inline] + fn eq(&self, other: &JavaStr) -> bool { + match self { + Cow::Borrowed(this) => this == other, + Cow::Owned(this) => this == other, + } + } +} + +impl PartialEq for str { + #[inline] + fn eq(&self, other: &JavaStr) -> bool { + JavaStr::from_str(self) == other + } +} + +impl<'a> PartialEq for &'a str { + #[inline] + fn eq(&self, other: &JavaStr) -> bool { + self.as_bytes() == &other.inner + } +} + +impl PartialEq for JavaStr { + #[inline] + fn eq(&self, other: &str) -> bool { + &self.inner == other.as_bytes() + } +} + +impl<'a> PartialEq<&'a str> for JavaStr { + #[inline] + fn eq(&self, other: &&'a str) -> bool { + &self.inner == other.as_bytes() + } +} + +impl<'a> PartialEq for &'a JavaStr { + #[inline] + fn eq(&self, other: &JavaStr) -> bool { + self.inner == other.inner + } +} + +impl<'a> PartialEq<&'a JavaStr> for JavaStr { + #[inline] + fn eq(&self, other: &&'a JavaStr) -> bool { + self.inner == other.inner + } +} + +impl ToOwned for JavaStr { + type Owned = JavaString; + + #[inline] + fn to_owned(&self) -> Self::Owned { + unsafe { JavaString::from_semi_utf8_unchecked(self.as_bytes().to_vec()) } + } +} + +mod private_slice_index { + use std::ops; + + pub trait Sealed {} + + impl Sealed for ops::Range {} + impl Sealed for ops::RangeTo {} + impl Sealed for ops::RangeFrom {} + impl Sealed for ops::RangeFull {} + impl Sealed for ops::RangeInclusive {} + impl Sealed for ops::RangeToInclusive {} +} + +/// # Safety +/// +/// Implementations' `check_bounds` method must 
properly check the bounds of the +/// slice, such that calling `get_unchecked` is not UB. +pub unsafe trait JavaStrSliceIndex: private_slice_index::Sealed + Sized { + fn check_bounds(&self, slice: &JavaStr) -> bool; + fn check_bounds_fail(self, slice: &JavaStr) -> !; + + /// # Safety + /// + /// - The input slice must be a valid pointer + /// - This index must not be out of bounds of the input slice + /// - The indices of this slice must point to char boundaries in the input + /// slice + unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr; + + /// # Safety + /// + /// - The input slice must be a valid pointer + /// - This index must not be out of bounds of the input slice + /// - The indices of this slice must point to char boundaries in the input + /// slice + unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr; + + #[inline] + fn get(self, slice: &JavaStr) -> Option<&JavaStr> { + self.check_bounds(slice) + .then(|| unsafe { &*self.get_unchecked(slice) }) + } + + #[inline] + fn get_mut(self, slice: &mut JavaStr) -> Option<&mut JavaStr> { + self.check_bounds(slice) + .then(|| unsafe { &mut *self.get_unchecked_mut(slice) }) + } + + #[inline] + fn index(self, slice: &JavaStr) -> &JavaStr { + if self.check_bounds(slice) { + unsafe { &*self.get_unchecked(slice) } + } else { + self.check_bounds_fail(slice) + } + } + + #[inline] + fn index_mut(self, slice: &mut JavaStr) -> &mut JavaStr { + if self.check_bounds(slice) { + unsafe { &mut *self.get_unchecked_mut(slice) } + } else { + self.check_bounds_fail(slice) + } + } +} + +unsafe impl JavaStrSliceIndex for RangeFull { + #[inline] + fn check_bounds(&self, _slice: &JavaStr) -> bool { + true + } + + #[inline] + fn check_bounds_fail(self, _slice: &JavaStr) -> ! 
{ + unreachable!() + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr { + slice + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr { + slice + } +} + +unsafe impl JavaStrSliceIndex for Range { + #[inline] + fn check_bounds(&self, slice: &JavaStr) -> bool { + self.start <= self.end + && slice.is_char_boundary(self.start) + && slice.is_char_boundary(self.end) + } + + #[inline] + #[track_caller] + fn check_bounds_fail(self, slice: &JavaStr) -> ! { + slice_error_fail(slice, self.start, self.end) + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr { + let slice = slice as *const [u8]; + // SAFETY: the caller guarantees that `self` is in bounds of `slice` + // which satisfies all the conditions for `add`. + let ptr = unsafe { (slice as *const u8).add(self.start) }; + let len = self.end - self.start; + ptr::slice_from_raw_parts(ptr, len) as *const JavaStr + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr { + let slice = slice as *mut [u8]; + // SAFETY: see comments for `get_unchecked`. + let ptr = unsafe { (slice as *mut u8).add(self.start) }; + let len = self.end - self.start; + ptr::slice_from_raw_parts_mut(ptr, len) as *mut JavaStr + } +} + +unsafe impl JavaStrSliceIndex for RangeTo { + #[inline] + fn check_bounds(&self, slice: &JavaStr) -> bool { + slice.is_char_boundary(self.end) + } + + #[inline] + #[track_caller] + fn check_bounds_fail(self, slice: &JavaStr) -> ! 
{ + slice_error_fail(slice, 0, self.end) + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr { + unsafe { (0..self.end).get_unchecked(slice) } + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr { + unsafe { (0..self.end).get_unchecked_mut(slice) } + } +} + +unsafe impl JavaStrSliceIndex for RangeFrom { + #[inline] + fn check_bounds(&self, slice: &JavaStr) -> bool { + slice.is_char_boundary(self.start) + } + + #[inline] + #[track_caller] + fn check_bounds_fail(self, slice: &JavaStr) -> ! { + slice_error_fail(slice, self.start, slice.len()) + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr { + let len = unsafe { (*(slice as *const [u8])).len() }; + unsafe { (self.start..len).get_unchecked(slice) } + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr { + let len = unsafe { (*(slice as *mut [u8])).len() }; + unsafe { (self.start..len).get_unchecked_mut(slice) } + } +} + +#[inline] +fn into_slice_range(range: RangeInclusive) -> Range { + let exclusive_end = *range.end() + 1; + let start = match range.end_bound() { + Bound::Excluded(..) => exclusive_end, // excluded + Bound::Included(..) => *range.start(), + Bound::Unbounded => unreachable!(), + }; + start..exclusive_end +} + +unsafe impl JavaStrSliceIndex for RangeInclusive { + #[inline] + fn check_bounds(&self, slice: &JavaStr) -> bool { + *self.end() != usize::MAX && into_slice_range(self.clone()).check_bounds(slice) + } + + #[inline] + #[track_caller] + fn check_bounds_fail(self, slice: &JavaStr) -> ! 
{ + if *self.end() == usize::MAX { + str_end_index_overflow_fail() + } else { + into_slice_range(self).check_bounds_fail(slice) + } + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr { + into_slice_range(self).get_unchecked(slice) + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr { + into_slice_range(self).get_unchecked_mut(slice) + } +} + +unsafe impl JavaStrSliceIndex for RangeToInclusive { + #[inline] + fn check_bounds(&self, slice: &JavaStr) -> bool { + (0..=self.end).check_bounds(slice) + } + + #[inline] + fn check_bounds_fail(self, slice: &JavaStr) -> ! { + (0..=self.end).check_bounds_fail(slice) + } + + #[inline] + unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr { + (0..=self.end).get_unchecked(slice) + } + + #[inline] + unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr { + (0..=self.end).get_unchecked_mut(slice) + } +} diff --git a/java_string/src/validations.rs b/java_string/src/validations.rs new file mode 100644 index 0000000..a3518dd --- /dev/null +++ b/java_string/src/validations.rs @@ -0,0 +1,365 @@ +use std::ops::{Bound, Range, RangeBounds, RangeTo}; + +use crate::{JavaStr, Utf8Error}; + +pub(crate) const TAG_CONT: u8 = 0b1000_0000; +pub(crate) const TAG_TWO_B: u8 = 0b1100_0000; +pub(crate) const TAG_THREE_B: u8 = 0b1110_0000; +pub(crate) const TAG_FOUR_B: u8 = 0b1111_0000; +pub(crate) const CONT_MASK: u8 = 0b0011_1111; + +#[inline] +const fn utf8_first_byte(byte: u8, width: u32) -> u32 { + (byte & (0x7F >> width)) as u32 +} + +#[inline] +const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { + (ch << 6) | (byte & CONT_MASK) as u32 +} + +#[inline] +const fn utf8_is_cont_byte(byte: u8) -> bool { + (byte as i8) < -64 +} + +/// # Safety +/// +/// `bytes` must produce a semi-valid UTF-8 string +#[inline] +pub(crate) unsafe fn next_code_point<'a, I: Iterator>(bytes: &mut I) -> Option { + // Decode UTF-8 + let x = *bytes.next()?; + 
if x < 128 { + return Some(x.into()); + } + + // Multibyte case follows + // Decode from a byte combination out of: [[[x y] z] w] + // NOTE: Performance is sensitive to the exact formulation here + let init = utf8_first_byte(x, 2); + // SAFETY: `bytes` produces an UTF-8-like string, + // so the iterator must produce a value here. + let y = unsafe { *bytes.next().unwrap_unchecked() }; + let mut ch = utf8_acc_cont_byte(init, y); + if x >= 0xE0 { + // [[x y z] w] case + // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid + // SAFETY: `bytes` produces an UTF-8-like string, + // so the iterator must produce a value here. + let z = unsafe { *bytes.next().unwrap_unchecked() }; + let y_z = utf8_acc_cont_byte((y & CONT_MASK).into(), z); + ch = init << 12 | y_z; + if x >= 0xF0 { + // [x y z w] case + // use only the lower 3 bits of `init` + // SAFETY: `bytes` produces an UTF-8-like string, + // so the iterator must produce a value here. + let w = unsafe { *bytes.next().unwrap_unchecked() }; + ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w); + } + } + + Some(ch) +} + +/// # Safety +/// +/// `bytes` must produce a semi-valid UTF-8 string +#[inline] +pub(crate) unsafe fn next_code_point_reverse<'a, I: DoubleEndedIterator>( + bytes: &mut I, +) -> Option { + // Decode UTF-8 + let w = match *bytes.next_back()? { + next_byte if next_byte < 128 => return Some(next_byte.into()), + back_byte => back_byte, + }; + + // Multibyte case follows + // Decode from a byte combination out of: [x [y [z w]]] + let mut ch; + // SAFETY: `bytes` produces an UTF-8-like string, + // so the iterator must produce a value here. + let z = unsafe { *bytes.next_back().unwrap_unchecked() }; + ch = utf8_first_byte(z, 2); + if utf8_is_cont_byte(z) { + // SAFETY: `bytes` produces an UTF-8-like string, + // so the iterator must produce a value here. 
+ let y = unsafe { *bytes.next_back().unwrap_unchecked() }; + ch = utf8_first_byte(y, 3); + if utf8_is_cont_byte(y) { + // SAFETY: `bytes` produces an UTF-8-like string, + // so the iterator must produce a value here. + let x = unsafe { *bytes.next_back().unwrap_unchecked() }; + ch = utf8_first_byte(x, 4); + ch = utf8_acc_cont_byte(ch, y); + } + ch = utf8_acc_cont_byte(ch, z); + } + ch = utf8_acc_cont_byte(ch, w); + + Some(ch) +} + +#[inline(always)] +pub(crate) fn run_utf8_semi_validation(v: &[u8]) -> Result<(), Utf8Error> { + let mut index = 0; + let len = v.len(); + + let usize_bytes = std::mem::size_of::(); + let ascii_block_size = 2 * usize_bytes; + let blocks_end = if len >= ascii_block_size { + len - ascii_block_size + 1 + } else { + 0 + }; + let align = v.as_ptr().align_offset(usize_bytes); + + while index < len { + let old_offset = index; + macro_rules! err { + ($error_len:expr) => { + return Err(Utf8Error { + valid_up_to: old_offset, + error_len: $error_len, + }) + }; + } + + macro_rules! next { + () => {{ + index += 1; + // we needed data, but there was none: error! 
+ if index >= len { + err!(None) + } + v[index] + }}; + } + + let first = v[index]; + if first >= 128 { + let w = utf8_char_width(first); + // 2-byte encoding is for codepoints \u{0080} to \u{07ff} + // first C2 80 last DF BF + // 3-byte encoding is for codepoints \u{0800} to \u{ffff} + // first E0 A0 80 last EF BF BF + // INCLUDING surrogates codepoints \u{d800} to \u{dfff} + // ED A0 80 to ED BF BF + // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff + // first F0 90 80 80 last F4 8F BF BF + // + // Use the UTF-8 syntax from the RFC + // + // https://tools.ietf.org/html/rfc3629 + // UTF8-1 = %x00-7F + // UTF8-2 = %xC2-DF UTF8-tail + // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + // %xF4 %x80-8F 2( UTF8-tail ) + match w { + 2 => { + if next!() as i8 >= -64 { + err!(Some(1)) + } + } + 3 => { + match (first, next!()) { + (0xE0, 0xA0..=0xBF) | (0xE1..=0xEF, 0x80..=0xBF) => {} /* INCLUDING surrogate codepoints here */ + _ => err!(Some(1)), + } + if next!() as i8 >= -64 { + err!(Some(2)) + } + } + 4 => { + match (first, next!()) { + (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {} + _ => err!(Some(1)), + } + if next!() as i8 >= -64 { + err!(Some(2)) + } + if next!() as i8 >= -64 { + err!(Some(3)) + } + } + _ => err!(Some(1)), + } + index += 1; + } else { + // Ascii case, try to skip forward quickly. + // When the pointer is aligned, read 2 words of data per iteration + // until we find a word containing a non-ascii byte. + if align != usize::MAX && align.wrapping_sub(index) % usize_bytes == 0 { + let ptr = v.as_ptr(); + while index < blocks_end { + // SAFETY: since `align - index` and `ascii_block_size` are + // multiples of `usize_bytes`, `block = ptr.add(index)` is + // always aligned with a `usize` so it's safe to dereference + // both `block` and `block.add(1)`. 
+ unsafe { + let block = ptr.add(index) as *const usize; + // break if there is a nonascii byte + let zu = contains_nonascii(*block); + let zv = contains_nonascii(*block.add(1)); + if zu || zv { + break; + } + } + index += ascii_block_size; + } + // step from the point where the wordwise loop stopped + while index < len && v[index] < 128 { + index += 1; + } + } else { + index += 1; + } + } + } + + Ok(()) +} + +#[inline(always)] +pub(crate) const fn run_utf8_full_validation_from_semi(v: &[u8]) -> Result<(), Utf8Error> { + // this function checks for surrogate codepoints, between \u{d800} to \u{dfff}, + // or ED A0 80 to ED BF BF of width 3 unicode chars. The valid range of width 3 + // characters is ED 80 80 to ED BF BF, so we need to check for an ED byte + // followed by a >=A0 byte. + let mut index = 0; + while index + 3 <= v.len() { + if v[index] == 0xED && v[index + 1] >= 0xA0 { + return Err(Utf8Error { + valid_up_to: index, + error_len: Some(1), + }); + } + index += 1; + } + + Ok(()) +} + +#[inline] +pub(crate) const fn utf8_char_width(first_byte: u8) -> usize { + const UTF8_CHAR_WIDTH: [u8; 256] = [ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]; + + UTF8_CHAR_WIDTH[first_byte as usize] as usize +} + +#[inline] +const fn 
contains_nonascii(x: usize) -> bool { + const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; std::mem::size_of::()]); + (x & NONASCII_MASK) != 0 +} + +#[cold] +#[track_caller] +pub(crate) fn slice_error_fail(s: &JavaStr, begin: usize, end: usize) -> ! { + const MAX_DISPLAY_LENGTH: usize = 256; + let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH); + let s_trunc = &s[..trunc_len]; + let ellipsis = if trunc_len < s.len() { "[...]" } else { "" }; + + // 1. out of bounds + if begin > s.len() || end > s.len() { + let oob_index = if begin > s.len() { begin } else { end }; + panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}"); + } + + // 2. begin <= end + assert!( + begin <= end, + "begin <= end ({begin} <= {end}) when slicing `{s_trunc}`{ellipsis}", + ); + + // 3. character boundary + let index = if !s.is_char_boundary(begin) { + begin + } else { + end + }; + // find the character + let char_start = s.floor_char_boundary(index); + // `char_start` must be less than len and a char boundary + let ch = s[char_start..].chars().next().unwrap(); + let char_range = char_start..char_start + ch.len_utf8(); + panic!( + "byte index {index} is not a char boundary; it is inside {ch:?} (bytes {char_range:?}) of \ + `{s_trunc}`{ellipsis}", + ); +} + +#[cold] +#[track_caller] +pub(crate) fn str_end_index_len_fail(index: usize, len: usize) -> ! { + panic!("range end index {index} out of range for JavaStr of length {len}"); +} + +#[cold] +#[track_caller] +pub(crate) fn str_index_order_fail(index: usize, end: usize) -> ! { + panic!("JavaStr index starts at {index} but ends at {end}"); +} + +#[cold] +#[track_caller] +pub(crate) fn str_start_index_overflow_fail() -> ! { + panic!("attempted to index JavaStr from after maximum usize"); +} + +#[cold] +#[track_caller] +pub(crate) fn str_end_index_overflow_fail() -> ! 
{ + panic!("attempted to index JavaStr up to maximum usize") +} + +#[inline] +#[track_caller] +pub(crate) fn to_range_checked(range: R, bounds: RangeTo) -> Range +where + R: RangeBounds, +{ + let len = bounds.end; + + let start = range.start_bound(); + let start = match start { + Bound::Included(&start) => start, + Bound::Excluded(start) => start + .checked_add(1) + .unwrap_or_else(|| str_start_index_overflow_fail()), + Bound::Unbounded => 0, + }; + + let end: Bound<&usize> = range.end_bound(); + let end = match end { + Bound::Included(end) => end + .checked_add(1) + .unwrap_or_else(|| str_end_index_overflow_fail()), + Bound::Excluded(&end) => end, + Bound::Unbounded => len, + }; + + if start > end { + str_index_order_fail(start, end); + } + if end > len { + str_end_index_len_fail(end, len); + } + + Range { start, end } +} diff --git a/valence_nbt/Cargo.toml b/valence_nbt/Cargo.toml new file mode 100644 index 0000000..2756d15 --- /dev/null +++ b/valence_nbt/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "valence_nbt" +version = "0.8.0" +description = "Minecraft's Named Binary Tag (NBT) format." 
+edition.workspace = true +license.workspace = true +repository.workspace = true +keywords = ["nbt", "minecraft", "serialization"] +categories = ["data-structures", "game-development"] + +[features] +binary = ["dep:byteorder", "dep:cesu8"] +java_string = ["dep:java_string"] +snbt = [] +preserve_order = ["dep:indexmap"] +serde = ["dep:serde", "dep:thiserror", "indexmap?/serde"] + +[dependencies] +byteorder = { version = "1.5.0", optional = true } +cesu8 = { version = "1.1.0", optional = true } +indexmap = { version = "2.2.6", optional = true } +java_string = { version = "0.1.2", path = "../java_string", optional = true } +serde = { version = "1.0.200", features = ["derive"], optional = true } +thiserror = { version = "1.0.59", optional = true } +uuid = { version = "1.8.0", optional = true } + +[dev-dependencies] +pretty_assertions = "1.4.0" +serde_json = "1.0.116" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[lints] +workspace = true diff --git a/valence_nbt/src/binary.rs b/valence_nbt/src/binary.rs new file mode 100644 index 0000000..a9d62f7 --- /dev/null +++ b/valence_nbt/src/binary.rs @@ -0,0 +1,71 @@ +//! Support for serializing and deserializing compounds in Java edition's binary +//! format. +//! +//! # Examples +//! +//! ``` +//! use valence_nbt::{compound, to_binary, Compound, List}; +//! +//! let c = compound! { +//! "byte" => 5_i8, +//! "string" => "hello", +//! "list_of_float" => List::Float(vec![ +//! 3.1415, +//! 2.7182, +//! 1.4142 +//! ]), +//! }; +//! +//! let mut buf = vec![]; +//! +//! to_binary(&mut buf, "", &c).unwrap(); +//! ``` +//! +//! Decode NBT data from its binary form. +//! +//! ``` +//! use valence_nbt::{compound, from_binary, Compound, Value}; +//! +//! let some_bytes = [10, 0, 0, 3, 0, 3, 105, 110, 116, 0, 0, 222, 173, 0]; +//! +//! let expected_value = compound! { +//! "int" => 0xdead +//! }; +//! +//! let (root_name, nbt) = from_binary(&mut some_bytes.as_slice()).unwrap().unwrap(); +//! 
+//! assert_eq!(nbt, Value::from(expected_value)); +//! assert_eq!(root_name, ""); +//! ``` + +mod decode; +mod encode; +mod modified_utf8; +#[cfg(test)] +mod tests; + +pub use decode::*; +pub use encode::*; + +use crate::Tag; + +impl Tag { + /// Returns the name of this tag for error reporting purposes. + const fn name(self) -> &'static str { + match self { + Tag::End => "end", + Tag::Byte => "byte", + Tag::Short => "short", + Tag::Int => "int", + Tag::Long => "long", + Tag::Float => "float", + Tag::Double => "double", + Tag::ByteArray => "byte array", + Tag::String => "string", + Tag::List => "list", + Tag::Compound => "compound", + Tag::IntArray => "int array", + Tag::LongArray => "long array", + } + } +} diff --git a/valence_nbt/src/binary/decode.rs b/valence_nbt/src/binary/decode.rs new file mode 100644 index 0000000..19c291c --- /dev/null +++ b/valence_nbt/src/binary/decode.rs @@ -0,0 +1,429 @@ +use std::borrow::Cow; +use std::hash::Hash; +use std::{fmt, io, mem}; + +use crate::conv::u8_slice_as_i8_slice; +use crate::tag::Tag; +use crate::{Compound, Error, List, Result, Value}; + +/// Decode an NBT value from the given buffer of bytes. +/// +/// Returns both the root NBT value and the root name (typically the empty +/// string). If the root value is of type [`Tag::End`], then `None` is returned. +/// If the data is malformed or the reader returns an error, then an error is +/// returned. +pub fn from_binary<'a, S>(reader: impl ReadBytes<'a>) -> Result)>> +where + S: FromModifiedUtf8<'a> + Hash + Ord, +{ + let mut state = DecodeState { reader, depth: 0 }; + + let tag = state.read_tag()?; + + if tag == Tag::End { + return Ok(None); + } + + let name = state.read_string::()?; + let value = state.read_value::(tag)?; + + debug_assert_eq!(state.depth, 0); + + Ok(Some((name, value))) +} + +/// Maximum recursion depth to prevent overflowing the call stack. +const MAX_DEPTH: usize = 512; + +struct DecodeState { + reader: R, + /// Current recursion depth. 
+ depth: usize, +} + +impl<'a, R: ReadBytes<'a>> DecodeState { + #[inline] + fn check_depth(&mut self, f: impl FnOnce(&mut Self) -> Result) -> Result { + if self.depth >= MAX_DEPTH { + return Err(Error::new_static("reached maximum recursion depth")); + } + + self.depth += 1; + let res = f(self); + self.depth -= 1; + res + } + + fn read_tag(&mut self) -> Result { + match self.read_byte()? { + 0 => Ok(Tag::End), + 1 => Ok(Tag::Byte), + 2 => Ok(Tag::Short), + 3 => Ok(Tag::Int), + 4 => Ok(Tag::Long), + 5 => Ok(Tag::Float), + 6 => Ok(Tag::Double), + 7 => Ok(Tag::ByteArray), + 8 => Ok(Tag::String), + 9 => Ok(Tag::List), + 10 => Ok(Tag::Compound), + 11 => Ok(Tag::IntArray), + 12 => Ok(Tag::LongArray), + byte => Err(Error::new_owned(format!("invalid tag byte of {byte:#x}"))), + } + } + + /// Read a value identified by the given tag. + /// + /// # Panics + /// + /// Panics if the tag is [`Tag::End`]. + #[track_caller] + fn read_value(&mut self, tag: Tag) -> Result> + where + S: FromModifiedUtf8<'a> + Hash + Ord, + { + Ok(match tag { + Tag::End => panic!("cannot read value of Tag_END"), + Tag::Byte => self.read_byte()?.into(), + Tag::Short => self.read_short()?.into(), + Tag::Int => self.read_int()?.into(), + Tag::Long => self.read_long()?.into(), + Tag::Float => self.read_float()?.into(), + Tag::Double => self.read_double()?.into(), + Tag::ByteArray => self.read_byte_array()?.into(), + Tag::String => Value::String(self.read_string::()?), + Tag::List => self.check_depth(|st| st.read_any_list::())?.into(), + Tag::Compound => self.check_depth(|st| st.read_compound::())?.into(), + Tag::IntArray => self.read_int_array()?.into(), + Tag::LongArray => self.read_long_array()?.into(), + }) + } + + fn read_byte(&mut self) -> Result { + Ok(self.reader.read_bytes(1)?[0] as i8) + } + + fn read_short(&mut self) -> Result { + Ok(i16::from_be_bytes( + self.reader.read_bytes(2)?.try_into().unwrap(), + )) + } + + fn read_int(&mut self) -> Result { + Ok(i32::from_be_bytes( + 
self.reader.read_bytes(4)?.try_into().unwrap(), + )) + } + + fn read_long(&mut self) -> Result { + Ok(i64::from_be_bytes( + self.reader.read_bytes(8)?.try_into().unwrap(), + )) + } + + fn read_float(&mut self) -> Result { + Ok(f32::from_be_bytes( + self.reader.read_bytes(4)?.try_into().unwrap(), + )) + } + + fn read_double(&mut self) -> Result { + Ok(f64::from_be_bytes( + self.reader.read_bytes(8)?.try_into().unwrap(), + )) + } + + fn read_byte_array(&mut self) -> Result> { + let len = self.read_int()?; + + if len.is_negative() { + return Err(Error::new_owned(format!( + "negative byte array length of {len}" + ))); + } + + if len as usize > self.reader.remaining() { + return Err(Error::new_owned(format!( + "byte array length of {len} exceeds remainder of input" + ))); + } + + let slice = u8_slice_as_i8_slice(self.reader.read_bytes(len as usize)?); + + debug_assert_eq!(slice.len(), len as usize); + + Ok(slice.into()) + } + + fn read_string(&mut self) -> Result + where + S: FromModifiedUtf8<'a>, + { + let len = self.read_short()? as usize; + + if len > self.reader.remaining() { + return Err(Error::new_owned(format!( + "string of length {len} exceeds remainder of input" + ))); + } + + S::from_modified_utf8(self.reader.read_bytes(len)?) + .map_err(|_| Error::new_static("could not decode modified UTF-8 data")) + } + + fn read_any_list(&mut self) -> Result> + where + S: FromModifiedUtf8<'a> + Hash + Ord, + { + match self.read_tag()? { + Tag::End => match self.read_int()? 
{ + 0 => Ok(List::End), + len => Err(Error::new_owned(format!( + "TAG_End list with nonzero length of {len}" + ))), + }, + Tag::Byte => Ok(self.read_list(Tag::Byte, |st| st.read_byte())?.into()), + Tag::Short => Ok(self.read_list(Tag::Short, |st| st.read_short())?.into()), + Tag::Int => Ok(self.read_list(Tag::Int, |st| st.read_int())?.into()), + Tag::Long => Ok(self.read_list(Tag::Long, |st| st.read_long())?.into()), + Tag::Float => Ok(self.read_list(Tag::Float, |st| st.read_float())?.into()), + Tag::Double => Ok(self.read_list(Tag::Double, |st| st.read_double())?.into()), + Tag::ByteArray => Ok(self + .read_list(Tag::ByteArray, |st| st.read_byte_array())? + .into()), + Tag::String => Ok(List::String( + self.read_list(Tag::String, |st| st.read_string::())?, + )), + Tag::List => self.check_depth(|st| { + Ok(st + .read_list(Tag::List, |st| st.read_any_list::())? + .into()) + }), + Tag::Compound => self.check_depth(|st| { + Ok(st + .read_list(Tag::Compound, |st| st.read_compound::())? + .into()) + }), + Tag::IntArray => Ok(self + .read_list(Tag::IntArray, |st| st.read_int_array())? + .into()), + Tag::LongArray => Ok(self + .read_list(Tag::LongArray, |st| st.read_long_array())? + .into()), + } + } + + /// Assumes the element tag has already been read. 
+ #[inline] + fn read_list(&mut self, elem_type: Tag, mut read_elem: F) -> Result> + where + F: FnMut(&mut Self) -> Result, + { + let len = self.read_int()?; + + if len.is_negative() { + return Err(Error::new_owned(format!( + "negative {} list length of {len}", + elem_type.name() + ))); + } + + let mut list = Vec::with_capacity(cautious_capacity::(len as usize)); + + for _ in 0..len { + list.push(read_elem(self)?); + } + + Ok(list) + } + + fn read_compound(&mut self) -> Result> + where + S: FromModifiedUtf8<'a> + Hash + Ord, + { + let mut compound = Compound::new(); + + loop { + let tag = self.read_tag()?; + if tag == Tag::End { + return Ok(compound); + } + + compound.insert(self.read_string::()?, self.read_value::(tag)?); + } + } + + fn read_int_array(&mut self) -> Result> { + let len = self.read_int()?; + + if len.is_negative() { + return Err(Error::new_owned(format!( + "negative int array length of {len}", + ))); + } + + if len as u64 * 4 > self.reader.remaining() as u64 { + return Err(Error::new_owned(format!( + "int array of length {len} exceeds remainder of input" + ))); + } + + let mut array = Vec::with_capacity(len as usize); + + // TODO: SIMDify the endian swapping? + for _ in 0..len { + array.push(self.read_int()?); + } + + Ok(array) + } + + fn read_long_array(&mut self) -> Result> { + let len = self.read_int()?; + + if len.is_negative() { + return Err(Error::new_owned(format!( + "negative long array length of {len}", + ))); + } + + if len as u64 * 8 > self.reader.remaining() as u64 { + return Err(Error::new_owned(format!( + "long array of length {len} exceeds remainder of input" + ))); + } + + let mut array = Vec::with_capacity(len as usize); + + // TODO: SIMDify the endian swapping? + for _ in 0..len { + array.push(self.read_long()?); + } + + Ok(array) + } +} + +/// Prevents preallocating too much memory in case we get a malicious or invalid +/// sequence length. +fn cautious_capacity(size_hint: usize) -> usize { + // TODO: How large can we make this? 
+ const MAX_PREALLOC_BYTES: usize = 2048; + + if mem::size_of::() == 0 { + 0 + } else { + size_hint.min(MAX_PREALLOC_BYTES / mem::size_of::()) + } +} + +pub trait ReadBytes<'a> { + fn read_bytes(&mut self, count: usize) -> io::Result<&'a [u8]>; + + /// Returns the number of remaining bytes in the input. + fn remaining(&self) -> usize; +} + +impl<'a, T> ReadBytes<'a> for &mut T +where + T: ReadBytes<'a>, +{ + fn read_bytes(&mut self, count: usize) -> io::Result<&'a [u8]> { + (**self).read_bytes(count) + } + + fn remaining(&self) -> usize { + (**self).remaining() + } +} + +impl<'a> ReadBytes<'a> for &'a [u8] { + #[inline] + fn read_bytes(&mut self, count: usize) -> io::Result<&'a [u8]> { + if count > self.len() { + return Err(io::ErrorKind::UnexpectedEof.into()); + } + + let (l, r) = self.split_at(count); + *self = r; + Ok(l) + } + + fn remaining(&self) -> usize { + self.len() + } +} + +impl<'a> ReadBytes<'a> for io::Cursor<&'a [u8]> { + #[inline] + fn read_bytes(&mut self, count: usize) -> io::Result<&'a [u8]> { + let remaining_slice = + &self.get_ref()[self.position().min(self.get_ref().len() as u64) as usize..]; + + if count > remaining_slice.len() { + return Err(io::ErrorKind::UnexpectedEof.into()); + } + + self.set_position(self.position() + count as u64); + + Ok(&remaining_slice[..count]) + } + + fn remaining(&self) -> usize { + self.get_ref().len() - self.position() as usize + } +} + +pub trait FromModifiedUtf8<'de>: Sized { + fn from_modified_utf8(bytes: &'de [u8]) -> Result; +} + +impl<'a> FromModifiedUtf8<'a> for Cow<'a, str> { + fn from_modified_utf8(bytes: &'a [u8]) -> Result { + cesu8::from_java_cesu8(bytes).map_err(move |_| FromModifiedUtf8Error) + } +} + +impl<'a> FromModifiedUtf8<'a> for String { + fn from_modified_utf8(bytes: &'a [u8]) -> Result { + match cesu8::from_java_cesu8(bytes) { + Ok(str) => Ok(str.into_owned()), + Err(_) => Err(FromModifiedUtf8Error), + } + } +} + +impl<'a> FromModifiedUtf8<'a> for Box { + fn from_modified_utf8(bytes: &'a 
[u8]) -> Result { + String::from_modified_utf8(bytes).map(|s| s.into()) + } +} + +#[cfg(feature = "java_string")] +impl<'a> FromModifiedUtf8<'a> for Cow<'a, java_string::JavaStr> { + fn from_modified_utf8(bytes: &'a [u8]) -> Result { + java_string::JavaStr::from_modified_utf8(bytes).map_err(|_| FromModifiedUtf8Error) + } +} + +#[cfg(feature = "java_string")] +impl<'a> FromModifiedUtf8<'a> for java_string::JavaString { + fn from_modified_utf8(bytes: &'a [u8]) -> Result { + match java_string::JavaStr::from_modified_utf8(bytes) { + Ok(str) => Ok(str.into_owned()), + Err(_) => Err(FromModifiedUtf8Error), + } + } +} + +#[derive(Copy, Clone, Debug)] +pub struct FromModifiedUtf8Error; + +impl fmt::Display for FromModifiedUtf8Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("could not decode modified UTF-8 string") + } +} + +impl std::error::Error for FromModifiedUtf8Error {} diff --git a/valence_nbt/src/binary/encode.rs b/valence_nbt/src/binary/encode.rs new file mode 100644 index 0000000..0caa152 --- /dev/null +++ b/valence_nbt/src/binary/encode.rs @@ -0,0 +1,325 @@ +use std::borrow::Cow; +use std::hash::Hash; +use std::io::{self, Write}; + +use byteorder::{BigEndian, WriteBytesExt}; + +use super::modified_utf8; +use crate::conv::i8_slice_as_u8_slice; +use crate::tag::Tag; +use crate::value::ValueRef; +use crate::{Compound, Error, List, Result}; + +/// Encode binary NBT data to the given writer. +pub fn to_binary<'a, S>( + writer: impl Write, + root_name: &(impl ToModifiedUtf8 + ?Sized), + value: impl Into>, +) -> Result<()> +where + S: ToModifiedUtf8 + Hash + Ord + 'a, +{ + let value = value.into(); + + let mut state = EncodeState { writer }; + + state.write_tag(value.tag())?; + state.write_string(root_name)?; + state.write_value(value) +} + +struct EncodeState { + writer: W, +} + +impl EncodeState { + fn write_tag(&mut self, tag: Tag) -> Result<()> { + Ok(self.writer.write_u8(tag as u8)?) 
+ } + + fn write_value(&mut self, v: ValueRef) -> Result<()> + where + S: ToModifiedUtf8 + Hash + Ord, + { + match v { + ValueRef::Byte(v) => self.write_byte(*v), + ValueRef::Short(v) => self.write_short(*v), + ValueRef::Int(v) => self.write_int(*v), + ValueRef::Long(v) => self.write_long(*v), + ValueRef::Float(v) => self.write_float(*v), + ValueRef::Double(v) => self.write_double(*v), + ValueRef::ByteArray(v) => self.write_byte_array(v), + ValueRef::String(v) => self.write_string(v), + ValueRef::List(v) => self.write_any_list(v), + ValueRef::Compound(v) => self.write_compound(v), + ValueRef::IntArray(v) => self.write_int_array(v), + ValueRef::LongArray(v) => self.write_long_array(v), + } + } + + fn write_byte(&mut self, byte: i8) -> Result<()> { + Ok(self.writer.write_i8(byte)?) + } + + fn write_short(&mut self, short: i16) -> Result<()> { + Ok(self.writer.write_i16::(short)?) + } + + fn write_int(&mut self, int: i32) -> Result<()> { + Ok(self.writer.write_i32::(int)?) + } + + fn write_long(&mut self, long: i64) -> Result<()> { + Ok(self.writer.write_i64::(long)?) + } + + fn write_float(&mut self, float: f32) -> Result<()> { + Ok(self.writer.write_f32::(float)?) + } + + fn write_double(&mut self, double: f64) -> Result<()> { + Ok(self.writer.write_f64::(double)?) + } + + fn write_byte_array(&mut self, bytes: &[i8]) -> Result<()> { + match bytes.len().try_into() { + Ok(len) => self.write_int(len)?, + Err(_) => { + return Err(Error::new_owned(format!( + "byte array of length {} exceeds maximum of i32::MAX", + bytes.len(), + ))) + } + } + + Ok(self.writer.write_all(i8_slice_as_u8_slice(bytes))?) 
+ } + + fn write_string(&mut self, s: &S) -> Result<()> + where + S: ToModifiedUtf8 + ?Sized, + { + let len = s.modified_uf8_len(); + + match len.try_into() { + Ok(n) => self.writer.write_u16::(n)?, + Err(_) => { + return Err(Error::new_owned(format!( + "string of length {len} exceeds maximum of u16::MAX" + ))) + } + } + + s.to_modified_utf8(len, &mut self.writer)?; + + Ok(()) + } + + fn write_any_list(&mut self, list: &List) -> Result<()> + where + S: ToModifiedUtf8 + Hash + Ord, + { + match list { + List::End => { + self.write_tag(Tag::End)?; + // Length + self.writer.write_i32::(0)?; + Ok(()) + } + List::Byte(v) => { + self.write_tag(Tag::Byte)?; + + match v.len().try_into() { + Ok(len) => self.write_int(len)?, + Err(_) => { + return Err(Error::new_owned(format!( + "byte list of length {} exceeds maximum of i32::MAX", + v.len(), + ))) + } + } + + Ok(self.writer.write_all(i8_slice_as_u8_slice(v))?) + } + List::Short(sl) => self.write_list(sl, Tag::Short, |st, v| st.write_short(*v)), + List::Int(il) => self.write_list(il, Tag::Int, |st, v| st.write_int(*v)), + List::Long(ll) => self.write_list(ll, Tag::Long, |st, v| st.write_long(*v)), + List::Float(fl) => self.write_list(fl, Tag::Float, |st, v| st.write_float(*v)), + List::Double(dl) => self.write_list(dl, Tag::Double, |st, v| st.write_double(*v)), + List::ByteArray(v) => { + self.write_list(v, Tag::ByteArray, |st, v| st.write_byte_array(v)) + } + List::String(v) => self.write_list(v, Tag::String, |st, v| st.write_string(v)), + List::List(v) => self.write_list(v, Tag::List, |st, v| st.write_any_list(v)), + List::Compound(v) => self.write_list(v, Tag::Compound, |st, v| st.write_compound(v)), + List::IntArray(v) => self.write_list(v, Tag::IntArray, |st, v| st.write_int_array(v)), + List::LongArray(v) => { + self.write_list(v, Tag::LongArray, |st, v| st.write_long_array(v)) + } + } + } + + fn write_list(&mut self, list: &[T], elem_type: Tag, mut write_elem: F) -> Result<()> + where + F: FnMut(&mut Self, &T) -> 
Result<()>, + { + self.write_tag(elem_type)?; + + match list.len().try_into() { + Ok(len) => self.writer.write_i32::(len)?, + Err(_) => { + return Err(Error::new_owned(format!( + "{} list of length {} exceeds maximum of i32::MAX", + list.len(), + elem_type.name() + ))) + } + } + + for elem in list { + write_elem(self, elem)?; + } + + Ok(()) + } + + fn write_compound(&mut self, c: &Compound) -> Result<()> + where + S: ToModifiedUtf8 + Hash + Ord, + { + for (k, v) in c { + self.write_tag(v.tag())?; + self.write_string(k)?; + self.write_value(v.into())?; + } + self.write_tag(Tag::End)?; + + Ok(()) + } + + fn write_int_array(&mut self, ia: &[i32]) -> Result<()> { + match ia.len().try_into() { + Ok(len) => self.write_int(len)?, + Err(_) => { + return Err(Error::new_owned(format!( + "int array of length {} exceeds maximum of i32::MAX", + ia.len(), + ))) + } + } + + for i in ia { + self.write_int(*i)?; + } + + Ok(()) + } + + fn write_long_array(&mut self, la: &[i64]) -> Result<()> { + match la.len().try_into() { + Ok(len) => self.write_int(len)?, + Err(_) => { + return Err(Error::new_owned(format!( + "long array of length {} exceeds maximum of i32::MAX", + la.len(), + ))) + } + } + + for l in la { + self.write_long(*l)?; + } + + Ok(()) + } +} + +/// A string type which can be encoded into Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8). +pub trait ToModifiedUtf8 { + fn modified_uf8_len(&self) -> usize; + fn to_modified_utf8(&self, encoded_len: usize, writer: W) -> io::Result<()>; +} + +impl ToModifiedUtf8 for str { + fn modified_uf8_len(&self) -> usize { + modified_utf8::encoded_len(self.as_bytes()) + } + + fn to_modified_utf8(&self, encoded_len: usize, mut writer: W) -> io::Result<()> { + // Conversion to modified UTF-8 always increases the size of the string. + // If the new len is equal to the original len, we know it doesn't need + // to be re-encoded. 
+ if self.len() == encoded_len { + writer.write_all(self.as_bytes()) + } else { + modified_utf8::write_modified_utf8(writer, self) + } + } +} + +impl ToModifiedUtf8 for Cow<'_, str> { + #[inline] + fn modified_uf8_len(&self) -> usize { + str::modified_uf8_len(self) + } + + fn to_modified_utf8(&self, encoded_len: usize, writer: W) -> io::Result<()> { + str::to_modified_utf8(self, encoded_len, writer) + } +} + +impl ToModifiedUtf8 for String { + #[inline] + fn modified_uf8_len(&self) -> usize { + str::modified_uf8_len(self) + } + + fn to_modified_utf8(&self, encoded_len: usize, writer: W) -> io::Result<()> { + str::to_modified_utf8(self, encoded_len, writer) + } +} + +impl ToModifiedUtf8 for Box { + fn modified_uf8_len(&self) -> usize { + str::modified_uf8_len(self) + } + + fn to_modified_utf8(&self, encoded_len: usize, writer: W) -> io::Result<()> { + str::to_modified_utf8(self, encoded_len, writer) + } +} + +#[cfg(feature = "java_string")] +impl ToModifiedUtf8 for java_string::JavaStr { + fn modified_uf8_len(&self) -> usize { + modified_utf8::encoded_len(self.as_bytes()) + } + + fn to_modified_utf8(&self, _encoded_len: usize, mut writer: W) -> io::Result<()> { + writer.write_all(&self.to_modified_utf8()) + } +} + +#[cfg(feature = "java_string")] +impl ToModifiedUtf8 for Cow<'_, java_string::JavaStr> { + #[inline] + fn modified_uf8_len(&self) -> usize { + java_string::JavaStr::modified_uf8_len(self) + } + + fn to_modified_utf8(&self, encoded_len: usize, writer: W) -> io::Result<()> { + ::to_modified_utf8(self, encoded_len, writer) + } +} + +#[cfg(feature = "java_string")] +impl ToModifiedUtf8 for java_string::JavaString { + #[inline] + fn modified_uf8_len(&self) -> usize { + java_string::JavaStr::modified_uf8_len(self) + } + + fn to_modified_utf8(&self, encoded_len: usize, writer: W) -> io::Result<()> { + ::to_modified_utf8(self, encoded_len, writer) + } +} diff --git a/valence_nbt/src/binary/modified_utf8.rs b/valence_nbt/src/binary/modified_utf8.rs new file mode 
100644 index 0000000..9ee56f7 --- /dev/null +++ b/valence_nbt/src/binary/modified_utf8.rs @@ -0,0 +1,127 @@ +//! Utilities for working with Java's "Modified UTF-8" character encoding. +//! +//! For more information, refer to [Wikipedia]. +//! +//! [Wikipedia]: https://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8 + +use std::io; +use std::io::Write; +use std::str::from_utf8_unchecked; + +use byteorder::{BigEndian, WriteBytesExt}; + +pub(crate) fn write_modified_utf8(mut writer: impl Write, text: &str) -> io::Result<()> { + let bytes = text.as_bytes(); + let mut i = 0; + + while i < bytes.len() { + match bytes[i] { + 0 => { + writer.write_u16::(0xC080)?; + i += 1; + } + b @ 1..=127 => { + writer.write_u8(b)?; + i += 1; + } + b => { + let w = utf8_char_width(b); + debug_assert!(w <= 4); + debug_assert!(i + w <= bytes.len()); + + if w != 4 { + writer.write_all(&bytes[i..i + w])?; + } else { + let s = unsafe { from_utf8_unchecked(&bytes[i..i + w]) }; + let c = s.chars().next().unwrap() as u32 - 0x10000; + + let s0 = ((c >> 10) as u16) | 0xD800; + let s1 = ((c & 0x3FF) as u16) | 0xDC00; + + writer.write_all(encode_surrogate(s0).as_slice())?; + writer.write_all(encode_surrogate(s1).as_slice())?; + } + i += w; + } + } + } + + Ok(()) +} + +const fn utf8_char_width(first_byte: u8) -> usize { + const UTF8_CHAR_WIDTH: [u8; 256] = [ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]; + + UTF8_CHAR_WIDTH[first_byte as usize] as usize +} + +fn encode_surrogate(surrogate: u16) -> [u8; 3] { + debug_assert!((0xD800..=0xDFFF).contains(&surrogate)); + + const TAG_CONT_U8: u8 = 0b1000_0000_u8; + [ + 0b11100000 | ((surrogate & 0b11110000_00000000) >> 12) as u8, + TAG_CONT_U8 | ((surrogate & 0b00001111_11000000) >> 6) as u8, + TAG_CONT_U8 | (surrogate & 0b00000000_00111111) as u8, + ] +} + +pub(crate) fn encoded_len(bytes: &[u8]) -> usize { + let mut n = 0; + let mut i = 0; + + while i < bytes.len() { + match bytes[i] { + // Fast path for ASCII here makes a huge difference in benchmarks. + 1..=127 => { + n += 1; + i += 1; + } + 0 => { + n += 2; + i += 1; + } + b => { + let w = utf8_char_width(b); + + if w == 4 { + n += 6; + } else { + n += w; + } + + i += w; + } + } + } + + n +} + +#[cfg(test)] +#[test] +fn equivalence() { + fn check(s: &str) { + let mut ours = vec![]; + + let theirs = cesu8::to_java_cesu8(s); + write_modified_utf8(&mut ours, s).unwrap(); + + assert_eq!(theirs, ours); + assert_eq!(theirs.len(), encoded_len(s.as_bytes())); + } + + check("Mary had a little lamb\0"); + check("๐Ÿคก๐Ÿ’ฉ๐Ÿ‘ป๐Ÿ’€โ˜ ๐Ÿ‘ฝ๐Ÿ‘พ๐Ÿค–๐ŸŽƒ๐Ÿ˜บ๐Ÿ˜ธ๐Ÿ˜น๐Ÿ˜ป๐Ÿ˜ผ๐Ÿ˜ฝ๐Ÿ™€๐Ÿ˜ฟ๐Ÿ˜พ"); + check("ร…ร†ร‡รˆร˜รตรทยฃยฅรฝ"); +} diff --git a/valence_nbt/src/binary/tests.rs b/valence_nbt/src/binary/tests.rs new file mode 100644 index 0000000..635b182 --- /dev/null +++ b/valence_nbt/src/binary/tests.rs @@ -0,0 +1,120 @@ +use crate::tag::Tag; +use crate::{compound, from_binary, to_binary, Compound, List, Value}; + +const ROOT_NAME: &str = "The root nameโ€ฝ"; + +#[test] +fn round_trip() { + let mut buf = vec![]; + + let compound = example_compound(); + + to_binary(&mut buf, ROOT_NAME, &compound).unwrap(); + + println!("{buf:?}"); + + let (root_name, decoded) = from_binary(&mut buf.as_slice()).unwrap().unwrap(); + + assert_eq!(root_name, ROOT_NAME); + 
assert_eq!(Value::from(compound), decoded); +} + +#[test] +fn check_min_sizes() { + fn check(min_val: Value, expected_size: usize) { + /// TAG_Compound + root name + field tag + field name + TAG_End + const COMPOUND_OVERHEAD: usize = 1 + 2 + 1 + 2 + 1; + + let dbg = format!("{min_val:?}"); + let mut buf = vec![]; + + to_binary(&mut buf, "", &compound!("" => min_val)).unwrap(); + + assert_eq!( + expected_size, + buf.len() - COMPOUND_OVERHEAD, + "size mismatch for {dbg}" + ); + } + + check(Value::Byte(0), 1); + check(Value::Short(0), 2); + check(Value::Int(0), 4); + check(Value::Long(0), 8); + check(Value::Float(0.0), 4); + check(Value::Double(0.0), 8); + check(Value::ByteArray([].into()), 4); + check(Value::String("".into()), 2); + check(Value::List(Vec::::new().into()), 5); + check(Value::Compound(compound!()), 1); + check(Value::IntArray([].into()), 4); + check(Value::LongArray([].into()), 4); +} + +#[test] +fn deeply_nested_compound_decode() { + let mut buf = vec![Tag::Compound as u8, 0, 0]; // Root compound + let n = 10_000; + + for _ in 0..n { + buf.extend([Tag::Compound as u8, 0, 0]); + } + + buf.extend((0..n).map(|_| Tag::End as u8)); + + buf.push(Tag::End as u8); // End root compound + + // Should not overflow the stack + let _ = from_binary::(&mut buf.as_slice()); +} + +#[test] +fn deeply_nested_list_decode() { + // Root compound with one field. + let mut buf = vec![Tag::Compound as u8, 0, 0, Tag::List as u8, 0, 0]; + let n = 10_000; + + for _ in 0..n - 1 { + buf.extend([Tag::List as u8, 0, 0, 0, 1]); // List of list + } + + // Last list is an empty list of bytes. + buf.extend([Tag::Byte as u8, 0, 0, 0, 0]); + + buf.push(Tag::End as u8); // End root compound + + // Should not overflow the stack + let _ = from_binary::(&mut buf.as_slice()); +} + +fn example_compound() -> Compound { + fn inner() -> Compound { + compound! { + "int" => i32::MIN, + "long" => i64::MAX, + "float" => 1e10_f32, + "double" => f64::INFINITY, + } + } + + compound! 
{ + "byte" => 123_i8, + "list_of_int" => List::Int(vec![3, -7, 5]), + "list_of_string" => List::String(vec![ + "foo".to_owned(), + "bar".to_owned(), + "baz".to_owned() + ]), + "list_of_end" => List::End, + "string" => "aรฉๆ—ฅ", + "compound" => inner(), + "list_of_compound" => List::Compound(vec![ + inner(), + inner(), + inner(), + ]), + "int_array" => vec![5, -9, i32::MIN, 0, i32::MAX], + "byte_array" => vec![0_i8, 2, 3], + "long_array" => vec![123_i64, 456, 789], + } +} diff --git a/valence_nbt/src/compound.rs b/valence_nbt/src/compound.rs new file mode 100644 index 0000000..913cae5 --- /dev/null +++ b/valence_nbt/src/compound.rs @@ -0,0 +1,725 @@ +use std::borrow::{Borrow, Cow}; +use std::fmt; +use std::hash::Hash; +use std::iter::FusedIterator; +use std::ops::{Index, IndexMut}; + +use crate::Value; + +/// A map type with [`String`] keys and [`Value`] values. +#[derive(Clone, Default)] +pub struct Compound { + map: Map, +} + +#[cfg(not(feature = "preserve_order"))] +type Map = std::collections::BTreeMap>; + +#[cfg(feature = "preserve_order")] +type Map = indexmap::IndexMap>; + +impl fmt::Debug for Compound { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.map.fmt(f) + } +} + +impl PartialEq for Compound +where + S: Ord + Hash, +{ + fn eq(&self, other: &Self) -> bool { + self.map == other.map + } +} + +#[cfg(feature = "serde")] +impl serde::Serialize for Compound +where + Str: Ord + Hash + serde::Serialize, +{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.map.serialize(serializer) + } +} + +#[cfg(feature = "serde")] +impl<'de, S> serde::Deserialize<'de> for Compound +where + S: Ord + Hash + serde::Deserialize<'de>, +{ + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + Map::::deserialize(deserializer).map(|map| Self { map }) + } + + fn deserialize_in_place(deserializer: D, place: &mut Self) -> Result<(), D::Error> + where + D: serde::Deserializer<'de>, + { + 
Map::::deserialize_in_place(deserializer, &mut place.map) + } +} + +impl Compound { + pub fn new() -> Self { + Self { map: Map::new() } + } + + pub fn with_capacity(cap: usize) -> Self { + Self { + #[cfg(not(feature = "preserve_order"))] + map: { + // BTreeMap does not have with_capacity. + let _ = cap; + Map::new() + }, + #[cfg(feature = "preserve_order")] + map: Map::with_capacity(cap), + } + } + + pub fn clear(&mut self) { + self.map.clear(); + } +} + +impl Compound +where + S: Ord + Hash, +{ + pub fn get(&self, k: &Q) -> Option<&Value> + where + Q: ?Sized + AsBorrowed, + >::Borrowed: Hash + Ord, + S: Borrow<>::Borrowed>, + { + self.map.get(k.as_borrowed()) + } + + pub fn contains_key(&self, k: &Q) -> bool + where + Q: ?Sized + AsBorrowed, + >::Borrowed: Hash + Ord, + S: Borrow<>::Borrowed>, + { + self.map.contains_key(k.as_borrowed()) + } + + pub fn get_mut(&mut self, k: &Q) -> Option<&mut Value> + where + Q: ?Sized + AsBorrowed, + >::Borrowed: Hash + Ord, + S: Borrow<>::Borrowed>, + { + self.map.get_mut(k.as_borrowed()) + } + + pub fn get_key_value(&self, k: &Q) -> Option<(&S, &Value)> + where + Q: ?Sized + AsBorrowed, + >::Borrowed: Hash + Ord, + S: Borrow<>::Borrowed>, + { + self.map.get_key_value(k.as_borrowed()) + } + + pub fn insert(&mut self, k: K, v: V) -> Option> + where + K: Into, + V: Into>, + { + self.map.insert(k.into(), v.into()) + } + + pub fn remove(&mut self, k: &Q) -> Option> + where + Q: ?Sized + AsBorrowed, + >::Borrowed: Hash + Ord, + S: Borrow<>::Borrowed>, + { + #[cfg(feature = "preserve_order")] + return self.swap_remove(k); + #[cfg(not(feature = "preserve_order"))] + return self.map.remove(k.as_borrowed()); + } + + #[cfg(feature = "preserve_order")] + pub fn swap_remove(&mut self, k: &Q) -> Option> + where + Q: ?Sized + AsBorrowed, + >::Borrowed: Hash + Ord, + S: Borrow<>::Borrowed>, + { + self.map.swap_remove(k.as_borrowed()) + } + + #[cfg(feature = "preserve_order")] + pub fn shift_remove(&mut self, k: &Q) -> Option> + where + Q: 
?Sized + AsBorrowed, + >::Borrowed: Hash + Ord, + S: Borrow<>::Borrowed>, + { + self.map.shift_remove(k.as_borrowed()) + } + + pub fn remove_entry(&mut self, k: &Q) -> Option<(S, Value)> + where + S: Borrow, + Q: ?Sized + Ord + Hash, + { + #[cfg(feature = "preserve_order")] + return self.swap_remove_entry(k); + #[cfg(not(feature = "preserve_order"))] + return self.map.remove_entry(k); + } + + #[cfg(feature = "preserve_order")] + pub fn swap_remove_entry(&mut self, k: &Q) -> Option<(S, Value)> + where + S: Borrow, + Q: ?Sized + Ord + Hash, + { + self.map.swap_remove_entry(k) + } + + #[cfg(feature = "preserve_order")] + pub fn shift_remove_entry(&mut self, k: &Q) -> Option<(S, Value)> + where + S: Borrow, + Q: ?Sized + Ord + Hash, + { + self.map.shift_remove_entry(k) + } + + pub fn append(&mut self, other: &mut Self) { + #[cfg(not(feature = "preserve_order"))] + self.map.append(&mut other.map); + + #[cfg(feature = "preserve_order")] + for (k, v) in std::mem::take(&mut other.map) { + self.map.insert(k, v); + } + } + + pub fn entry(&mut self, k: K) -> Entry + where + K: Into, + { + #[cfg(not(feature = "preserve_order"))] + use std::collections::btree_map::Entry as EntryImpl; + + #[cfg(feature = "preserve_order")] + use indexmap::map::Entry as EntryImpl; + + match self.map.entry(k.into()) { + EntryImpl::Vacant(ve) => Entry::Vacant(VacantEntry { entry: ve }), + EntryImpl::Occupied(oe) => Entry::Occupied(OccupiedEntry { entry: oe }), + } + } + + pub fn len(&self) -> usize { + self.map.len() + } + + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + pub fn iter(&self) -> Iter { + Iter { + iter: self.map.iter(), + } + } + + pub fn iter_mut(&mut self) -> IterMut { + IterMut { + iter: self.map.iter_mut(), + } + } + + pub fn keys(&self) -> Keys { + Keys { + iter: self.map.keys(), + } + } + + pub fn values(&self) -> Values { + Values { + iter: self.map.values(), + } + } + + pub fn values_mut(&mut self) -> ValuesMut { + ValuesMut { + iter: self.map.values_mut(), + } 
+ } + + pub fn retain(&mut self, f: F) + where + F: FnMut(&S, &mut Value) -> bool, + { + self.map.retain(f) + } + + /// Inserts all items from `other` into `self` recursively. + /// + /// # Example + /// + /// ``` + /// use valence_nbt::compound; + /// + /// let mut this = compound! { + /// "foo" => 10, + /// "bar" => compound! { + /// "baz" => 20, + /// } + /// }; + /// + /// let other = compound! { + /// "foo" => 15, + /// "bar" => compound! { + /// "quux" => "hello", + /// } + /// }; + /// + /// this.merge(other); + /// + /// assert_eq!( + /// this, + /// compound! { + /// "foo" => 15, + /// "bar" => compound! { + /// "baz" => 20, + /// "quux" => "hello", + /// } + /// } + /// ); + /// ``` + pub fn merge(&mut self, other: Compound) { + for (k, v) in other { + match (self.entry(k), v) { + (Entry::Occupied(mut oe), Value::Compound(other)) => { + if let Value::Compound(this) = oe.get_mut() { + // Insert compound recursively. + this.merge(other); + } + } + (Entry::Occupied(mut oe), value) => { + oe.insert(value); + } + (Entry::Vacant(ve), value) => { + ve.insert(value); + } + } + } + } +} + +/// Trait that can be used as a key to query a compound. Basically something +/// that can be converted to a type `B` such that `S: Borrow`. 
+pub trait AsBorrowed { + type Borrowed: ?Sized; + + fn as_borrowed(&self) -> &Self::Borrowed; +} + +impl AsBorrowed for Q +where + String: Borrow, +{ + type Borrowed = Q; + + #[inline] + fn as_borrowed(&self) -> &Q { + self + } +} + +impl<'a, Q: ?Sized> AsBorrowed> for Q +where + Cow<'a, str>: Borrow, +{ + type Borrowed = Q; + + #[inline] + fn as_borrowed(&self) -> &Q { + self + } +} + +#[cfg(feature = "java_string")] +impl AsBorrowed for Q +where + for<'a> &'a Q: Into<&'a java_string::JavaStr>, +{ + type Borrowed = java_string::JavaStr; + + fn as_borrowed(&self) -> &Self::Borrowed { + self.into() + } +} + +#[cfg(feature = "java_string")] +impl AsBorrowed> for Q +where + for<'a> &'a Q: Into<&'a java_string::JavaStr>, +{ + type Borrowed = java_string::JavaStr; + + fn as_borrowed(&self) -> &Self::Borrowed { + self.into() + } +} + +impl Extend<(S, Value)> for Compound +where + S: Ord + Hash, +{ + fn extend(&mut self, iter: T) + where + T: IntoIterator)>, + { + self.map.extend(iter) + } +} + +impl FromIterator<(S, Value)> for Compound +where + S: Ord + Hash, +{ + fn from_iter(iter: T) -> Self + where + T: IntoIterator)>, + { + Self { + map: Map::from_iter(iter), + } + } +} + +pub enum Entry<'a, S = String> { + Vacant(VacantEntry<'a, S>), + Occupied(OccupiedEntry<'a, S>), +} + +impl<'a, S> Entry<'a, S> +where + S: Hash + Ord, +{ + pub fn key(&self) -> &S { + match self { + Entry::Vacant(ve) => ve.key(), + Entry::Occupied(oe) => oe.key(), + } + } + + pub fn or_insert>>(self, default: V) -> &'a mut Value { + match self { + Entry::Vacant(ve) => ve.insert(default), + Entry::Occupied(oe) => oe.into_mut(), + } + } + + pub fn or_insert_with(self, default: F) -> &'a mut Value + where + F: FnOnce() -> V, + V: Into>, + { + match self { + Entry::Vacant(ve) => ve.insert(default()), + Entry::Occupied(oe) => oe.into_mut(), + } + } + + pub fn and_modify(self, f: F) -> Self + where + F: FnOnce(&mut Value), + { + match self { + Entry::Vacant(ve) => Entry::Vacant(ve), + 
Entry::Occupied(mut oe) => { + f(oe.get_mut()); + Entry::Occupied(oe) + } + } + } +} + +impl fmt::Debug for Entry<'_, S> +where + S: fmt::Debug + Ord, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Vacant(entry) => f.debug_tuple("Vacant").field(entry).finish(), + Self::Occupied(entry) => f.debug_tuple("Occupied").field(entry).finish(), + } + } +} + +pub struct VacantEntry<'a, S = String> { + #[cfg(not(feature = "preserve_order"))] + entry: std::collections::btree_map::VacantEntry<'a, S, Value>, + #[cfg(feature = "preserve_order")] + entry: indexmap::map::VacantEntry<'a, S, Value>, +} + +impl<'a, S> VacantEntry<'a, S> +where + S: Ord + Hash, +{ + pub fn key(&self) -> &S { + self.entry.key() + } + + pub fn insert>>(self, v: V) -> &'a mut Value { + self.entry.insert(v.into()) + } +} + +impl fmt::Debug for VacantEntry<'_, S> +where + S: fmt::Debug + Ord, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("VacantEntry") + .field("entry", &self.entry) + .finish() + } +} + +pub struct OccupiedEntry<'a, S = String> { + #[cfg(not(feature = "preserve_order"))] + entry: std::collections::btree_map::OccupiedEntry<'a, S, Value>, + #[cfg(feature = "preserve_order")] + entry: indexmap::map::OccupiedEntry<'a, S, Value>, +} + +impl<'a, S> OccupiedEntry<'a, S> +where + S: Hash + Ord, +{ + pub fn key(&self) -> &S { + self.entry.key() + } + + pub fn get(&self) -> &Value { + self.entry.get() + } + + pub fn get_mut(&mut self) -> &mut Value { + self.entry.get_mut() + } + + pub fn into_mut(self) -> &'a mut Value { + self.entry.into_mut() + } + + pub fn insert>>(&mut self, v: V) -> Value { + self.entry.insert(v.into()) + } + + pub fn remove(self) -> Value { + #[cfg(feature = "preserve_order")] + return self.swap_remove(); + #[cfg(not(feature = "preserve_order"))] + return self.entry.remove(); + } + + #[cfg(feature = "preserve_order")] + pub fn swap_remove(self) -> Value { + self.entry.swap_remove() + } + + #[cfg(feature 
= "preserve_order")] + pub fn shift_remove(self) -> Value { + self.entry.shift_remove() + } +} + +impl fmt::Debug for OccupiedEntry<'_, S> +where + S: fmt::Debug + Ord, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("OccupiedEntry") + .field("entry", &self.entry) + .finish() + } +} + +impl Index<&'_ Q> for Compound +where + S: Borrow + Ord + Hash, + Q: ?Sized + Ord + Hash, +{ + type Output = Value; + + fn index(&self, index: &Q) -> &Self::Output { + self.map.index(index) + } +} + +impl IndexMut<&'_ Q> for Compound +where + S: Borrow + Hash + Ord, + Q: ?Sized + Ord + Hash, +{ + fn index_mut(&mut self, index: &Q) -> &mut Self::Output { + self.map.get_mut(index).expect("no entry found for key") + } +} + +macro_rules! impl_iterator_traits { + (($name:ident $($generics:tt)*) => $item:ty) => { + impl $($generics)* Iterator for $name $($generics)* { + type Item = $item; + #[inline] + fn next(&mut self) -> Option { + self.iter.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } + } + + #[cfg(feature = "preserve_order")] + impl $($generics)* DoubleEndedIterator for $name $($generics)* { + #[inline] + fn next_back(&mut self) -> Option { + self.iter.next_back() + } + } + + impl $($generics)* ExactSizeIterator for $name $($generics)* { + #[inline] + fn len(&self) -> usize { + self.iter.len() + } + } + + impl $($generics)* FusedIterator for $name $($generics)* {} + } +} + +impl<'a, S> IntoIterator for &'a Compound { + type Item = (&'a S, &'a Value); + type IntoIter = Iter<'a, S>; + + fn into_iter(self) -> Self::IntoIter { + Iter { + iter: self.map.iter(), + } + } +} + +#[derive(Clone, Debug)] +pub struct Iter<'a, S = String> { + #[cfg(not(feature = "preserve_order"))] + iter: std::collections::btree_map::Iter<'a, S, Value>, + #[cfg(feature = "preserve_order")] + iter: indexmap::map::Iter<'a, S, Value>, +} + +impl_iterator_traits!((Iter<'a, S>) => (&'a S, &'a Value)); + +impl<'a, S> IntoIterator for &'a 
mut Compound { + type Item = (&'a S, &'a mut Value); + type IntoIter = IterMut<'a, S>; + + fn into_iter(self) -> Self::IntoIter { + IterMut { + iter: self.map.iter_mut(), + } + } +} + +#[derive(Debug)] +pub struct IterMut<'a, S = String> { + #[cfg(not(feature = "preserve_order"))] + iter: std::collections::btree_map::IterMut<'a, S, Value>, + #[cfg(feature = "preserve_order")] + iter: indexmap::map::IterMut<'a, S, Value>, +} + +impl_iterator_traits!((IterMut<'a, S>) => (&'a S, &'a mut Value)); + +impl IntoIterator for Compound { + type Item = (S, Value); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter { + iter: self.map.into_iter(), + } + } +} + +#[derive(Debug)] +pub struct IntoIter { + #[cfg(not(feature = "preserve_order"))] + iter: std::collections::btree_map::IntoIter>, + #[cfg(feature = "preserve_order")] + iter: indexmap::map::IntoIter>, +} + +impl_iterator_traits!((IntoIter) => (S, Value)); + +#[derive(Clone, Debug)] +pub struct Keys<'a, S = String> { + #[cfg(not(feature = "preserve_order"))] + iter: std::collections::btree_map::Keys<'a, S, Value>, + #[cfg(feature = "preserve_order")] + iter: indexmap::map::Keys<'a, S, Value>, +} + +impl_iterator_traits!((Keys<'a, S>) => &'a S); + +#[derive(Clone, Debug)] +pub struct Values<'a, S = String> { + #[cfg(not(feature = "preserve_order"))] + iter: std::collections::btree_map::Values<'a, S, Value>, + #[cfg(feature = "preserve_order")] + iter: indexmap::map::Values<'a, S, Value>, +} + +impl_iterator_traits!((Values<'a, S>) => &'a Value); + +#[derive(Debug)] +pub struct ValuesMut<'a, S = String> { + #[cfg(not(feature = "preserve_order"))] + iter: std::collections::btree_map::ValuesMut<'a, S, Value>, + #[cfg(feature = "preserve_order")] + iter: indexmap::map::ValuesMut<'a, S, Value>, +} + +impl_iterator_traits!((ValuesMut<'a, S>) => &'a mut Value); + +#[cfg(test)] +mod tests { + #[cfg(feature = "preserve_order")] + #[test] + fn compound_preserves_order() { + use super::*; + + let 
letters = ["g", "b", "d", "e", "h", "z", "m", "a", "q"]; + + let mut c = Compound::::new(); + for l in letters { + c.insert(l, 0_i8); + } + + for (k, l) in c.keys().zip(letters) { + assert_eq!(k, l); + } + } +} diff --git a/valence_nbt/src/conv.rs b/valence_nbt/src/conv.rs new file mode 100644 index 0000000..9ff0bb4 --- /dev/null +++ b/valence_nbt/src/conv.rs @@ -0,0 +1,52 @@ +//! Zero-cost conversion functions for `valence_nbt`. +//! +//! While working with [`Value`], it is often necessary to convert between +//! collections of signed and unsigned integer types due to API +//! differences. For instance, you may be given a `&[i8]` from +//! [`Value::ByteArray`], but functions like [`Write::write_all`] expect to +//! receive a `&[u8]`. +//! +//! This module provides functions to perform conversions between these types +//! with zero-cost and no `unsafe` code on your part. +//! +//! [`Value`]: crate::Value +//! [`Value::ByteArray`]: crate::Value::ByteArray +//! [`Write::write_all`]: std::io::Write::write_all + +use std::mem::ManuallyDrop; + +/// Converts a `Vec` into a `Vec` without cloning. +#[inline] +pub fn u8_vec_into_i8_vec(vec: Vec) -> Vec { + // SAFETY: Layouts of u8 and i8 are the same and we're being careful not to drop + // the original vec after calling Vec::from_raw_parts. + unsafe { + let mut vec = ManuallyDrop::new(vec); + Vec::from_raw_parts(vec.as_mut_ptr() as *mut i8, vec.len(), vec.capacity()) + } +} + +/// Converts a `Vec` into a `Vec` without cloning. +#[inline] +pub fn i8_vec_into_u8_vec(vec: Vec) -> Vec { + // SAFETY: Layouts of u8 and i8 are the same and we're being careful not to drop + // the original vec after calling Vec::from_raw_parts. + unsafe { + let mut vec = ManuallyDrop::new(vec); + Vec::from_raw_parts(vec.as_mut_ptr() as *mut u8, vec.len(), vec.capacity()) + } +} + +/// Converts a `&[u8]` into a `&[i8]`. +#[inline] +pub fn u8_slice_as_i8_slice(slice: &[u8]) -> &[i8] { + // SAFETY: i8 has the same layout as u8. 
+ unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const i8, slice.len()) } +} + +/// Converts a `&[i8]` into a `&[u8]`. +#[inline] +pub fn i8_slice_as_u8_slice(slice: &[i8]) -> &[u8] { + // SAFETY: i8 has the same layout as u8. + unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len()) } +} diff --git a/valence_nbt/src/error.rs b/valence_nbt/src/error.rs new file mode 100644 index 0000000..8c5fc8c --- /dev/null +++ b/valence_nbt/src/error.rs @@ -0,0 +1,63 @@ +use std::error::Error as StdError; +use std::fmt::{Display, Formatter}; +use std::io; + +pub type Result = std::result::Result; + +/// Errors that can occur when encoding or decoding binary NBT. +#[derive(Debug)] +pub struct Error { + /// Box this to keep the size of `Result` small. + cause: Box, +} + +#[derive(Debug)] +enum Cause { + Io(io::Error), + Owned(Box), + Static(&'static str), +} + +impl Error { + #[allow(dead_code)] + pub(crate) fn new_owned(msg: impl Into>) -> Self { + Self { + cause: Box::new(Cause::Owned(msg.into())), + } + } + + #[allow(dead_code)] + pub(crate) fn new_static(msg: &'static str) -> Self { + Self { + cause: Box::new(Cause::Static(msg)), + } + } +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &*self.cause { + Cause::Io(e) => e.fmt(f), + Cause::Owned(msg) => write!(f, "{msg}"), + Cause::Static(msg) => write!(f, "{msg}"), + } + } +} + +impl StdError for Error { + fn source(&self) -> Option<&(dyn StdError + 'static)> { + match &*self.cause { + Cause::Io(e) => Some(e), + Cause::Owned(_) => None, + Cause::Static(_) => None, + } + } +} + +impl From for Error { + fn from(e: io::Error) -> Self { + Self { + cause: Box::new(Cause::Io(e)), + } + } +} diff --git a/valence_nbt/src/lib.rs b/valence_nbt/src/lib.rs new file mode 100644 index 0000000..30b1ed4 --- /dev/null +++ b/valence_nbt/src/lib.rs @@ -0,0 +1,102 @@ +#![doc = include_str!("../../README.md")] +// Run locally with `RUSTDOCFLAGS="--cfg docsrs" cargo 
+nightly doc --all-features --open` +#![cfg_attr(docsrs, feature(doc_cfg))] + +#[cfg(feature = "binary")] +#[cfg_attr(docsrs, doc(cfg(feature = "binary")))] +pub use binary::{from_binary, to_binary}; +pub use compound::Compound; +pub use error::*; +pub use list::List; +pub use tag::*; +pub use value::Value; + +#[cfg(feature = "binary")] +#[cfg_attr(docsrs, doc(cfg(feature = "binary")))] +pub mod binary; +pub mod compound; +pub mod conv; +mod error; +pub mod list; +#[cfg(feature = "serde")] +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] +pub mod serde; +#[cfg(feature = "snbt")] +#[cfg_attr(docsrs, doc(cfg(feature = "snbt")))] +pub mod snbt; +mod tag; +pub mod value; + +/// A convenience macro for constructing [`Compound`]s. +/// +/// Key expressions must implement `Into` while value expressions must +/// implement `Into`. +/// +/// # Examples +/// +/// ``` +/// use valence_nbt::{compound, List}; +/// +/// let c = compound! { +/// "byte" => 123_i8, +/// "list_of_int" => List::Int(vec![3, -7, 5]), +/// "list_of_string" => List::String(vec![ +/// "foo".to_owned(), +/// "bar".to_owned(), +/// "baz".to_owned() +/// ]), +/// "string" => "aรฉๆ—ฅ", +/// "compound" => compound! { +/// "foo" => 1, +/// "bar" => 2, +/// "baz" => 3, +/// }, +/// "int_array" => vec![5, -9, i32::MIN, 0, i32::MAX], +/// "byte_array" => vec![0_i8, 2, 3], +/// "long_array" => vec![123_i64, 456, 789], +/// }; +/// +/// println!("{c:?}"); +/// ``` +/// +/// It is also possible to specify a custom string type like this: +/// ``` +/// # use std::borrow::Cow; +/// +/// use valence_nbt::compound; +/// +/// let c = compound! { > +/// "foo" => 123_i8, +/// }; +/// +/// println!("{c:?}"); +/// ``` +#[macro_export] +macro_rules! compound { + (<$string_type:ty> $($key:expr => $value:expr),* $(,)?) 
=> { + <$crate::Compound<$string_type> as ::std::iter::FromIterator<($string_type, $crate::Value<$string_type>)>>::from_iter([ + $( + ( + ::std::convert::Into::<$string_type>::into($key), + ::std::convert::Into::<$crate::Value<$string_type>>::into($value) + ), + )* + ]) + }; + + ($($key:expr => $value:expr),* $(,)?) => { + compound!(<::std::string::String> $($key => $value),*) + }; +} + +/// A convenience macro for constructing [`Compound`]`<`[`JavaString`]`>`s +/// +/// [`JavaString`]: java_string::JavaString +#[cfg(feature = "java_string")] +#[cfg_attr(docsrs, doc(cfg(feature = "java_string")))] +#[macro_export] +macro_rules! jcompound { + ($($key:expr => $value:expr),* $(,)?) => { + compound!(<::java_string::JavaString> $($key => $value),*) + } +} diff --git a/valence_nbt/src/list.rs b/valence_nbt/src/list.rs new file mode 100644 index 0000000..1592501 --- /dev/null +++ b/valence_nbt/src/list.rs @@ -0,0 +1,977 @@ +use std::borrow::Cow; +use std::hash::Hash; +use std::iter::FusedIterator; + +use crate::value::{ValueMut, ValueRef}; +use crate::{Compound, Tag, Value}; + +/// An NBT list value. +/// +/// NBT lists are homogeneous, meaning each list element must be of the same +/// type. This is opposed to a format like JSON where lists can be +/// heterogeneous. Here is a JSON list that would be illegal in NBT: +/// +/// ```json +/// [42, "hello", {}] +/// ``` +/// +/// Every possible element type has its own variant in this enum. As a result, +/// heterogeneous lists are unrepresentable. +#[derive(Clone, Default, Debug)] +pub enum List { + /// The list with the element type of `TAG_End` and length of zero. 
+ #[default] + End, + Byte(Vec), + Short(Vec), + Int(Vec), + Long(Vec), + Float(Vec), + Double(Vec), + ByteArray(Vec>), + String(Vec), + List(Vec>), + Compound(Vec>), + IntArray(Vec>), + LongArray(Vec>), +} + +impl PartialEq for List +where + S: Ord + Hash, +{ + fn eq(&self, other: &Self) -> bool { + match self { + List::End => matches!(other, List::End), + List::Byte(list) => matches!(other, List::Byte(other_list) if list == other_list), + List::Short(list) => matches!(other, List::Short(other_list) if list == other_list), + List::Int(list) => matches!(other, List::Int(other_list) if list == other_list), + List::Long(list) => matches!(other, List::Long(other_list) if list == other_list), + List::Float(list) => matches!(other, List::Float(other_list) if list == other_list), + List::Double(list) => matches!(other, List::Double(other_list) if list == other_list), + List::ByteArray(list) => { + matches!(other, List::ByteArray(other_list) if list == other_list) + } + List::String(list) => matches!(other, List::String(other_list) if list == other_list), + List::List(list) => matches!(other, List::List(other_list) if list == other_list), + List::Compound(list) => { + matches!(other, List::Compound(other_list) if list == other_list) + } + List::IntArray(list) => { + matches!(other, List::IntArray(other_list) if list == other_list) + } + List::LongArray(list) => { + matches!(other, List::LongArray(other_list) if list == other_list) + } + } + } +} + +impl List { + /// Constructs a new empty NBT list, with the element type of `TAG_End`. + pub fn new() -> Self { + Self::End + } + + /// Returns the length of this list. 
+ pub fn len(&self) -> usize { + match self { + List::End => 0, + List::Byte(l) => l.len(), + List::Short(l) => l.len(), + List::Int(l) => l.len(), + List::Long(l) => l.len(), + List::Float(l) => l.len(), + List::Double(l) => l.len(), + List::ByteArray(l) => l.len(), + List::String(l) => l.len(), + List::List(l) => l.len(), + List::Compound(l) => l.len(), + List::IntArray(l) => l.len(), + List::LongArray(l) => l.len(), + } + } + + /// Returns `true` if this list has no elements. `false` otherwise. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the element type of this list. + pub fn element_tag(&self) -> Tag { + match self { + List::End => Tag::End, + List::Byte(_) => Tag::Byte, + List::Short(_) => Tag::Short, + List::Int(_) => Tag::Int, + List::Long(_) => Tag::Long, + List::Float(_) => Tag::Float, + List::Double(_) => Tag::Double, + List::ByteArray(_) => Tag::ByteArray, + List::String(_) => Tag::String, + List::List(_) => Tag::List, + List::Compound(_) => Tag::Compound, + List::IntArray(_) => Tag::IntArray, + List::LongArray(_) => Tag::LongArray, + } + } + + /// Gets a reference to the value at the given index in this list, or `None` + /// if the index is out of bounds. 
+ pub fn get(&self, index: usize) -> Option> { + match self { + List::End => None, + List::Byte(list) => list.get(index).map(ValueRef::Byte), + List::Short(list) => list.get(index).map(ValueRef::Short), + List::Int(list) => list.get(index).map(ValueRef::Int), + List::Long(list) => list.get(index).map(ValueRef::Long), + List::Float(list) => list.get(index).map(ValueRef::Float), + List::Double(list) => list.get(index).map(ValueRef::Double), + List::ByteArray(list) => list.get(index).map(|arr| ValueRef::ByteArray(&arr[..])), + List::String(list) => list.get(index).map(ValueRef::String), + List::List(list) => list.get(index).map(ValueRef::List), + List::Compound(list) => list.get(index).map(ValueRef::Compound), + List::IntArray(list) => list.get(index).map(|arr| ValueRef::IntArray(&arr[..])), + List::LongArray(list) => list.get(index).map(|arr| ValueRef::LongArray(&arr[..])), + } + } + + /// Gets a mutable reference to the value at the given index in this list, + /// or `None` if the index is out of bounds. 
+ pub fn get_mut(&mut self, index: usize) -> Option> { + match self { + List::End => None, + List::Byte(list) => list.get_mut(index).map(ValueMut::Byte), + List::Short(list) => list.get_mut(index).map(ValueMut::Short), + List::Int(list) => list.get_mut(index).map(ValueMut::Int), + List::Long(list) => list.get_mut(index).map(ValueMut::Long), + List::Float(list) => list.get_mut(index).map(ValueMut::Float), + List::Double(list) => list.get_mut(index).map(ValueMut::Double), + List::ByteArray(list) => list.get_mut(index).map(ValueMut::ByteArray), + List::String(list) => list.get_mut(index).map(ValueMut::String), + List::List(list) => list.get_mut(index).map(ValueMut::List), + List::Compound(list) => list.get_mut(index).map(ValueMut::Compound), + List::IntArray(list) => list.get_mut(index).map(ValueMut::IntArray), + List::LongArray(list) => list.get_mut(index).map(ValueMut::LongArray), + } + } + + /// Attempts to add the given value to the end of this list, failing if + /// adding the value would result in the list being heterogeneous (have + /// multiple types inside it). Returns `true` if the value was added, + /// `false` otherwise.
+ #[must_use] + pub fn try_push>>(&mut self, value: V) -> bool { + let value = value.into(); + match self { + List::End => { + *self = List::from(value); + true + } + List::Byte(list) => { + if let Value::Byte(value) = value { + list.push(value); + true + } else { + false + } + } + List::Short(list) => { + if let Value::Short(value) = value { + list.push(value); + true + } else { + false + } + } + List::Int(list) => { + if let Value::Int(value) = value { + list.push(value); + true + } else { + false + } + } + List::Long(list) => { + if let Value::Long(value) = value { + list.push(value); + true + } else { + false + } + } + List::Float(list) => { + if let Value::Float(value) = value { + list.push(value); + true + } else { + false + } + } + List::Double(list) => { + if let Value::Double(value) = value { + list.push(value); + true + } else { + false + } + } + List::ByteArray(list) => { + if let Value::ByteArray(value) = value { + list.push(value); + true + } else { + false + } + } + List::String(list) => { + if let Value::String(value) = value { + list.push(value); + true + } else { + false + } + } + List::List(list) => { + if let Value::List(value) = value { + list.push(value); + true + } else { + false + } + } + List::Compound(list) => { + if let Value::Compound(value) = value { + list.push(value); + true + } else { + false + } + } + List::IntArray(list) => { + if let Value::IntArray(value) = value { + list.push(value); + true + } else { + false + } + } + List::LongArray(list) => { + if let Value::LongArray(value) = value { + list.push(value); + true + } else { + false + } + } + } + } + + /// Attempts to insert the given value at the given index in this list, + /// failing if adding the value would result in the list being + /// heterogeneous (have multiple types inside it). Returns `true` if the + /// value was added, `false` otherwise. + /// + /// # Panics + /// + /// Panics if the index is greater than the length of the list.
+ #[must_use] + pub fn try_insert>>(&mut self, index: usize, value: V) -> bool { + let value = value.into(); + + #[cold] + #[inline(never)] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("insertion index (is {index}) should be <= len (is {len})"); + } + + match self { + List::End => { + if index > 0 { + assert_failed(index, 0); + } + *self = List::from(value); + true + } + List::Byte(list) => { + if let Value::Byte(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::Short(list) => { + if let Value::Short(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::Int(list) => { + if let Value::Int(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::Long(list) => { + if let Value::Long(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::Float(list) => { + if let Value::Float(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::Double(list) => { + if let Value::Double(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::ByteArray(list) => { + if let Value::ByteArray(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::String(list) => { + if let Value::String(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::List(list) => { + if let Value::List(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::Compound(list) => { + if let Value::Compound(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::IntArray(list) => { + if let Value::IntArray(value) = value { + list.insert(index, value); + true + } else { + false + } + } + List::LongArray(list) => { + if let Value::LongArray(value) = value { + list.insert(index, value); + true + } else { + false + } + } + } + } + + /// Removes the element at the given index in the 
list, and returns the + /// value removed. + /// + /// # Panics + /// + /// Panics if `index` is out of bounds. + #[track_caller] + pub fn remove(&mut self, index: usize) -> Value { + #[cold] + #[inline(never)] + #[track_caller] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("removal index (is {index}) should be < len (is {len})"); + } + + let removed = match self { + List::End => assert_failed(index, 0), + List::Byte(list) => Value::Byte(list.remove(index)), + List::Short(list) => Value::Short(list.remove(index)), + List::Int(list) => Value::Int(list.remove(index)), + List::Long(list) => Value::Long(list.remove(index)), + List::Float(list) => Value::Float(list.remove(index)), + List::Double(list) => Value::Double(list.remove(index)), + List::ByteArray(list) => Value::ByteArray(list.remove(index)), + List::String(list) => Value::String(list.remove(index)), + List::List(list) => Value::List(list.remove(index)), + List::Compound(list) => Value::Compound(list.remove(index)), + List::IntArray(list) => Value::IntArray(list.remove(index)), + List::LongArray(list) => Value::LongArray(list.remove(index)), + }; + + if self.is_empty() { + *self = List::End; + } + + removed + } + + /// Retains only the elements specified by the predicate, passing a mutable + /// reference to it. + /// + /// In other words, removes all elements `e` such that `f(ValueMut(&mut e))` + /// returns `false`. This method operates in place, visiting each element + /// exactly once in the original order, and preserves the order of the + /// retained elements.
+ pub fn retain(&mut self, mut f: F) + where + F: FnMut(ValueMut) -> bool, + { + match self { + List::End => {} + List::Byte(list) => list.retain_mut(|v| f(ValueMut::Byte(v))), + List::Short(list) => list.retain_mut(|v| f(ValueMut::Short(v))), + List::Int(list) => list.retain_mut(|v| f(ValueMut::Int(v))), + List::Long(list) => list.retain_mut(|v| f(ValueMut::Long(v))), + List::Float(list) => list.retain_mut(|v| f(ValueMut::Float(v))), + List::Double(list) => list.retain_mut(|v| f(ValueMut::Double(v))), + List::ByteArray(list) => list.retain_mut(|v| f(ValueMut::ByteArray(v))), + List::String(list) => list.retain_mut(|v| f(ValueMut::String(v))), + List::List(list) => list.retain_mut(|v| f(ValueMut::List(v))), + List::Compound(list) => list.retain_mut(|v| f(ValueMut::Compound(v))), + List::IntArray(list) => list.retain_mut(|v| f(ValueMut::IntArray(v))), + List::LongArray(list) => list.retain_mut(|v| f(ValueMut::LongArray(v))), + } + + if self.is_empty() { + *self = List::End; + } + } + + /// Returns an iterator over this list. This iterator yields [`ValueRef`]s. + pub fn iter(&self) -> Iter { + Iter { + inner: match self { + List::End => IterInner::End, + List::Byte(list) => IterInner::Byte(list.iter()), + List::Short(list) => IterInner::Short(list.iter()), + List::Int(list) => IterInner::Int(list.iter()), + List::Long(list) => IterInner::Long(list.iter()), + List::Float(list) => IterInner::Float(list.iter()), + List::Double(list) => IterInner::Double(list.iter()), + List::ByteArray(list) => IterInner::ByteArray(list.iter()), + List::String(list) => IterInner::String(list.iter()), + List::List(list) => IterInner::List(list.iter()), + List::Compound(list) => IterInner::Compound(list.iter()), + List::IntArray(list) => IterInner::IntArray(list.iter()), + List::LongArray(list) => IterInner::LongArray(list.iter()), + }, + } + } + + /// Returns a mutable iterator over this list. This iterator yields + /// [`ValueMut`]s. 
+ pub fn iter_mut(&mut self) -> IterMut { + IterMut { + inner: match self { + List::End => IterMutInner::End, + List::Byte(list) => IterMutInner::Byte(list.iter_mut()), + List::Short(list) => IterMutInner::Short(list.iter_mut()), + List::Int(list) => IterMutInner::Int(list.iter_mut()), + List::Long(list) => IterMutInner::Long(list.iter_mut()), + List::Float(list) => IterMutInner::Float(list.iter_mut()), + List::Double(list) => IterMutInner::Double(list.iter_mut()), + List::ByteArray(list) => IterMutInner::ByteArray(list.iter_mut()), + List::String(list) => IterMutInner::String(list.iter_mut()), + List::List(list) => IterMutInner::List(list.iter_mut()), + List::Compound(list) => IterMutInner::Compound(list.iter_mut()), + List::IntArray(list) => IterMutInner::IntArray(list.iter_mut()), + List::LongArray(list) => IterMutInner::LongArray(list.iter_mut()), + }, + } + } +} + +impl From> for List { + fn from(v: Vec) -> Self { + List::Byte(v) + } +} + +impl From> for List { + fn from(v: Vec) -> Self { + List::Short(v) + } +} + +impl From> for List { + fn from(v: Vec) -> Self { + List::Int(v) + } +} + +impl From> for List { + fn from(v: Vec) -> Self { + List::Long(v) + } +} + +impl From> for List { + fn from(v: Vec) -> Self { + List::Float(v) + } +} + +impl From> for List { + fn from(v: Vec) -> Self { + List::Double(v) + } +} + +impl From>> for List { + fn from(v: Vec>) -> Self { + List::ByteArray(v) + } +} + +impl From> for List { + fn from(v: Vec) -> Self { + List::String(v) + } +} + +impl<'a> From>> for List> { + fn from(v: Vec>) -> Self { + List::String(v) + } +} + +#[cfg(feature = "java_string")] +impl From> for List { + fn from(v: Vec) -> Self { + List::String(v) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From>> for List> { + fn from(v: Vec>) -> Self { + List::String(v) + } +} + +impl From>> for List { + fn from(v: Vec>) -> Self { + List::List(v) + } +} + +impl From>> for List { + fn from(v: Vec>) -> Self { + List::Compound(v) + } +} + +impl From>> for List { 
+ fn from(v: Vec>) -> Self { + List::IntArray(v) + } +} + +impl From>> for List { + fn from(v: Vec>) -> Self { + List::LongArray(v) + } +} + +/// Converts a value to a singleton list. +impl From> for List { + fn from(value: Value) -> Self { + match value { + Value::Byte(v) => List::Byte(vec![v]), + Value::Short(v) => List::Short(vec![v]), + Value::Int(v) => List::Int(vec![v]), + Value::Long(v) => List::Long(vec![v]), + Value::Float(v) => List::Float(vec![v]), + Value::Double(v) => List::Double(vec![v]), + Value::ByteArray(v) => List::ByteArray(vec![v]), + Value::String(v) => List::String(vec![v]), + Value::List(v) => List::List(vec![v]), + Value::Compound(v) => List::Compound(vec![v]), + Value::IntArray(v) => List::IntArray(vec![v]), + Value::LongArray(v) => List::LongArray(vec![v]), + } + } +} + +impl IntoIterator for List { + type Item = Value; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter { + inner: match self { + List::End => IntoIterInner::End, + List::Byte(list) => IntoIterInner::Byte(list.into_iter()), + List::Short(list) => IntoIterInner::Short(list.into_iter()), + List::Int(list) => IntoIterInner::Int(list.into_iter()), + List::Long(list) => IntoIterInner::Long(list.into_iter()), + List::Float(list) => IntoIterInner::Float(list.into_iter()), + List::Double(list) => IntoIterInner::Double(list.into_iter()), + List::ByteArray(list) => IntoIterInner::ByteArray(list.into_iter()), + List::String(list) => IntoIterInner::String(list.into_iter()), + List::List(list) => IntoIterInner::List(list.into_iter()), + List::Compound(list) => IntoIterInner::Compound(list.into_iter()), + List::IntArray(list) => IntoIterInner::IntArray(list.into_iter()), + List::LongArray(list) => IntoIterInner::LongArray(list.into_iter()), + }, + } + } +} + +impl<'a, S> IntoIterator for &'a List { + type Item = ValueRef<'a, S>; + type IntoIter = Iter<'a, S>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, S> IntoIterator for &'a 
mut List { + type Item = ValueMut<'a, S>; + type IntoIter = IterMut<'a, S>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +/// The owned iterator type for [`List`]. +#[derive(Clone, Debug)] +pub struct IntoIter { + inner: IntoIterInner, +} + +#[derive(Clone, Debug)] +enum IntoIterInner { + End, + Byte(std::vec::IntoIter), + Short(std::vec::IntoIter), + Int(std::vec::IntoIter), + Long(std::vec::IntoIter), + Float(std::vec::IntoIter), + Double(std::vec::IntoIter), + ByteArray(std::vec::IntoIter>), + String(std::vec::IntoIter), + List(std::vec::IntoIter>), + Compound(std::vec::IntoIter>), + IntArray(std::vec::IntoIter>), + LongArray(std::vec::IntoIter>), +} + +impl Iterator for IntoIter { + type Item = Value; + + fn next(&mut self) -> Option { + match &mut self.inner { + IntoIterInner::End => None, + IntoIterInner::Byte(i) => i.next().map(Value::Byte), + IntoIterInner::Short(i) => i.next().map(Value::Short), + IntoIterInner::Int(i) => i.next().map(Value::Int), + IntoIterInner::Long(i) => i.next().map(Value::Long), + IntoIterInner::Float(i) => i.next().map(Value::Float), + IntoIterInner::Double(i) => i.next().map(Value::Double), + IntoIterInner::ByteArray(i) => i.next().map(Value::ByteArray), + IntoIterInner::String(i) => i.next().map(Value::String), + IntoIterInner::List(i) => i.next().map(Value::List), + IntoIterInner::Compound(i) => i.next().map(Value::Compound), + IntoIterInner::IntArray(i) => i.next().map(Value::IntArray), + IntoIterInner::LongArray(i) => i.next().map(Value::LongArray), + } + } + + fn size_hint(&self) -> (usize, Option) { + match &self.inner { + IntoIterInner::End => (0, Some(0)), + IntoIterInner::Byte(i) => i.size_hint(), + IntoIterInner::Short(i) => i.size_hint(), + IntoIterInner::Int(i) => i.size_hint(), + IntoIterInner::Long(i) => i.size_hint(), + IntoIterInner::Float(i) => i.size_hint(), + IntoIterInner::Double(i) => i.size_hint(), + IntoIterInner::ByteArray(i) => i.size_hint(), + IntoIterInner::String(i) => 
i.size_hint(), + IntoIterInner::List(i) => i.size_hint(), + IntoIterInner::Compound(i) => i.size_hint(), + IntoIterInner::IntArray(i) => i.size_hint(), + IntoIterInner::LongArray(i) => i.size_hint(), + } + } +} + +impl DoubleEndedIterator for IntoIter { + fn next_back(&mut self) -> Option { + match &mut self.inner { + IntoIterInner::End => None, + IntoIterInner::Byte(i) => i.next_back().map(Value::Byte), + IntoIterInner::Short(i) => i.next_back().map(Value::Short), + IntoIterInner::Int(i) => i.next_back().map(Value::Int), + IntoIterInner::Long(i) => i.next_back().map(Value::Long), + IntoIterInner::Float(i) => i.next_back().map(Value::Float), + IntoIterInner::Double(i) => i.next_back().map(Value::Double), + IntoIterInner::ByteArray(i) => i.next_back().map(Value::ByteArray), + IntoIterInner::String(i) => i.next_back().map(Value::String), + IntoIterInner::List(i) => i.next_back().map(Value::List), + IntoIterInner::Compound(i) => i.next_back().map(Value::Compound), + IntoIterInner::IntArray(i) => i.next_back().map(Value::IntArray), + IntoIterInner::LongArray(i) => i.next_back().map(Value::LongArray), + } + } +} + +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + match &self.inner { + IntoIterInner::End => 0, + IntoIterInner::Byte(i) => i.len(), + IntoIterInner::Short(i) => i.len(), + IntoIterInner::Int(i) => i.len(), + IntoIterInner::Long(i) => i.len(), + IntoIterInner::Float(i) => i.len(), + IntoIterInner::Double(i) => i.len(), + IntoIterInner::ByteArray(i) => i.len(), + IntoIterInner::String(i) => i.len(), + IntoIterInner::List(i) => i.len(), + IntoIterInner::Compound(i) => i.len(), + IntoIterInner::IntArray(i) => i.len(), + IntoIterInner::LongArray(i) => i.len(), + } + } +} + +impl FusedIterator for IntoIter {} + +/// The borrowing iterator type for [`List`]. 
+#[derive(Clone, Debug)] +pub struct Iter<'a, S = String> { + inner: IterInner<'a, S>, +} + +#[derive(Clone, Debug)] +enum IterInner<'a, S> { + End, + Byte(std::slice::Iter<'a, i8>), + Short(std::slice::Iter<'a, i16>), + Int(std::slice::Iter<'a, i32>), + Long(std::slice::Iter<'a, i64>), + Float(std::slice::Iter<'a, f32>), + Double(std::slice::Iter<'a, f64>), + ByteArray(std::slice::Iter<'a, Vec>), + String(std::slice::Iter<'a, S>), + List(std::slice::Iter<'a, List>), + Compound(std::slice::Iter<'a, Compound>), + IntArray(std::slice::Iter<'a, Vec>), + LongArray(std::slice::Iter<'a, Vec>), +} + +impl<'a, S> Iterator for Iter<'a, S> { + type Item = ValueRef<'a, S>; + + fn next(&mut self) -> Option { + match &mut self.inner { + IterInner::End => None, + IterInner::Byte(i) => i.next().map(ValueRef::Byte), + IterInner::Short(i) => i.next().map(ValueRef::Short), + IterInner::Int(i) => i.next().map(ValueRef::Int), + IterInner::Long(i) => i.next().map(ValueRef::Long), + IterInner::Float(i) => i.next().map(ValueRef::Float), + IterInner::Double(i) => i.next().map(ValueRef::Double), + IterInner::ByteArray(i) => i.next().map(|arr| ValueRef::ByteArray(&arr[..])), + IterInner::String(i) => i.next().map(ValueRef::String), + IterInner::List(i) => i.next().map(ValueRef::List), + IterInner::Compound(i) => i.next().map(ValueRef::Compound), + IterInner::IntArray(i) => i.next().map(|arr| ValueRef::IntArray(&arr[..])), + IterInner::LongArray(i) => i.next().map(|arr| ValueRef::LongArray(&arr[..])), + } + } + + fn size_hint(&self) -> (usize, Option) { + match &self.inner { + IterInner::End => (0, Some(0)), + IterInner::Byte(i) => i.size_hint(), + IterInner::Short(i) => i.size_hint(), + IterInner::Int(i) => i.size_hint(), + IterInner::Long(i) => i.size_hint(), + IterInner::Float(i) => i.size_hint(), + IterInner::Double(i) => i.size_hint(), + IterInner::ByteArray(i) => i.size_hint(), + IterInner::String(i) => i.size_hint(), + IterInner::List(i) => i.size_hint(), + IterInner::Compound(i) => 
i.size_hint(), + IterInner::IntArray(i) => i.size_hint(), + IterInner::LongArray(i) => i.size_hint(), + } + } +} + +impl DoubleEndedIterator for Iter<'_, S> { + fn next_back(&mut self) -> Option { + match &mut self.inner { + IterInner::End => None, + IterInner::Byte(i) => i.next_back().map(ValueRef::Byte), + IterInner::Short(i) => i.next_back().map(ValueRef::Short), + IterInner::Int(i) => i.next_back().map(ValueRef::Int), + IterInner::Long(i) => i.next_back().map(ValueRef::Long), + IterInner::Float(i) => i.next_back().map(ValueRef::Float), + IterInner::Double(i) => i.next_back().map(ValueRef::Double), + IterInner::ByteArray(i) => i.next_back().map(|arr| ValueRef::ByteArray(&arr[..])), + IterInner::String(i) => i.next_back().map(ValueRef::String), + IterInner::List(i) => i.next_back().map(ValueRef::List), + IterInner::Compound(i) => i.next_back().map(ValueRef::Compound), + IterInner::IntArray(i) => i.next_back().map(|arr| ValueRef::IntArray(&arr[..])), + IterInner::LongArray(i) => i.next_back().map(|arr| ValueRef::LongArray(&arr[..])), + } + } +} + +impl ExactSizeIterator for Iter<'_, S> { + fn len(&self) -> usize { + match &self.inner { + IterInner::End => 0, + IterInner::Byte(i) => i.len(), + IterInner::Short(i) => i.len(), + IterInner::Int(i) => i.len(), + IterInner::Long(i) => i.len(), + IterInner::Float(i) => i.len(), + IterInner::Double(i) => i.len(), + IterInner::ByteArray(i) => i.len(), + IterInner::String(i) => i.len(), + IterInner::List(i) => i.len(), + IterInner::Compound(i) => i.len(), + IterInner::IntArray(i) => i.len(), + IterInner::LongArray(i) => i.len(), + } + } +} + +impl FusedIterator for Iter<'_, S> {} + +/// The mutable borrowing iterator type for [`List`]. 
+#[derive(Debug)] +pub struct IterMut<'a, S = String> { + inner: IterMutInner<'a, S>, +} + +#[derive(Debug)] +enum IterMutInner<'a, S> { + End, + Byte(std::slice::IterMut<'a, i8>), + Short(std::slice::IterMut<'a, i16>), + Int(std::slice::IterMut<'a, i32>), + Long(std::slice::IterMut<'a, i64>), + Float(std::slice::IterMut<'a, f32>), + Double(std::slice::IterMut<'a, f64>), + ByteArray(std::slice::IterMut<'a, Vec>), + String(std::slice::IterMut<'a, S>), + List(std::slice::IterMut<'a, List>), + Compound(std::slice::IterMut<'a, Compound>), + IntArray(std::slice::IterMut<'a, Vec>), + LongArray(std::slice::IterMut<'a, Vec>), +} + +impl<'a, S> Iterator for IterMut<'a, S> { + type Item = ValueMut<'a, S>; + + fn next(&mut self) -> Option { + match &mut self.inner { + IterMutInner::End => None, + IterMutInner::Byte(i) => i.next().map(ValueMut::Byte), + IterMutInner::Short(i) => i.next().map(ValueMut::Short), + IterMutInner::Int(i) => i.next().map(ValueMut::Int), + IterMutInner::Long(i) => i.next().map(ValueMut::Long), + IterMutInner::Float(i) => i.next().map(ValueMut::Float), + IterMutInner::Double(i) => i.next().map(ValueMut::Double), + IterMutInner::ByteArray(i) => i.next().map(ValueMut::ByteArray), + IterMutInner::String(i) => i.next().map(ValueMut::String), + IterMutInner::List(i) => i.next().map(ValueMut::List), + IterMutInner::Compound(i) => i.next().map(ValueMut::Compound), + IterMutInner::IntArray(i) => i.next().map(ValueMut::IntArray), + IterMutInner::LongArray(i) => i.next().map(ValueMut::LongArray), + } + } + + fn size_hint(&self) -> (usize, Option) { + match &self.inner { + IterMutInner::End => (0, Some(0)), + IterMutInner::Byte(i) => i.size_hint(), + IterMutInner::Short(i) => i.size_hint(), + IterMutInner::Int(i) => i.size_hint(), + IterMutInner::Long(i) => i.size_hint(), + IterMutInner::Float(i) => i.size_hint(), + IterMutInner::Double(i) => i.size_hint(), + IterMutInner::ByteArray(i) => i.size_hint(), + IterMutInner::String(i) => i.size_hint(), + 
IterMutInner::List(i) => i.size_hint(), + IterMutInner::Compound(i) => i.size_hint(), + IterMutInner::IntArray(i) => i.size_hint(), + IterMutInner::LongArray(i) => i.size_hint(), + } + } +} + +impl DoubleEndedIterator for IterMut<'_, S> { + fn next_back(&mut self) -> Option { + match &mut self.inner { + IterMutInner::End => None, + IterMutInner::Byte(i) => i.next_back().map(ValueMut::Byte), + IterMutInner::Short(i) => i.next_back().map(ValueMut::Short), + IterMutInner::Int(i) => i.next_back().map(ValueMut::Int), + IterMutInner::Long(i) => i.next_back().map(ValueMut::Long), + IterMutInner::Float(i) => i.next_back().map(ValueMut::Float), + IterMutInner::Double(i) => i.next_back().map(ValueMut::Double), + IterMutInner::ByteArray(i) => i.next_back().map(ValueMut::ByteArray), + IterMutInner::String(i) => i.next_back().map(ValueMut::String), + IterMutInner::List(i) => i.next_back().map(ValueMut::List), + IterMutInner::Compound(i) => i.next_back().map(ValueMut::Compound), + IterMutInner::IntArray(i) => i.next_back().map(ValueMut::IntArray), + IterMutInner::LongArray(i) => i.next_back().map(ValueMut::LongArray), + } + } +} + +impl ExactSizeIterator for IterMut<'_, S> { + fn len(&self) -> usize { + match &self.inner { + IterMutInner::End => 0, + IterMutInner::Byte(i) => i.len(), + IterMutInner::Short(i) => i.len(), + IterMutInner::Int(i) => i.len(), + IterMutInner::Long(i) => i.len(), + IterMutInner::Float(i) => i.len(), + IterMutInner::Double(i) => i.len(), + IterMutInner::ByteArray(i) => i.len(), + IterMutInner::String(i) => i.len(), + IterMutInner::List(i) => i.len(), + IterMutInner::Compound(i) => i.len(), + IterMutInner::IntArray(i) => i.len(), + IterMutInner::LongArray(i) => i.len(), + } + } +} + +impl FusedIterator for IterMut<'_, S> {} diff --git a/valence_nbt/src/serde.rs b/valence_nbt/src/serde.rs new file mode 100644 index 0000000..793cb26 --- /dev/null +++ b/valence_nbt/src/serde.rs @@ -0,0 +1,28 @@ +use std::fmt; + +pub use ser::*; + +use crate::Error; + +mod 
de; +mod ser; +#[cfg(test)] +mod tests; + +impl serde::de::Error for Error { + fn custom(msg: T) -> Self + where + T: fmt::Display, + { + Self::new_owned(format!("{msg}")) + } +} + +impl serde::ser::Error for Error { + fn custom(msg: T) -> Self + where + T: fmt::Display, + { + Self::new_owned(format!("{msg}")) + } +} diff --git a/valence_nbt/src/serde/de.rs b/valence_nbt/src/serde/de.rs new file mode 100644 index 0000000..e3826d7 --- /dev/null +++ b/valence_nbt/src/serde/de.rs @@ -0,0 +1,393 @@ +use std::fmt; +use std::hash::Hash; +use std::marker::PhantomData; + +use serde::de::value::{ + MapAccessDeserializer, MapDeserializer, SeqAccessDeserializer, StrDeserializer, + StringDeserializer, +}; +use serde::de::{self, IntoDeserializer, SeqAccess, Visitor}; +use serde::{forward_to_deserialize_any, Deserialize, Deserializer}; + +use super::Error; +use crate::conv::{i8_vec_into_u8_vec, u8_slice_as_i8_slice, u8_vec_into_i8_vec}; +use crate::{Compound, List, Value}; + +impl<'de, S> Deserialize<'de> for Value +where + S: Deserialize<'de> + Ord + Hash, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct ValueVisitor(PhantomData); + + impl<'de, S> Visitor<'de> for ValueVisitor + where + S: Deserialize<'de> + Ord + Hash, + { + type Value = Value; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "a valid NBT type") + } + + fn visit_bool(self, v: bool) -> Result + where + E: de::Error, + { + Ok(Value::Byte(v.into())) + } + + fn visit_i8(self, v: i8) -> Result + where + E: de::Error, + { + Ok(Value::Byte(v)) + } + + fn visit_i16(self, v: i16) -> Result + where + E: de::Error, + { + Ok(Value::Short(v)) + } + + fn visit_i32(self, v: i32) -> Result + where + E: de::Error, + { + Ok(Value::Int(v)) + } + + fn visit_i64(self, v: i64) -> Result + where + E: de::Error, + { + Ok(Value::Long(v)) + } + + fn visit_u8(self, v: u8) -> Result + where + E: de::Error, + { + Ok(Value::Byte(v as i8)) + } + + fn 
visit_u16(self, v: u16) -> Result + where + E: de::Error, + { + Ok(Value::Short(v as i16)) + } + + fn visit_u32(self, v: u32) -> Result + where + E: de::Error, + { + Ok(Value::Int(v as i32)) + } + + fn visit_u64(self, v: u64) -> Result + where + E: de::Error, + { + Ok(Value::Long(v as i64)) + } + + fn visit_f32(self, v: f32) -> Result + where + E: de::Error, + { + Ok(Value::Float(v)) + } + + fn visit_f64(self, v: f64) -> Result + where + E: de::Error, + { + Ok(Value::Double(v)) + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + S::deserialize(StrDeserializer::new(v)).map(Value::String) + } + + fn visit_string(self, v: String) -> Result + where + E: de::Error, + { + S::deserialize(StringDeserializer::new(v)).map(Value::String) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: de::Error, + { + Ok(Value::ByteArray(u8_slice_as_i8_slice(v).into())) + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: de::Error, + { + Ok(Value::ByteArray(u8_vec_into_i8_vec(v))) + } + + fn visit_seq(self, seq: A) -> Result + where + A: de::SeqAccess<'de>, + { + Ok(List::deserialize(SeqAccessDeserializer::new(seq))?.into()) + } + + fn visit_map(self, map: A) -> Result + where + A: de::MapAccess<'de>, + { + Ok(Compound::deserialize(MapAccessDeserializer::new(map))?.into()) + } + } + + deserializer.deserialize_any(ValueVisitor::(PhantomData)) + } +} + +impl<'de, S> Deserialize<'de> for List +where + S: Deserialize<'de> + Ord + Hash, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct ListVisitor(PhantomData); + + impl<'de, S> Visitor<'de> for ListVisitor + where + S: Deserialize<'de> + Ord + Hash, + { + type Value = List; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "a sequence or bytes") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: de::SeqAccess<'de>, + { + match seq.next_element::>()? 
{ + Some(v) => match v { + Value::Byte(v) => deserialize_seq_remainder(v, seq, From::from), + Value::Short(v) => deserialize_seq_remainder(v, seq, From::from), + Value::Int(v) => deserialize_seq_remainder(v, seq, From::from), + Value::Long(v) => deserialize_seq_remainder(v, seq, From::from), + Value::Float(v) => deserialize_seq_remainder(v, seq, From::from), + Value::Double(v) => deserialize_seq_remainder(v, seq, From::from), + Value::ByteArray(v) => deserialize_seq_remainder(v, seq, From::from), + Value::String(v) => deserialize_seq_remainder(v, seq, List::String), + Value::List(v) => deserialize_seq_remainder(v, seq, From::from), + Value::Compound(v) => deserialize_seq_remainder(v, seq, From::from), + Value::IntArray(v) => deserialize_seq_remainder(v, seq, From::from), + Value::LongArray(v) => deserialize_seq_remainder(v, seq, From::from), + }, + None => Ok(List::End), + } + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: de::Error, + { + Ok(List::Byte(u8_vec_into_i8_vec(v))) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: de::Error, + { + Ok(List::Byte(u8_slice_as_i8_slice(v).into())) + } + } + + deserializer.deserialize_seq(ListVisitor::(PhantomData)) + } +} + +/// Deserializes the remainder of a sequence after having +/// determined the type of the first element. +fn deserialize_seq_remainder<'de, T, A, S, C>( + first: T, + mut seq: A, + conv: C, +) -> Result, A::Error> +where + T: Deserialize<'de>, + A: de::SeqAccess<'de>, + C: FnOnce(Vec) -> List, +{ + let mut vec = match seq.size_hint() { + Some(n) => Vec::with_capacity(n + 1), + None => Vec::new(), + }; + + vec.push(first); + + while let Some(v) = seq.next_element()? { + vec.push(v); + } + + Ok(conv(vec)) +} + +impl<'de> Deserializer<'de> for Compound { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_map(MapDeserializer::new(self.into_iter())) + } + + forward_to_deserialize_any! 
{ + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier ignored_any + } +} + +impl<'de> IntoDeserializer<'de, Error> for Compound { + type Deserializer = Self; + + fn into_deserializer(self) -> Self::Deserializer { + self + } +} + +impl<'de> Deserializer<'de> for Value { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Value::Byte(v) => visitor.visit_i8(v), + Value::Short(v) => visitor.visit_i16(v), + Value::Int(v) => visitor.visit_i32(v), + Value::Long(v) => visitor.visit_i64(v), + Value::Float(v) => visitor.visit_f32(v), + Value::Double(v) => visitor.visit_f64(v), + Value::ByteArray(v) => visitor.visit_byte_buf(i8_vec_into_u8_vec(v)), + Value::String(v) => visitor.visit_string(v), + Value::List(v) => v.deserialize_any(visitor), + Value::Compound(v) => v.into_deserializer().deserialize_any(visitor), + Value::IntArray(v) => v.into_deserializer().deserialize_any(visitor), + Value::LongArray(v) => v.into_deserializer().deserialize_any(visitor), + } + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Value::Byte(b) => visitor.visit_bool(b != 0), + _ => self.deserialize_any(visitor), + } + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_some(self) + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + match self { + Value::String(s) => visitor.visit_enum(s.into_deserializer()), // Unit variant. + other => other.deserialize_any(visitor), + } + } + + forward_to_deserialize_any! 
{ + i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf unit unit_struct newtype_struct seq tuple + tuple_struct map struct identifier ignored_any + } +} + +impl<'de> IntoDeserializer<'de, Error> for Value { + type Deserializer = Self; + + fn into_deserializer(self) -> Self::Deserializer { + self + } +} + +impl<'de> Deserializer<'de> for List { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + struct EndSeqAccess; + + impl<'de> SeqAccess<'de> for EndSeqAccess { + type Error = Error; + + fn next_element_seed(&mut self, _seed: T) -> Result, Self::Error> + where + T: de::DeserializeSeed<'de>, + { + Ok(None) + } + } + + match self { + List::End => visitor.visit_seq(EndSeqAccess), + List::Byte(v) => visitor.visit_byte_buf(i8_vec_into_u8_vec(v)), + List::Short(v) => v.into_deserializer().deserialize_any(visitor), + List::Int(v) => v.into_deserializer().deserialize_any(visitor), + List::Long(v) => v.into_deserializer().deserialize_any(visitor), + List::Float(v) => v.into_deserializer().deserialize_any(visitor), + List::Double(v) => v.into_deserializer().deserialize_any(visitor), + List::ByteArray(v) => v.into_deserializer().deserialize_any(visitor), + List::String(v) => v.into_deserializer().deserialize_any(visitor), + List::List(v) => v.into_deserializer().deserialize_any(visitor), + List::Compound(v) => v.into_deserializer().deserialize_any(visitor), + List::IntArray(v) => v.into_deserializer().deserialize_any(visitor), + List::LongArray(v) => v.into_deserializer().deserialize_any(visitor), + } + } + + forward_to_deserialize_any! 
{ + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier ignored_any + } +} + +impl<'de> IntoDeserializer<'de, Error> for List { + type Deserializer = Self; + + fn into_deserializer(self) -> Self::Deserializer { + self + } +} diff --git a/valence_nbt/src/serde/ser.rs b/valence_nbt/src/serde/ser.rs new file mode 100644 index 0000000..99f5190 --- /dev/null +++ b/valence_nbt/src/serde/ser.rs @@ -0,0 +1,630 @@ +use std::hash::Hash; +use std::marker::PhantomData; + +use serde::ser::{Impossible, SerializeMap, SerializeSeq, SerializeStruct}; +use serde::{Serialize, Serializer}; + +use super::Error; +use crate::conv::{i8_slice_as_u8_slice, u8_vec_into_i8_vec}; +use crate::{Compound, List, Value}; + +impl Serialize for Value +where + Str: Serialize + Ord + Hash, +{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Value::Byte(v) => serializer.serialize_i8(*v), + Value::Short(v) => serializer.serialize_i16(*v), + Value::Int(v) => serializer.serialize_i32(*v), + Value::Long(v) => serializer.serialize_i64(*v), + Value::Float(v) => serializer.serialize_f32(*v), + Value::Double(v) => serializer.serialize_f64(*v), + Value::ByteArray(v) => serializer.serialize_bytes(i8_slice_as_u8_slice(v)), + Value::String(v) => v.serialize(serializer), + Value::List(v) => v.serialize(serializer), + Value::Compound(v) => v.serialize(serializer), + Value::IntArray(v) => v.serialize(serializer), + Value::LongArray(v) => v.serialize(serializer), + } + } +} + +impl Serialize for List +where + Str: Serialize + Ord + Hash, +{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + List::End => serializer.serialize_seq(Some(0))?.end(), + List::Byte(v) => v.serialize(serializer), + List::Short(v) => v.serialize(serializer), + List::Int(v) => v.serialize(serializer), + List::Long(v) => 
v.serialize(serializer), + List::Float(v) => v.serialize(serializer), + List::Double(v) => v.serialize(serializer), + List::ByteArray(v) => v.serialize(serializer), + List::String(v) => v.serialize(serializer), + List::List(v) => v.serialize(serializer), + List::Compound(v) => v.serialize(serializer), + List::IntArray(v) => v.serialize(serializer), + List::LongArray(v) => v.serialize(serializer), + } + } +} + +macro_rules! unsupported { + ($lit:literal) => { + Err(Error::new_static(concat!("unsupported type: ", $lit))) + }; +} + +/// [`Serializer`] whose output is [`Compound`]. +#[derive(Debug)] +pub struct CompoundSerializer; + +impl Serializer for CompoundSerializer { + type Ok = Compound; + + type Error = Error; + + type SerializeSeq = Impossible; + + type SerializeTuple = Impossible; + + type SerializeTupleStruct = Impossible; + + type SerializeTupleVariant = Impossible; + + type SerializeMap = GenericSerializeMap; + + type SerializeStruct = GenericSerializeStruct; + + type SerializeStructVariant = Impossible; + + fn serialize_bool(self, _v: bool) -> Result { + unsupported!("bool") + } + + fn serialize_i8(self, _v: i8) -> Result { + unsupported!("i8") + } + + fn serialize_i16(self, _v: i16) -> Result { + unsupported!("i16") + } + + fn serialize_i32(self, _v: i32) -> Result { + unsupported!("i32") + } + + fn serialize_i64(self, _v: i64) -> Result { + unsupported!("i64") + } + + fn serialize_u8(self, _v: u8) -> Result { + unsupported!("u8") + } + + fn serialize_u16(self, _v: u16) -> Result { + unsupported!("u16") + } + + fn serialize_u32(self, _v: u32) -> Result { + unsupported!("u32") + } + + fn serialize_u64(self, _v: u64) -> Result { + unsupported!("u64") + } + + fn serialize_f32(self, _v: f32) -> Result { + unsupported!("f32") + } + + fn serialize_f64(self, _v: f64) -> Result { + unsupported!("f64") + } + + fn serialize_char(self, _v: char) -> Result { + unsupported!("char") + } + + fn serialize_str(self, _v: &str) -> Result { + unsupported!("str") + } + + fn 
serialize_bytes(self, _v: &[u8]) -> Result { + unsupported!("bytes") + } + + fn serialize_none(self) -> Result { + unsupported!("none") + } + + fn serialize_some(self, _value: &T) -> Result + where + T: Serialize, + { + unsupported!("some") + } + + fn serialize_unit(self) -> Result { + unsupported!("unit") + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result { + unsupported!("unit struct") + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + ) -> Result { + unsupported!("unit variant") + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + _value: &T, + ) -> Result + where + T: Serialize, + { + unsupported!("newtype struct") + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result + where + T: Serialize, + { + unsupported!("newtype variant") + } + + fn serialize_seq(self, _len: Option) -> Result { + unsupported!("seq") + } + + fn serialize_tuple(self, _len: usize) -> Result { + unsupported!("tuple") + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + unsupported!("tuple struct") + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unsupported!("tuple variant") + } + + fn serialize_map(self, len: Option) -> Result { + Ok(GenericSerializeMap::new(len)) + } + + fn serialize_struct( + self, + _name: &'static str, + len: usize, + ) -> Result { + Ok(GenericSerializeStruct::new(len)) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unsupported!("struct variant") + } +} + +/// [`Serializer`] whose output is [`Value`]. 
+struct ValueSerializer; + +impl Serializer for ValueSerializer { + type Ok = Value; + + type Error = Error; + + type SerializeSeq = ValueSerializeSeq; + + type SerializeTuple = Impossible; + + type SerializeTupleStruct = Impossible; + + type SerializeTupleVariant = Impossible; + + type SerializeMap = GenericSerializeMap; + + type SerializeStruct = GenericSerializeStruct; + + type SerializeStructVariant = Impossible; + + fn serialize_bool(self, v: bool) -> Result { + Ok(Value::Byte(v.into())) + } + + fn serialize_i8(self, v: i8) -> Result { + Ok(Value::Byte(v)) + } + + fn serialize_i16(self, v: i16) -> Result { + Ok(Value::Short(v)) + } + + fn serialize_i32(self, v: i32) -> Result { + Ok(Value::Int(v)) + } + + fn serialize_i64(self, v: i64) -> Result { + Ok(Value::Long(v)) + } + + fn serialize_u8(self, v: u8) -> Result { + Ok(Value::Byte(v as i8)) + } + + fn serialize_u16(self, v: u16) -> Result { + Ok(Value::Short(v as i16)) + } + + fn serialize_u32(self, v: u32) -> Result { + Ok(Value::Int(v as i32)) + } + + fn serialize_u64(self, v: u64) -> Result { + Ok(Value::Long(v as i64)) + } + + fn serialize_f32(self, v: f32) -> Result { + Ok(Value::Float(v)) + } + + fn serialize_f64(self, v: f64) -> Result { + Ok(Value::Double(v)) + } + + fn serialize_char(self, v: char) -> Result { + Ok(Value::String(v.into())) + } + + fn serialize_str(self, v: &str) -> Result { + Ok(Value::String(v.into())) + } + + fn serialize_bytes(self, v: &[u8]) -> Result { + Ok(Value::ByteArray(u8_vec_into_i8_vec(v.into()))) + } + + fn serialize_none(self) -> Result { + unsupported!("none") + } + + fn serialize_some(self, value: &T) -> Result + where + T: Serialize, + { + value.serialize(self) + } + + fn serialize_unit(self) -> Result { + unsupported!("unit") + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result { + self.serialize_unit() + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result { + 
Ok(Value::String(variant.into())) + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + value: &T, + ) -> Result + where + T: Serialize, + { + value.serialize(self) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result + where + T: Serialize, + { + unsupported!("newtype variant") + } + + fn serialize_seq(self, len: Option) -> Result { + Ok(ValueSerializeSeq::End { + len: len.unwrap_or(0), + }) + } + + fn serialize_tuple(self, _len: usize) -> Result { + unsupported!("tuple") + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + unsupported!("tuple struct") + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unsupported!("tuple variant") + } + + fn serialize_map(self, len: Option) -> Result { + Ok(GenericSerializeMap::new(len)) + } + + fn serialize_struct( + self, + _name: &'static str, + len: usize, + ) -> Result { + Ok(GenericSerializeStruct::new(len)) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unsupported!("struct variant") + } +} + +enum ValueSerializeSeq { + End { len: usize }, + Byte(Vec), + Short(Vec), + Int(Vec), + Long(Vec), + Float(Vec), + Double(Vec), + ByteArray(Vec>), + String(Vec), + List(Vec), + Compound(Vec), + IntArray(Vec>), + LongArray(Vec>), +} + +impl SerializeSeq for ValueSerializeSeq { + type Ok = Value; + + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + macro_rules! serialize_variant { + ($variant:ident, $vec:ident, $elem:ident) => {{ + match $elem.serialize(ValueSerializer)? 
{ + Value::$variant(val) => { + $vec.push(val); + Ok(()) + } + _ => Err(Error::new_static(concat!( + "heterogeneous NBT list (expected `", + stringify!($variant), + "` element)" + ))), + } + }}; + } + + match self { + Self::End { len } => { + fn vec(elem: T, len: usize) -> Vec { + let mut vec = Vec::with_capacity(len); + vec.push(elem); + vec + } + + // Set the first element of the list. + *self = match value.serialize(ValueSerializer)? { + Value::Byte(v) => Self::Byte(vec(v, *len)), + Value::Short(v) => Self::Short(vec(v, *len)), + Value::Int(v) => Self::Int(vec(v, *len)), + Value::Long(v) => Self::Long(vec(v, *len)), + Value::Float(v) => Self::Float(vec(v, *len)), + Value::Double(v) => Self::Double(vec(v, *len)), + Value::ByteArray(v) => Self::ByteArray(vec(v, *len)), + Value::String(v) => Self::String(vec(v, *len)), + Value::List(v) => Self::List(vec(v, *len)), + Value::Compound(v) => Self::Compound(vec(v, *len)), + Value::IntArray(v) => Self::IntArray(vec(v, *len)), + Value::LongArray(v) => Self::LongArray(vec(v, *len)), + }; + Ok(()) + } + Self::Byte(v) => serialize_variant!(Byte, v, value), + Self::Short(v) => serialize_variant!(Short, v, value), + Self::Int(v) => serialize_variant!(Int, v, value), + Self::Long(v) => serialize_variant!(Long, v, value), + Self::Float(v) => serialize_variant!(Float, v, value), + Self::Double(v) => serialize_variant!(Double, v, value), + Self::ByteArray(v) => serialize_variant!(ByteArray, v, value), + Self::String(v) => serialize_variant!(String, v, value), + Self::List(v) => serialize_variant!(List, v, value), + Self::Compound(v) => serialize_variant!(Compound, v, value), + Self::IntArray(v) => serialize_variant!(IntArray, v, value), + Self::LongArray(v) => serialize_variant!(LongArray, v, value), + } + } + + fn end(self) -> Result { + Ok(match self { + Self::End { .. 
} => List::End.into(), + Self::Byte(v) => v.into(), + Self::Short(v) => List::Short(v).into(), + Self::Int(v) => v.into(), + Self::Long(v) => List::Long(v).into(), + Self::Float(v) => List::Float(v).into(), + Self::Double(v) => List::Double(v).into(), + Self::ByteArray(v) => List::ByteArray(v).into(), + Self::String(v) => List::String(v).into(), + Self::List(v) => List::List(v).into(), + Self::Compound(v) => List::Compound(v).into(), + Self::IntArray(v) => List::IntArray(v).into(), + Self::LongArray(v) => List::LongArray(v).into(), + }) + } +} + +#[doc(hidden)] +#[derive(Debug)] +pub struct GenericSerializeMap { + /// Temp storage for `serialize_key`. + key: Option, + res: Compound, + _marker: PhantomData, +} + +impl GenericSerializeMap { + pub fn new(len: Option) -> Self { + Self { + key: None, + res: Compound::with_capacity(len.unwrap_or(0)), + _marker: PhantomData, + } + } +} + +impl SerializeMap for GenericSerializeMap +where + Compound: Into, +{ + type Ok = Ok; + + type Error = Error; + + fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + debug_assert!( + self.key.is_none(), + "call to `serialize_key` must be followed by `serialize_value`" + ); + + match key.serialize(ValueSerializer)? 
{ + Value::String(s) => { + self.key = Some(s); + Ok(()) + } + _ => Err(Error::new_static("invalid map key type (expected string)")), + } + } + + fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + let key = self + .key + .take() + .expect("missing previous call to `serialize_key`"); + self.res.insert(key, value.serialize(ValueSerializer)?); + Ok(()) + } + + fn end(self) -> Result { + Ok(self.res.into()) + } +} + +#[doc(hidden)] +#[derive(Debug)] +pub struct GenericSerializeStruct { + c: Compound, + _marker: PhantomData, +} + +impl GenericSerializeStruct { + fn new(len: usize) -> Self { + Self { + c: Compound::with_capacity(len), + _marker: PhantomData, + } + } +} + +impl SerializeStruct for GenericSerializeStruct +where + Compound: Into, +{ + type Ok = Ok; + + type Error = Error; + + fn serialize_field( + &mut self, + key: &'static str, + value: &T, + ) -> Result<(), Self::Error> + where + T: Serialize, + { + self.c.insert(key, value.serialize(ValueSerializer)?); + Ok(()) + } + + fn end(self) -> Result { + Ok(self.c.into()) + } +} diff --git a/valence_nbt/src/serde/tests.rs b/valence_nbt/src/serde/tests.rs new file mode 100644 index 0000000..ab4dd2d --- /dev/null +++ b/valence_nbt/src/serde/tests.rs @@ -0,0 +1,109 @@ +use pretty_assertions::assert_eq; +use serde::{Deserialize, Serialize}; +use serde_json::json; + +use super::*; +use crate::{compound, Compound, List}; + +#[derive(Serialize, Deserialize, PartialEq, Debug)] +struct Struct { + foo: i32, + bar: StructInner, + baz: String, + quux: Vec, + blah: EnumInner, +} + +#[derive(Serialize, Deserialize, PartialEq, Debug)] +struct StructInner { + a: bool, + b: i64, + c: Vec>, + d: Vec, +} + +#[derive(Serialize, Deserialize, PartialEq, Debug)] +enum EnumInner { + A, + B, + C, +} + +fn make_struct() -> Struct { + Struct { + foo: i32::MIN, + bar: StructInner { + a: true, + b: 123456789, + c: vec![vec![1, 2, 3], vec![4, 5, 6]], + d: vec![], + }, + baz: "๐Ÿคจ".into(), + 
quux: vec![std::f32::consts::PI, f32::MAX, f32::MIN], + blah: EnumInner::B, + } +} + +fn make_compound() -> Compound { + compound! { + "foo" => i32::MIN, + "bar" => compound! { + "a" => true, + "b" => 123456789_i64, + "c" => List::IntArray(vec![vec![1, 2, 3], vec![4, 5, 6]]), + "d" => List::End, + }, + "baz" => "๐Ÿคจ", + "quux" => List::Float(vec![ + std::f32::consts::PI, + f32::MAX, + f32::MIN, + ]), + "blah" => "B" + } +} + +fn make_json() -> serde_json::Value { + json!({ + "foo": i32::MIN, + "bar": { + "a": true, + "b": 123456789_i64, + "c": [[1, 2, 3], [4, 5, 6]], + "d": [] + }, + "baz": "๐Ÿคจ", + "quux": [ + std::f32::consts::PI, + f32::MAX, + f32::MIN, + ], + "blah": "B" + }) +} + +#[test] +fn struct_to_compound() { + let c = make_struct().serialize(CompoundSerializer).unwrap(); + + assert_eq!(c, make_compound()); +} + +#[test] +fn compound_to_struct() { + let s = Struct::deserialize(make_compound()).unwrap(); + + assert_eq!(s, make_struct()); +} + +#[test] +fn compound_to_json() { + let mut j = serde_json::to_value(make_compound()).unwrap(); + + // Bools map to bytes in NBT, but the result should be the same otherwise. + let p = j.pointer_mut("/bar/a").unwrap(); + assert_eq!(*p, serde_json::Value::from(1)); + *p = true.into(); + + assert_eq!(j, make_json()); +} diff --git a/valence_nbt/src/snbt.rs b/valence_nbt/src/snbt.rs new file mode 100644 index 0000000..fa4947b --- /dev/null +++ b/valence_nbt/src/snbt.rs @@ -0,0 +1,722 @@ +use std::error::Error; +use std::fmt::{Display, Formatter}; +use std::iter::Peekable; +use std::str::Chars; + +use crate::{Compound, List, Value}; + +const STRING_MAX_LEN: usize = 32767; +/// Maximum recursion depth to prevent overflowing the call stack. 
+const MAX_DEPTH: usize = 512; + +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +pub enum SnbtErrorKind { + ReachEndOfStream, + InvalidEscapeSequence, + EmptyKeyInCompound, + ExpectColon, + ExpectValue, + ExpectComma, + WrongTypeInArray, + DifferentTypesInList, + LongString, + TrailingData, + DepthLimitExceeded, +} + +impl Display for SnbtErrorKind { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + use SnbtErrorKind::*; + match self { + ReachEndOfStream => write!(f, "reach end of stream"), + InvalidEscapeSequence => write!(f, "invalid escape sequence"), + EmptyKeyInCompound => write!(f, "empty key in compound"), + ExpectColon => write!(f, "expect colon"), + ExpectValue => write!(f, "expect value"), + ExpectComma => write!(f, "expect comma"), + WrongTypeInArray => write!(f, "wrong type in array"), + DifferentTypesInList => write!(f, "different types in list"), + LongString => write!(f, "long string"), + TrailingData => write!(f, "extra data after end"), + DepthLimitExceeded => write!(f, "depth limit exceeded"), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +pub struct SnbtError { + pub kind: SnbtErrorKind, + pub line: usize, + pub column: usize, +} + +impl SnbtError { + pub fn new(kind: SnbtErrorKind, line: usize, column: usize) -> Self { + Self { kind, line, column } + } +} + +impl Display for SnbtError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "@ {},{}: {}", self.line, self.column, self.kind) + } +} + +impl Error for SnbtError {} + +type Result = std::result::Result; + +#[derive(Debug)] +pub struct SnbtReader<'a> { + line: usize, + column: usize, + index: usize, + depth: usize, + iter: Peekable>, + pushed_back: Option, +} + +impl<'a> SnbtReader<'a> { + pub fn new(input: &'a str) -> Self { + Self { + line: 1, + column: 1, + index: 0, + depth: 0, + iter: input.chars().peekable(), + pushed_back: None, + } + } + + fn check_depth(&mut self, f: impl FnOnce(&mut Self) -> Result) -> Result { + if self.depth >= 
MAX_DEPTH { + Err(self.make_error(SnbtErrorKind::DepthLimitExceeded)) + } else { + self.depth += 1; + let res = f(self); + self.depth -= 1; + res + } + } + + fn make_error(&self, kind: SnbtErrorKind) -> SnbtError { + SnbtError::new(kind, self.line, self.column) + } + + fn peek(&mut self) -> Result { + if let Some(c) = self.pushed_back { + Ok(c) + } else { + self.iter + .peek() + .copied() + .ok_or_else(|| self.make_error(SnbtErrorKind::ReachEndOfStream)) + } + } + + fn next(&mut self) { + if self.pushed_back.is_some() { + self.pushed_back = None; + return; + } + + let result = self.iter.next(); + + if let Some(c) = result { + if c == '\n' { + self.line += 1; + self.column = 1; + } else { + self.column += 1; + } + self.index += c.len_utf8(); + } + } + + /// Push back a char, only one char can be pushed back + fn push_back(&mut self, c: char) { + if c == '\n' { + self.line -= 1; + self.column = 1; + } else { + self.column -= 1; + } + + self.index -= c.len_utf8(); + + match self.pushed_back { + Some(_) => panic!("Can't push back two chars"), + None => self.pushed_back = Some(c), + }; + } + + fn skip_whitespace(&mut self) { + loop { + match self.peek() { + Ok(c) if c.is_whitespace() => self.next(), + _ => break, + }; + } + } + + fn read_string(&mut self) -> Result { + let first = self.peek()?; + + let str = match first { + '\"' | '\'' => self.read_quoted_string(), + _ => self.read_unquoted_string(), + }?; + + if str.len() > STRING_MAX_LEN { + return Err(self.make_error(SnbtErrorKind::LongString)); + } + + Ok(str) + } + + fn read_unquoted_string(&mut self) -> Result { + let mut result = String::new(); + + loop { + let input = self.peek(); + match input { + Ok('a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '+' | '.') => { + result.push(input?); + self.next(); + } + _ => break, + } + } + + Ok(result) + } + + fn read_quoted_string(&mut self) -> Result { + let quote = self.peek()?; + self.next(); + + let mut result = String::new(); + loop { + let input = self.peek(); + 
match input { + Ok(c) if c == quote => { + self.next(); + break; + } + Ok('\\') => { + self.next(); + + let escape = self.peek()?; + if escape == quote || escape == '\\' { + result.push(escape); + } else { + return Err(self.make_error(SnbtErrorKind::InvalidEscapeSequence)); + } + + self.next(); + } + Ok(c) => { + result.push(c); + self.next(); + } + Err(e) => return Err(e), + } + } + if result.len() > STRING_MAX_LEN { + return Err(self.make_error(SnbtErrorKind::LongString)); + } + Ok(result) + } + + fn parse_compound(&mut self) -> Result { + self.next(); + self.skip_whitespace(); + + let mut cpd = Compound::new(); + while self.peek()? != '}' { + let key = self.read_string()?; + + self.skip_whitespace(); + + if key.is_empty() { + return Err(self.make_error(SnbtErrorKind::EmptyKeyInCompound)); + } + + if self.peek()? != ':' { + return Err(self.make_error(SnbtErrorKind::ExpectColon)); + } + + self.next(); + self.skip_whitespace(); + + let value = self.parse_element()?; + + self.skip_whitespace(); + if self.peek()? == ',' { + self.next(); + self.skip_whitespace(); + } else if self.peek()? != '}' { + return Err(self.make_error(SnbtErrorKind::ExpectComma)); + } + + cpd.insert(key, value); + } + self.next(); + Ok(cpd) + } + + fn continue_parse_list(&mut self) -> Result { + self.skip_whitespace(); + + let mut list = List::End; + + while self.peek()? 
!= ']' { + let value = self.parse_element()?; + self.skip_whitespace(); + + match (&mut list, value) { + (list @ List::End, value) => *list = value.into(), + (List::Byte(l), Value::Byte(v)) => l.push(v), + (List::Short(l), Value::Short(v)) => l.push(v), + (List::Int(l), Value::Int(v)) => l.push(v), + (List::Long(l), Value::Long(v)) => l.push(v), + (List::Float(l), Value::Float(v)) => l.push(v), + (List::Double(l), Value::Double(v)) => l.push(v), + (List::ByteArray(l), Value::ByteArray(v)) => l.push(v), + (List::String(l), Value::String(v)) => l.push(v), + (List::List(l), Value::List(v)) => l.push(v), + (List::Compound(l), Value::Compound(v)) => l.push(v), + (List::IntArray(l), Value::IntArray(v)) => l.push(v), + (List::LongArray(l), Value::LongArray(v)) => l.push(v), + _ => return Err(self.make_error(SnbtErrorKind::DifferentTypesInList)), + } + + if self.peek()? == ',' { + self.next(); + self.skip_whitespace(); + } else if self.peek()? != ']' { + return Err(self.make_error(SnbtErrorKind::ExpectComma)); + } + } + self.next(); + + Ok(list) + } + + fn parse_list_like(&mut self) -> Result { + self.next(); + + let type_char = self.peek()?; + + let mut values = match type_char { + 'B' => Value::ByteArray(vec![]), + 'I' => Value::IntArray(vec![]), + 'L' => Value::LongArray(vec![]), + _ => return self.check_depth(|v| Ok(v.continue_parse_list()?.into())), + }; + + self.next(); + + if self.peek()? != ';' { + self.push_back(type_char); + return self.check_depth(|v| Ok(v.continue_parse_list()?.into())); + } + + self.next(); + self.skip_whitespace(); + + while self.peek()? != ']' { + let value = self.parse_element()?; + + match (&mut values, value) { + (Value::ByteArray(l), Value::Byte(v)) => l.push(v), + (Value::IntArray(l), Value::Int(v)) => l.push(v), + (Value::LongArray(l), Value::Long(v)) => l.push(v), + _ => return Err(self.make_error(SnbtErrorKind::WrongTypeInArray)), + } + + self.skip_whitespace(); + if self.peek()? 
== ',' { + self.next(); + self.skip_whitespace(); + } else if self.peek()? != ']' { + return Err(self.make_error(SnbtErrorKind::ExpectComma)); + } + } + + self.next(); + + Ok(values) + } + + fn parse_primitive(&mut self) -> Result { + macro_rules! try_ret { + // Try possible solution until one works + ($v:expr) => {{ + match $v { + Ok(v) => return Ok(v.into()), + Err(_) => (), + } + }}; + } + + let target = self.read_unquoted_string()?; + + match target + .bytes() + .last() + .ok_or_else(|| self.make_error(SnbtErrorKind::ExpectValue))? + { + b'b' | b'B' => try_ret!(target[..target.len() - 1].parse::()), + b's' | b'S' => try_ret!(target[..target.len() - 1].parse::()), + b'l' | b'L' => try_ret!(target[..target.len() - 1].parse::()), + b'f' | b'F' => try_ret!(target[..target.len() - 1].parse::()), + b'd' | b'D' => try_ret!(target[..target.len() - 1].parse::()), + _ => (), + } + + match target.as_str() { + "true" => return Ok(Value::Byte(1)), + "false" => return Ok(Value::Byte(0)), + _ => { + try_ret!(target.parse::()); + try_ret!(target.parse::()); + } + }; + + if target.len() > STRING_MAX_LEN { + return Err(self.make_error(SnbtErrorKind::LongString)); + } + + Ok(Value::String(target)) + } + + /// Read the next element in the SNBT string. + /// [`SnbtErrorKind::TrailingData`] cannot be returned because it is not + /// considered to be an error. + pub fn parse_element(&mut self) -> Result { + self.skip_whitespace(); + + match self.peek()? { + '{' => self.check_depth(|v| Ok(v.parse_compound()?.into())), + '[' => self.parse_list_like(), + '"' | '\'' => self.read_quoted_string().map(|s| s.into()), + _ => self.parse_primitive(), + } + } + + pub fn read(&mut self) -> Result { + let value = self.parse_element()?; + + self.skip_whitespace(); + if self.peek().is_ok() { + return Err(self.make_error(SnbtErrorKind::TrailingData)); + } + + Ok(value) + } + + /// Get the number of bytes read. 
+ /// It's useful when you want to read a SNBT string from an command argument + /// since there may be trailing data. + pub fn bytes_read(&self) -> usize { + self.index + } +} +/// Parse a string in SNBT format into a `Value`. +/// Assert that the string has no trailing data. +/// SNBT is quite similar to JSON, but with some differences. +/// See [the wiki](https://minecraft.wiki/w/NBT_format#SNBT_format) for more information. +/// +/// # Example +/// +/// ``` +/// use valence_nbt::snbt::from_snbt_str; +/// use valence_nbt::Value; +/// +/// let value = from_snbt_str("1f").unwrap(); +/// assert_eq!(value, Value::Float(1.0)); +/// ``` +pub fn from_snbt_str(snbt: &str) -> Result { + SnbtReader::new(snbt).read() +} + +#[derive(Debug)] +pub struct SnbtWriter<'a> { + output: &'a mut String, +} + +impl<'a> SnbtWriter<'a> { + pub fn new(output: &'a mut String) -> Self { + Self { output } + } + + fn write_string(&mut self, s: &str) { + let mut need_quote = false; + for c in s.chars() { + if !matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '+' | '.') { + need_quote = true; + break; + } + } + + if need_quote { + self.output.push('"'); + for c in s.chars() { + match c { + '"' => self.output.push_str("\\\""), + '\\' => self.output.push_str("\\\\"), + _ => self.output.push(c), + } + } + self.output.push('"'); + } else { + self.output.push_str(s); + } + } + + fn write_primitive_array<'b>( + &mut self, + prefix: &str, + iter: impl Iterator + 'b + Copy)>, + ) { + self.output.push('['); + self.output.push_str(prefix); + + let mut first = true; + + for v in iter { + if !first { + self.output.push(','); + } + first = false; + + self.write_element(&(*v).into()); + } + + self.output.push(']'); + } + + fn write_primitive(&mut self, postfix: &str, value: impl ToString) { + self.output.push_str(&value.to_string()); + self.output.push_str(postfix); + } + + fn write_list(&mut self, list: &List) { + macro_rules! 
variant_impl { + ($v:expr, $handle:expr) => {{ + self.output.push('['); + + let mut first = true; + for v in $v.iter() { + if !first { + self.output.push(','); + } + first = false; + $handle(v); + } + + self.output.push(']'); + }}; + } + #[allow(clippy::redundant_closure_call)] + match list { + List::Byte(v) => variant_impl!(v, |v| self.write_primitive("b", v)), + List::Short(v) => variant_impl!(v, |v| self.write_primitive("s", v)), + List::Int(v) => variant_impl!(v, |v| self.write_primitive("", v)), + List::Long(v) => variant_impl!(v, |v| self.write_primitive("l", v)), + List::Float(v) => variant_impl!(v, |v| self.write_primitive("f", v)), + List::Double(v) => variant_impl!(v, |v| self.write_primitive("d", v)), + List::ByteArray(v) => { + variant_impl!(v, |v: &Vec| self.write_primitive_array("B", v.iter())) + } + List::IntArray(v) => { + variant_impl!(v, |v: &Vec| self.write_primitive_array("", v.iter())) + } + List::LongArray(v) => { + variant_impl!(v, |v: &Vec| self.write_primitive_array("L", v.iter())) + } + List::String(v) => variant_impl!(v, |v| self.write_string(v)), + List::List(v) => variant_impl!(v, |v| self.write_list(v)), + List::Compound(v) => variant_impl!(v, |v| self.write_compound(v)), + List::End => self.output.push_str("[]"), + } + } + + fn write_compound(&mut self, compound: &Compound) { + self.output.push('{'); + + let mut first = true; + for (k, v) in compound { + if !first { + self.output.push(','); + } + + first = false; + + self.write_string(k); + self.output.push(':'); + self.write_element(v); + } + + self.output.push('}'); + } + + /// Write a value to the output. 
+ pub fn write_element(&mut self, value: &Value) { + use Value::*; + match value { + Byte(v) => self.write_primitive("b", v), + Short(v) => self.write_primitive("s", v), + Int(v) => self.write_primitive("", v), + Long(v) => self.write_primitive("l", v), + Float(v) => self.write_primitive("f", v), + Double(v) => self.write_primitive("d", v), + ByteArray(v) => self.write_primitive_array("B;", v.iter()), + IntArray(v) => self.write_primitive_array("I;", v.iter()), + LongArray(v) => self.write_primitive_array("L;", v.iter()), + String(v) => self.write_string(v), + List(v) => self.write_list(v), + Compound(v) => self.write_compound(v), + } + } +} + +/// Convert a value to a string in SNBT format. +pub fn to_snbt_string(value: &Value) -> String { + let mut output = String::new(); + let mut writer = SnbtWriter::new(&mut output); + + writer.write_element(value); + + output +} + +impl Display for SnbtWriter<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.output) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn test_parse() { + let str = r#" + { + foo: 1, + 'bar': 1.0, + "baz": 1.0f, + "hello'": "hello world", + "world": "hello\"world", + 1.5f: 1.5d, + 3b: 2f, + bool: false, + more: { + iarr: [I; 1, 2, 3], + larr: [L; 1L, 2L, 3L], + }, + empty: [Bibabo ], + } + "#; + + let value = from_snbt_str(str).unwrap(); + let Value::Compound(cpd) = &value else { + unreachable!() + }; + + assert_eq!(*cpd.get("foo").unwrap(), 1_i32.into()); + assert_eq!(*cpd.get("bar").unwrap(), 1_f64.into()); + assert_eq!(*cpd.get("baz").unwrap(), 1_f32.into()); + assert_eq!(*cpd.get("hello'").unwrap(), "hello world".into()); + assert_eq!(*cpd.get("world").unwrap(), "hello\"world".into()); + assert_eq!(*cpd.get("1.5f").unwrap(), 1.5_f64.into()); + assert_eq!(*cpd.get("3b").unwrap(), 2_f32.into()); + assert_eq!(*cpd.get("bool").unwrap(), 0_i8.into()); + + let Some(Value::Compound(more)) = cpd.get("more") else { + unreachable!() + }; + + 
assert_eq!(*more.get("iarr").unwrap(), vec![1, 2, 3].into()); + + assert_eq!(*more.get("larr").unwrap(), vec![1_i64, 2, 3].into()); + + let Value::List(List::String(list)) = cpd.get("empty").unwrap() else { + unreachable!() + }; + + assert_eq!(list[0], "Bibabo"); + + assert_eq!( + from_snbt_str("\"\\n\"").unwrap_err().kind, + SnbtErrorKind::InvalidEscapeSequence + ); + + assert_eq!( + from_snbt_str("[L; 1]").unwrap_err().kind, + SnbtErrorKind::WrongTypeInArray + ); + + assert_eq!( + from_snbt_str("[L; 1L, 2L, 3L").unwrap_err().kind, + SnbtErrorKind::ReachEndOfStream + ); + + assert_eq!( + from_snbt_str("[L; 1L, 2L, 3L,]dewdwe").unwrap_err().kind, + SnbtErrorKind::TrailingData + ); + + assert_eq!( + from_snbt_str("{ foo: }").unwrap_err().kind, + SnbtErrorKind::ExpectValue + ); + + assert_eq!( + from_snbt_str("{ {}, }").unwrap_err().kind, + SnbtErrorKind::EmptyKeyInCompound + ); + + assert_eq!( + from_snbt_str("{ foo 1 }").unwrap_err().kind, + SnbtErrorKind::ExpectColon + ); + + assert_eq!( + from_snbt_str("{ foo: 1 bar: 2 }").unwrap_err().kind, + SnbtErrorKind::ExpectComma + ); + + assert_eq!( + from_snbt_str("[{}, []]").unwrap_err().kind, + SnbtErrorKind::DifferentTypesInList + ); + + assert_eq!( + from_snbt_str(&String::from_utf8(vec![b'e'; 32768]).unwrap()) + .unwrap_err() + .kind, + SnbtErrorKind::LongString + ); + + assert_eq!( + from_snbt_str( + &String::from_utf8([[b'['; MAX_DEPTH + 1], [b']'; MAX_DEPTH + 1]].concat()) + .unwrap() + ) + .unwrap_err() + .kind, + SnbtErrorKind::DepthLimitExceeded + ); + + #[cfg(feature = "preserve_order")] + assert_eq!( + to_snbt_string(&value), + r#"{foo:1,bar:1d,baz:1f,"hello'":"hello world",world:"hello\"world",1.5f:1.5d,3b:2f,bool:0b,more:{iarr:[I;1,2,3],larr:[L;1l,2l,3l]},empty:[Bibabo]}"# + ); + } +} diff --git a/valence_nbt/src/tag.rs b/valence_nbt/src/tag.rs new file mode 100644 index 0000000..ecb30f3 --- /dev/null +++ b/valence_nbt/src/tag.rs @@ -0,0 +1,19 @@ +/// One of the possible NBT data types. 
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[repr(u8)] +pub enum Tag { + // Variant order is significant! + End, + Byte, + Short, + Int, + Long, + Float, + Double, + ByteArray, + String, + List, + Compound, + IntArray, + LongArray, +} diff --git a/valence_nbt/src/value.rs b/valence_nbt/src/value.rs new file mode 100644 index 0000000..4d2d795 --- /dev/null +++ b/valence_nbt/src/value.rs @@ -0,0 +1,634 @@ +use std::borrow::Cow; +use std::hash::Hash; + +use crate::tag::Tag; +use crate::{Compound, List}; + +/// Represents an arbitrary NBT value. +#[derive(Clone, Debug)] +pub enum Value { + Byte(i8), + Short(i16), + Int(i32), + Long(i64), + Float(f32), + Double(f64), + ByteArray(Vec), + String(S), + List(List), + Compound(Compound), + IntArray(Vec), + LongArray(Vec), +} + +/// Represents a reference to an arbitrary NBT value, where the tag is not part +/// of the reference. +#[derive(Copy, Clone, Debug)] +pub enum ValueRef<'a, S = String> { + Byte(&'a i8), + Short(&'a i16), + Int(&'a i32), + Long(&'a i64), + Float(&'a f32), + Double(&'a f64), + ByteArray(&'a [i8]), + String(&'a S), + List(&'a List), + Compound(&'a Compound), + IntArray(&'a [i32]), + LongArray(&'a [i64]), +} + +/// Represents a mutable reference to an arbitrary NBT value, where the tag is +/// not part of the reference. +#[derive(Debug)] +pub enum ValueMut<'a, S = String> { + Byte(&'a mut i8), + Short(&'a mut i16), + Int(&'a mut i32), + Long(&'a mut i64), + Float(&'a mut f32), + Double(&'a mut f64), + ByteArray(&'a mut Vec), + String(&'a mut S), + List(&'a mut List), + Compound(&'a mut Compound), + IntArray(&'a mut Vec), + LongArray(&'a mut Vec), +} + +macro_rules! impl_value { + ($name:ident, $($lifetime:lifetime)?, ($($deref:tt)*), $($reference:tt)*) => { + macro_rules! 
as_number { + ($method_name:ident, $ty:ty, $($deref)*) => { + #[doc = concat!("If this value is a number, returns the `", stringify!($ty), "` representation of this value.")] + pub fn $method_name(&self) -> Option<$ty> { + #[allow(trivial_numeric_casts)] + match self { + Self::Byte(v) => Some($($deref)* v as $ty), + Self::Short(v) => Some($($deref)* v as $ty), + Self::Int(v) => Some($($deref)* v as $ty), + Self::Long(v) => Some($($deref)* v as $ty), + Self::Float(v) => Some(v.floor() as $ty), + Self::Double(v) => Some(v.floor() as $ty), + _ => None, + } + } + } + } + + macro_rules! as_number_float { + ($method_name:ident, $ty:ty, $($deref)*) => { + #[doc = concat!("If this value is a number, returns the `", stringify!($ty), "` representation of this value.")] + pub fn $method_name(&self) -> Option<$ty> { + #[allow(trivial_numeric_casts)] + match self { + Self::Byte(v) => Some($($deref)* v as $ty), + Self::Short(v) => Some($($deref)* v as $ty), + Self::Int(v) => Some($($deref)* v as $ty), + Self::Long(v) => Some($($deref)* v as $ty), + Self::Float(v) => Some($($deref)* v as $ty), + Self::Double(v) => Some($($deref)* v as $ty), + _ => None, + } + } + } + } + + impl <$($lifetime,)? S> $name<$($lifetime,)? S> { + /// Returns the type of this value. + pub fn tag(&self) -> Tag { + match self { + Self::Byte(_) => Tag::Byte, + Self::Short(_) => Tag::Short, + Self::Int(_) => Tag::Int, + Self::Long(_) => Tag::Long, + Self::Float(_) => Tag::Float, + Self::Double(_) => Tag::Double, + Self::ByteArray(_) => Tag::ByteArray, + Self::String(_) => Tag::String, + Self::List(_) => Tag::List, + Self::Compound(_) => Tag::Compound, + Self::IntArray(_) => Tag::IntArray, + Self::LongArray(_) => Tag::LongArray, + } + } + + /// Returns whether this value is a number, i.e. a byte, short, int, long, float or double. 
+ pub fn is_number(&self) -> bool { + match self { + Self::Byte(_) | Self::Short(_) | Self::Int(_) | Self::Long(_) | Self::Float(_) | Self::Double(_) => true, + _ => false, + } + } + + as_number!(as_i8, i8, $($deref)*); + as_number!(as_i16, i16, $($deref)*); + as_number!(as_i32, i32, $($deref)*); + as_number!(as_i64, i64, $($deref)*); + as_number_float!(as_f32, f32, $($deref)*); + as_number_float!(as_f64, f64, $($deref)*); + + /// If this value is a number, returns the `bool` representation of this value. + pub fn as_bool(&self) -> Option { + self.as_i8().map(|v| v != 0) + } + } + + impl <$($lifetime,)? S> From<$($reference)* i8> for $name<$($lifetime,)? S> { + fn from(v: $($reference)* i8) -> Self { + Self::Byte(v) + } + } + + impl <$($lifetime,)? S> From<$($reference)* i16> for $name<$($lifetime,)? S> { + fn from(v: $($reference)* i16) -> Self { + Self::Short(v) + } + } + + impl <$($lifetime,)? S> From<$($reference)* i32> for $name<$($lifetime,)? S> { + fn from(v: $($reference)* i32) -> Self { + Self::Int(v) + } + } + + impl <$($lifetime,)? S> From<$($reference)* i64> for $name<$($lifetime,)? S> { + fn from(v: $($reference)* i64) -> Self { + Self::Long(v) + } + } + + impl <$($lifetime,)? S> From<$($reference)* f32> for $name<$($lifetime,)? S> { + fn from(v: $($reference)* f32) -> Self { + Self::Float(v) + } + } + + impl <$($lifetime,)? S> From<$($reference)* f64> for $name<$($lifetime,)? S> { + fn from(v: $($reference)* f64) -> Self { + Self::Double(v) + } + } + + impl <$($lifetime,)? S> From<$($reference)* List> for $name<$($lifetime,)? S> { + fn from(v: $($reference)* List) -> Self { + Self::List(v) + } + } + + impl <$($lifetime,)? S> From<$($reference)* Compound> for $name<$($lifetime,)? S> { + fn from(v: $($reference)* Compound) -> Self { + Self::Compound(v) + } + } + + impl <$($lifetime,)? S> PartialEq for $name<$($lifetime,)? 
S> where S: Ord + Hash { + fn eq(&self, other: &Self) -> bool { + match self { + Self::Byte(v) => matches!(other, Self::Byte(other_v) if v == other_v), + Self::Short(v) => matches!(other, Self::Short(other_v) if v == other_v), + Self::Int(v) => matches!(other, Self::Int(other_v) if v == other_v), + Self::Long(v) => matches!(other, Self::Long(other_v) if v == other_v), + Self::Float(v) => matches!(other, Self::Float(other_v) if v == other_v), + Self::Double(v) => matches!(other, Self::Double(other_v) if v == other_v), + Self::ByteArray(v) => matches!(other, Self::ByteArray(other_v) if v == other_v), + Self::String(v) => matches!(other, Self::String(other_v) if v == other_v), + Self::List(v) => matches!(other, Self::List(other_v) if v == other_v), + Self::Compound(v) => matches!(other, Self::Compound(other_v) if v == other_v), + Self::IntArray(v) => matches!(other, Self::IntArray(other_v) if v == other_v), + Self::LongArray(v) => matches!(other, Self::LongArray(other_v) if v == other_v), + } + } + } + } +} + +impl_value!(Value,,(*),); +impl_value!(ValueRef, 'a, (**), &'a); +impl_value!(ValueMut, 'a, (**), &'a mut); + +impl Value { + /// Converts a reference to a value to a [`ValueRef`]. + pub fn as_value_ref(&self) -> ValueRef { + match self { + Value::Byte(v) => ValueRef::Byte(v), + Value::Short(v) => ValueRef::Short(v), + Value::Int(v) => ValueRef::Int(v), + Value::Long(v) => ValueRef::Long(v), + Value::Float(v) => ValueRef::Float(v), + Value::Double(v) => ValueRef::Double(v), + Value::ByteArray(v) => ValueRef::ByteArray(&v[..]), + Value::String(v) => ValueRef::String(v), + Value::List(v) => ValueRef::List(v), + Value::Compound(v) => ValueRef::Compound(v), + Value::IntArray(v) => ValueRef::IntArray(&v[..]), + Value::LongArray(v) => ValueRef::LongArray(&v[..]), + } + } + + /// Converts a mutable reference to a value to a [`ValueMut`]. 
+ pub fn as_value_mut(&mut self) -> ValueMut { + match self { + Value::Byte(v) => ValueMut::Byte(v), + Value::Short(v) => ValueMut::Short(v), + Value::Int(v) => ValueMut::Int(v), + Value::Long(v) => ValueMut::Long(v), + Value::Float(v) => ValueMut::Float(v), + Value::Double(v) => ValueMut::Double(v), + Value::ByteArray(v) => ValueMut::ByteArray(v), + Value::String(v) => ValueMut::String(v), + Value::List(v) => ValueMut::List(v), + Value::Compound(v) => ValueMut::Compound(v), + Value::IntArray(v) => ValueMut::IntArray(v), + Value::LongArray(v) => ValueMut::LongArray(v), + } + } +} + +impl ValueRef<'_, S> +where + S: Clone, +{ + /// Clones this value reference to a new owned [`Value`]. + pub fn to_value(&self) -> Value { + match *self { + ValueRef::Byte(v) => Value::Byte(*v), + ValueRef::Short(v) => Value::Short(*v), + ValueRef::Int(v) => Value::Int(*v), + ValueRef::Long(v) => Value::Long(*v), + ValueRef::Float(v) => Value::Float(*v), + ValueRef::Double(v) => Value::Double(*v), + ValueRef::ByteArray(v) => Value::ByteArray(v.to_vec()), + ValueRef::String(v) => Value::String(v.to_owned()), + ValueRef::List(v) => Value::List(v.clone()), + ValueRef::Compound(v) => Value::Compound(v.clone()), + ValueRef::IntArray(v) => Value::IntArray(v.to_vec()), + ValueRef::LongArray(v) => Value::LongArray(v.to_vec()), + } + } +} + +impl ValueMut<'_, S> +where + S: Clone, +{ + /// Clones this mutable value reference to a new owned [`Value`]. 
+ pub fn to_value(&self) -> Value { + match self { + ValueMut::Byte(v) => Value::Byte(**v), + ValueMut::Short(v) => Value::Short(**v), + ValueMut::Int(v) => Value::Int(**v), + ValueMut::Long(v) => Value::Long(**v), + ValueMut::Float(v) => Value::Float(**v), + ValueMut::Double(v) => Value::Double(**v), + ValueMut::ByteArray(v) => Value::ByteArray((*v).clone()), + ValueMut::String(v) => Value::String((*v).clone()), + ValueMut::List(v) => Value::List((*v).clone()), + ValueMut::Compound(v) => Value::Compound((*v).clone()), + ValueMut::IntArray(v) => Value::IntArray((*v).clone()), + ValueMut::LongArray(v) => Value::LongArray((*v).clone()), + } + } +} + +impl<'a, S> ValueMut<'a, S> { + /// Downgrades this mutable value reference into an immutable [`ValueRef`]. + pub fn into_value_ref(self) -> ValueRef<'a, S> { + match self { + ValueMut::Byte(v) => ValueRef::Byte(v), + ValueMut::Short(v) => ValueRef::Short(v), + ValueMut::Int(v) => ValueRef::Int(v), + ValueMut::Long(v) => ValueRef::Long(v), + ValueMut::Float(v) => ValueRef::Float(v), + ValueMut::Double(v) => ValueRef::Double(v), + ValueMut::ByteArray(v) => ValueRef::ByteArray(&v[..]), + ValueMut::String(v) => ValueRef::String(v), + ValueMut::List(v) => ValueRef::List(v), + ValueMut::Compound(v) => ValueRef::Compound(v), + ValueMut::IntArray(v) => ValueRef::IntArray(&v[..]), + ValueMut::LongArray(v) => ValueRef::LongArray(&v[..]), + } + } +} + +/// Bools are usually represented as `0` or `1` bytes in NBT. 
+impl From for Value { + fn from(b: bool) -> Self { + Value::Byte(b.into()) + } +} + +impl From> for Value { + fn from(v: Vec) -> Self { + Self::ByteArray(v) + } +} + +impl From for Value { + fn from(v: String) -> Self { + Self::String(v) + } +} + +impl From<&String> for Value { + fn from(value: &String) -> Self { + Self::String(value.clone()) + } +} + +impl<'a> From<&'a str> for Value { + fn from(v: &'a str) -> Self { + Self::String(v.to_owned()) + } +} + +impl<'a> From> for Value { + fn from(v: Cow<'a, str>) -> Self { + Self::String(v.into_owned()) + } +} + +impl From for Value> { + fn from(v: String) -> Self { + Self::String(Cow::Owned(v)) + } +} + +impl<'a> From<&'a String> for Value> { + fn from(v: &'a String) -> Self { + Self::String(Cow::Borrowed(v)) + } +} + +impl<'a> From<&'a str> for Value> { + fn from(v: &'a str) -> Self { + Self::String(Cow::Borrowed(v)) + } +} + +impl<'a> From> for Value> { + fn from(v: Cow<'a, str>) -> Self { + Self::String(v) + } +} + +#[cfg(feature = "java_string")] +impl From for Value { + fn from(v: java_string::JavaString) -> Self { + Self::String(v) + } +} + +#[cfg(feature = "java_string")] +impl From<&java_string::JavaString> for Value { + fn from(v: &java_string::JavaString) -> Self { + Self::String(v.clone()) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From<&'a java_string::JavaStr> for Value { + fn from(v: &'a java_string::JavaStr) -> Self { + Self::String(v.to_owned()) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From> for Value { + fn from(v: Cow<'a, java_string::JavaStr>) -> Self { + Self::String(v.into_owned()) + } +} + +#[cfg(feature = "java_string")] +impl From for Value { + fn from(v: String) -> Self { + Self::String(java_string::JavaString::from(v)) + } +} + +#[cfg(feature = "java_string")] +impl From<&String> for Value { + fn from(v: &String) -> Self { + Self::String(java_string::JavaString::from(v)) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From<&'a str> for Value { + fn from(v: &'a str) -> 
Self { + Self::String(java_string::JavaString::from(v)) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From> for Value { + fn from(v: Cow<'a, str>) -> Self { + Self::String(java_string::JavaString::from(v)) + } +} + +#[cfg(feature = "java_string")] +impl From for Value> { + fn from(v: java_string::JavaString) -> Self { + Self::String(Cow::Owned(v)) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From<&'a java_string::JavaString> for Value> { + fn from(v: &'a java_string::JavaString) -> Self { + Self::String(Cow::Borrowed(v)) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From<&'a java_string::JavaStr> for Value> { + fn from(v: &'a java_string::JavaStr) -> Self { + Self::String(Cow::Borrowed(v)) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From> for Value> { + fn from(v: Cow<'a, java_string::JavaStr>) -> Self { + Self::String(v) + } +} + +#[cfg(feature = "java_string")] +impl From for Value> { + fn from(v: String) -> Self { + Self::String(Cow::Owned(java_string::JavaString::from(v))) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From<&'a String> for Value> { + fn from(v: &'a String) -> Self { + Self::String(Cow::Borrowed(java_string::JavaStr::from_str(v))) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From<&'a str> for Value> { + fn from(v: &'a str) -> Self { + Self::String(Cow::Borrowed(java_string::JavaStr::from_str(v))) + } +} + +#[cfg(feature = "java_string")] +impl<'a> From> for Value> { + fn from(v: Cow<'a, str>) -> Self { + Self::String(match v { + Cow::Borrowed(str) => Cow::Borrowed(java_string::JavaStr::from_str(str)), + Cow::Owned(str) => Cow::Owned(java_string::JavaString::from(str)), + }) + } +} + +impl From> for Value { + fn from(v: Vec) -> Self { + Self::IntArray(v) + } +} + +impl From> for Value { + fn from(v: Vec) -> Self { + Self::LongArray(v) + } +} + +impl From> for Value +where + S: Clone, +{ + fn from(v: ValueRef) -> Self { + v.to_value() + } +} + +impl From<&ValueRef<'_, S>> for Value +where + S: Clone, +{ + fn 
from(v: &ValueRef) -> Self { + v.to_value() + } +} + +impl From> for Value +where + S: Clone, +{ + fn from(v: ValueMut) -> Self { + v.to_value() + } +} + +impl From<&ValueMut<'_, S>> for Value +where + S: Clone, +{ + fn from(v: &ValueMut) -> Self { + v.to_value() + } +} + +#[cfg(feature = "uuid")] +impl From for Value { + fn from(value: uuid::Uuid) -> Self { + let (most, least) = value.as_u64_pair(); + + let first = (most >> 32) as i32; + let second = most as i32; + let third = (least >> 32) as i32; + let fourth = least as i32; + + Value::IntArray(vec![first, second, third, fourth]) + } +} + +#[cfg(feature = "valence_ident")] +impl From> for Value +where + I: Into>, +{ + fn from(value: valence_ident::Ident) -> Self { + value.into_inner().into() + } +} + +impl<'a> From<&'a [i8]> for ValueRef<'a> { + fn from(v: &'a [i8]) -> Self { + Self::ByteArray(v) + } +} + +impl<'a> From<&'a String> for ValueRef<'a, String> { + fn from(v: &'a String) -> ValueRef<'a> { + Self::String(v) + } +} + +impl<'a, S> From<&'a [i32]> for ValueRef<'a, S> { + fn from(v: &'a [i32]) -> Self { + Self::IntArray(v) + } +} + +impl<'a, S> From<&'a [i64]> for ValueRef<'a, S> { + fn from(v: &'a [i64]) -> Self { + Self::LongArray(v) + } +} + +impl<'a, S> From<&'a Value> for ValueRef<'a, S> { + fn from(v: &'a Value) -> Self { + v.as_value_ref() + } +} + +impl<'a, S> From> for ValueRef<'a, S> { + fn from(v: ValueMut<'a, S>) -> Self { + v.into_value_ref() + } +} + +#[cfg(feature = "valence_ident")] +impl<'a> From<&'a valence_ident::Ident> for ValueRef<'a, String> { + fn from(v: &'a valence_ident::Ident) -> Self { + Self::String(v.as_ref()) + } +} + +impl<'a, S> From<&'a mut Vec> for ValueMut<'a, S> { + fn from(v: &'a mut Vec) -> Self { + Self::ByteArray(v) + } +} + +impl<'a> From<&'a mut String> for ValueMut<'a, String> { + fn from(v: &'a mut String) -> Self { + Self::String(v) + } +} + +impl<'a, S> From<&'a mut Vec> for ValueMut<'a, S> { + fn from(v: &'a mut Vec) -> Self { + Self::IntArray(v) + } +} + 
+impl<'a, S> From<&'a mut Vec> for ValueMut<'a, S> { + fn from(v: &'a mut Vec) -> Self { + Self::LongArray(v) + } +} + +impl<'a, S> From<&'a mut Value> for ValueMut<'a, S> { + fn from(v: &'a mut Value) -> Self { + v.as_value_mut() + } +}