From 56ae8fc9ac77246b40a7c864d213549ced347b1c Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Mon, 30 Dec 2024 08:45:20 -1000 Subject: [PATCH 1/4] Decode into --- src/lib.rs | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0b77848..b909928 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(maybe_uninit_write_slice)] #![doc = include_str!("../README.md")] #![cfg(target_endian = "little")] @@ -10,6 +11,7 @@ macro_rules! assert_sizeof { use lossy_pht::LossyPHT; use std::fmt::{Debug, Formatter}; +use std::mem::MaybeUninit; mod builder; mod lossy_pht; @@ -250,18 +252,32 @@ impl<'a> Decompressor<'a> { Self { symbols, lengths } } + /// Returns the capacity required for decompression. + pub fn decompressed_capacity(&self, compressed: &[u8]) -> usize { + size_of::() * (compressed.len() + 1) + } + /// Decompress a byte slice that was previously returned by a compressor using - /// the same symbol table. - pub fn decompress(&self, compressed: &[u8]) -> Vec { - let mut decoded: Vec = Vec::with_capacity(size_of::() * (compressed.len() + 1)); + /// the same symbol table into an uninitialized slice of bytes. + /// + /// Returns the length of the decoded bytes. + /// + /// ## Panics + /// + /// If the decoded slice is not the same length as the `decompressed_capacity`. + pub fn decompress_into(&self, compressed: &[u8], decoded: &mut [MaybeUninit]) -> usize { + assert_eq!( + decoded.len(), + self.decompressed_capacity(compressed), + "decoded slice must have the same length as the decompressed capacity" + ); let ptr = decoded.as_mut_ptr(); let mut in_pos = 0; let mut out_pos = 0; while in_pos < compressed.len() { - // out_pos can grow at most 8 bytes per iteration, and we start at 0 - debug_assert!(out_pos <= decoded.capacity() - size_of::()); + debug_assert!(out_pos <= decoded.len() - size_of::()); // SAFETY: in_pos is always in range 0..compressed.len() let code = unsafe { *compressed.get_unchecked(in_pos) }; if code == ESCAPE_CODE { @@ -270,8 +286,9 @@ impl<'a> Decompressor<'a> { // SAFETY: out_pos is always 8 bytes or more from the end of decoded buffer // SAFETY: ESCAPE_CODE can not be the last byte of the compressed stream unsafe { - let write_addr = ptr.byte_add(out_pos); - std::ptr::write(write_addr, *compressed.get_unchecked(in_pos)); + decoded + .get_unchecked_mut(out_pos) + .write(*compressed.get_unchecked(in_pos)); } out_pos += 1; in_pos += 1; @@ -281,11 +298,10 @@ impl<'a> Decompressor<'a> { let symbol = unsafe { *self.symbols.get_unchecked(code as usize) }; let length = unsafe { *self.lengths.get_unchecked(code as usize) }; // SAFETY: out_pos is always 8 bytes or more from the end of decoded buffer - unsafe { - let write_addr = ptr.byte_add(out_pos) as *mut u64; - // Perform 8 byte unaligned write. - write_addr.write_unaligned(symbol.as_u64()); - } + MaybeUninit::copy_from_slice( + unsafe { decoded.get_unchecked_mut(out_pos..out_pos + length as usize) }, + &symbol.0.to_le_bytes()[0..length as usize], + ); in_pos += 1; out_pos += length as usize; } @@ -296,9 +312,15 @@ impl<'a> Decompressor<'a> { "decompression should exhaust input before output" ); - // SAFETY: we enforce in the loop condition that out_pos <= decoded.capacity() - unsafe { decoded.set_len(out_pos) }; + out_pos + } + /// Decompress a byte slice that was previously returned by a compressor using the same symbol + /// table into a new vector of bytes. + pub fn decompress(&self, compressed: &[u8]) -> Vec { + let mut decoded = Vec::with_capacity(self.decompressed_capacity(compressed)); + let len = self.decompress_into(compressed, decoded.spare_capacity_mut()); + unsafe { decoded.set_len(len) }; decoded } } From e2ebefe730b6fcf5aea4b1578f2c563bcfb49bde Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Mon, 30 Dec 2024 08:46:51 -1000 Subject: [PATCH 2/4] Decode into --- src/lib.rs | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b909928..5881718 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,3 @@ -#![feature(maybe_uninit_write_slice)] #![doc = include_str!("../README.md")] #![cfg(target_endian = "little")] @@ -265,18 +264,16 @@ impl<'a> Decompressor<'a> { /// ## Panics /// /// If the decoded slice is not the same length as the `decompressed_capacity`. - pub fn decompress_into(&self, compressed: &[u8], decoded: &mut [MaybeUninit]) -> usize { - assert_eq!( - decoded.len(), - self.decompressed_capacity(compressed), - "decoded slice must have the same length as the decompressed capacity" - ); - let ptr = decoded.as_mut_ptr(); + pub fn decompress_into(&self, compressed: &[u8], decoded: &mut [MaybeUninit]) -> usize + { + assert_eq!(decoded.len(), self.decompressed_capacity(compressed), "decoded slice must have the same length as the decompressed capacity"); + let ptr: *mut u8 = decoded.as_mut_ptr().cast(); let mut in_pos = 0; let mut out_pos = 0; while in_pos < compressed.len() { + // out_pos can grow at most 8 bytes per iteration, and we start at 0 debug_assert!(out_pos <= decoded.len() - size_of::()); // SAFETY: in_pos is always in range 0..compressed.len() let code = unsafe { *compressed.get_unchecked(in_pos) }; @@ -286,9 +283,8 @@ impl<'a> Decompressor<'a> { // SAFETY: out_pos is always 8 bytes or more from the end of decoded buffer // SAFETY: ESCAPE_CODE can not be the last byte of the compressed stream unsafe { - decoded - .get_unchecked_mut(out_pos) - .write(*compressed.get_unchecked(in_pos)); + let write_addr = ptr.byte_add(out_pos); + std::ptr::write(write_addr, *compressed.get_unchecked(in_pos)); } out_pos += 1; in_pos += 1; @@ -298,10 +294,11 @@ impl<'a> Decompressor<'a> { let symbol = unsafe { *self.symbols.get_unchecked(code as usize) }; let length = unsafe { *self.lengths.get_unchecked(code as usize) }; // SAFETY: out_pos is always 8 bytes or more from the end of decoded buffer - MaybeUninit::copy_from_slice( - unsafe { decoded.get_unchecked_mut(out_pos..out_pos + length as usize) }, - &symbol.0.to_le_bytes()[0..length as usize], - ); + unsafe { + let write_addr = ptr.byte_add(out_pos) as *mut u64; + // Perform 8 byte unaligned write. + write_addr.write_unaligned(symbol.as_u64()); + } in_pos += 1; out_pos += length as usize; } From bf79a860db2afcfb7396c924eb675b91e3db5632 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Mon, 30 Dec 2024 08:49:52 -1000 Subject: [PATCH 3/4] Decode into --- src/lib.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5881718..d204c3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -264,9 +264,12 @@ impl<'a> Decompressor<'a> { /// ## Panics /// /// If the decoded slice is not the same length as the `decompressed_capacity`. - pub fn decompress_into(&self, compressed: &[u8], decoded: &mut [MaybeUninit]) -> usize - { - assert_eq!(decoded.len(), self.decompressed_capacity(compressed), "decoded slice must have the same length as the decompressed capacity"); + pub fn decompress_into(&self, compressed: &[u8], decoded: &mut [MaybeUninit]) -> usize { + assert_eq!( + decoded.len(), + self.decompressed_capacity(compressed), + "decoded slice must have the same length as the decompressed capacity" + ); let ptr: *mut u8 = decoded.as_mut_ptr().cast(); let mut in_pos = 0; From 66fec9b272f9c135eb947452abf35698578c422c Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Mon, 30 Dec 2024 09:54:18 -1000 Subject: [PATCH 4/4] Rename fn --- src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d204c3e..332b3dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -251,8 +251,8 @@ impl<'a> Decompressor<'a> { Self { symbols, lengths } } - /// Returns the capacity required for decompression. - pub fn decompressed_capacity(&self, compressed: &[u8]) -> usize { + /// Returns an upper bound on the size of the decompressed data. + pub fn max_decompression_capacity(&self, compressed: &[u8]) -> usize { size_of::() * (compressed.len() + 1) } @@ -267,7 +267,7 @@ impl<'a> Decompressor<'a> { pub fn decompress_into(&self, compressed: &[u8], decoded: &mut [MaybeUninit]) -> usize { assert_eq!( decoded.len(), - self.decompressed_capacity(compressed), + self.max_decompression_capacity(compressed), "decoded slice must have the same length as the decompressed capacity" ); let ptr: *mut u8 = decoded.as_mut_ptr().cast(); @@ -318,7 +318,7 @@ impl<'a> Decompressor<'a> { /// Decompress a byte slice that was previously returned by a compressor using the same symbol /// table into a new vector of bytes. pub fn decompress(&self, compressed: &[u8]) -> Vec { - let mut decoded = Vec::with_capacity(self.decompressed_capacity(compressed)); + let mut decoded = Vec::with_capacity(self.max_decompression_capacity(compressed)); let len = self.decompress_into(compressed, decoded.spare_capacity_mut()); unsafe { decoded.set_len(len) }; decoded