diff --git a/src/builder.rs b/src/builder.rs index ed24874..c849884 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -312,7 +312,7 @@ impl CompressorBuilder { /// with an existing symbol. pub fn insert(&mut self, symbol: Symbol, len: usize) -> bool { assert!(self.n_symbols < 255, "cannot insert into full symbol table"); - debug_assert!(len == symbol.len(), "provided len != symbol.len()"); + assert_eq!(len, symbol.len(), "provided len must equal symbol.len()"); if len == 2 { // shortCodes @@ -387,7 +387,6 @@ impl CompressorBuilder { /// /// Also returns the lengths vector, which is of length `n_symbols` and contains the /// length for each of the values. - #[inline(never)] fn finalize(&mut self) -> (u8, Vec) { // Create a cumulative sum of each of the elements of the input line numbers. // Do a map that includes the previously seen value as well. @@ -534,7 +533,7 @@ const FSST_SAMPLELINE: usize = 512; /// SAFETY: sample_buf must be >= FSST_SAMPLEMAX bytes long. Providing something less may cause unexpected failures. #[allow(clippy::ptr_arg)] fn make_sample<'a, 'b: 'a>(sample_buf: &'a mut Vec, str_in: &Vec<&'b [u8]>) -> Vec<&'a [u8]> { - debug_assert!( + assert!( sample_buf.capacity() >= FSST_SAMPLEMAX, "sample_buf.len() < FSST_SAMPLEMAX" ); @@ -700,7 +699,7 @@ impl CompressorBuilder { } let remaining_bytes = unsafe { in_end.byte_offset_from(in_ptr) }; - debug_assert!( + assert!( remaining_bytes.is_positive(), "in_ptr exceeded in_end, should not be possible" ); diff --git a/src/lib.rs b/src/lib.rs index 4f00b47..62444ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,14 +29,7 @@ impl Symbol { /// Constructor for a `Symbol` from an 8-element byte slice. pub fn from_slice(slice: &[u8; 8]) -> Self { - let num: u64 = slice[0] as u64 - | (slice[1] as u64) << 8 - | (slice[2] as u64) << 16 - | (slice[3] as u64) << 24 - | (slice[4] as u64) << 32 - | (slice[5] as u64) << 40 - | (slice[6] as u64) << 48 - | (slice[7] as u64) << 56; + let num: u64 = u64::from_le_bytes(*slice); Self(num) } @@ -106,7 +99,7 @@ impl Symbol { /// Return a new `Symbol` by logically concatenating ourselves with another `Symbol`. pub fn concat(self, other: Self) -> Self { - debug_assert!( + assert!( self.len() + other.len() <= 8, "cannot build symbol with length > 8" ); @@ -171,9 +164,6 @@ pub const FSST_CODE_BITS: usize = 9; /// First bit of the "length" portion of an extended code. pub const FSST_LEN_BITS: usize = 12; -/// A code that never appears in practice, indicating an unused slot. -pub const FSST_CODE_UNUSED: u16 = 1u16 << FSST_CODE_BITS; - /// Maximum code value in the extended code range. pub const FSST_CODE_MAX: u16 = 1 << FSST_CODE_BITS; @@ -253,7 +243,7 @@ impl<'a> Decompressor<'a> { /// If the provided symbol table has length greater than 256 pub fn new(symbols: &'a [Symbol], lengths: &'a [u8]) -> Self { assert!( - symbols.len() <= 255, + symbols.len() < FSST_CODE_BASE as usize, "symbol table cannot have size exceeding 255" ); @@ -295,7 +285,7 @@ impl<'a> Decompressor<'a> { } } - debug_assert!( + assert!( in_pos >= compressed.len(), "decompression should exhaust input before output" ); @@ -350,7 +340,7 @@ pub struct Compressor { /// The core structure of the FSST codec, holding a mapping between `Symbol`s and `Code`s. /// /// The symbol table is trained on a corpus of data in the form of a single byte array, building up -/// a mapping of 1-byte "codes" to sequences of up to `N` plaintext bytse, or "symbols". +/// a mapping of 1-byte "codes" to sequences of up to 8 plaintext bytes, or "symbols". impl Compressor { /// Using the symbol table, runs a single cycle of compression on an input word, writing /// the output into `out_ptr`. @@ -367,7 +357,6 @@ impl Compressor { /// # Safety /// /// `out_ptr` must never be NULL or otherwise point to invalid memory. - #[inline(never)] pub unsafe fn compress_word(&self, word: u64, out_ptr: *mut u8) -> (usize, usize) { // Speculatively write the first byte of `word` at offset 1. This is necessary if it is an escape, and // if it isn't, it will be overwritten anyway. diff --git a/src/lossy_pht.rs b/src/lossy_pht.rs index 9570d70..8966ebf 100644 --- a/src/lossy_pht.rs +++ b/src/lossy_pht.rs @@ -1,19 +1,15 @@ -// TODO: remove -#![allow(unused)] - use std::fmt::Debug; use crate::builder::fsst_hash; +use crate::Code; use crate::Symbol; -use crate::FSST_CODE_MASK; -use crate::{Code, FSST_CODE_UNUSED}; /// Size of the perfect hash table. /// /// NOTE: this differs from the paper, which recommends a 64KB total /// table size. The paper does not account for the fact that most /// vendors split the L1 cache into 32KB of instruction and 32KB of data. -pub const HASH_TABLE_SIZE: usize = 1 << 12; +pub const HASH_TABLE_SIZE: usize = 1 << 11; /// A single entry in the [Lossy Perfect Hash Table][`LossyPHT`]. ///