Skip to content

Commit

Permalink
feat: recognize and use over sized allocations
Browse files Browse the repository at this point in the history
Allocators are allowed to return a larger memory chunk than was asked
for. If the extra space is large enough, the hash map can put it to use
instead of leaving it idle. The Global allocator will not hit this path,
because it won't over-size enough to matter, but custom allocators may.
An example of an allocator which allocates full system pages is included
in the test suite (Unix only, because it uses `mmap`).
  • Loading branch information
morrisonlevi committed May 8, 2024
1 parent f637220 commit 89f6d1f
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 31 deletions.
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ doc-comment = "0.3.1"
bumpalo = { version = "3.13.0", features = ["allocator-api2"] }
rkyv = { version = "0.7.42", features = ["validation"] }

[target.'cfg(unix)'.dev-dependencies]
libc = "0.2"

[features]
default = ["ahash", "inline-more", "allocator-api2"]

Expand Down
100 changes: 100 additions & 0 deletions src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8958,3 +8958,103 @@ mod test_map {
assert_eq!(dropped.load(Ordering::SeqCst), 0);
}
}

#[cfg(all(test, unix))]
mod test_map_with_mmap_allocations {
use super::HashMap;
use allocator_api2::alloc::{AllocError, Allocator};
use core::alloc::Layout;
use core::ptr::{null_mut, NonNull};

/// Test-only allocator that serves every request from its own `mmap`
/// mapping, with sizes rounded up to whole pages.
///
/// This is not a production quality allocator, just good enough for
/// some basic tests.
#[derive(Clone, Copy, Debug)]
struct MmapAllocator {
/// Page size in bytes, as reported by `sysconf(_SC_PAGESIZE)`.
/// Guaranteed to be a power of 2: `MmapAllocator::new` refuses to build an
/// allocator otherwise, and `fit_to_page_size` relies on it for its
/// bit-mask rounding.
page_size: usize,
}

impl MmapAllocator {
    /// Builds an allocator around the page size reported by the system.
    ///
    /// Fails with `AllocError` when `sysconf` reports a value below 1 or a
    /// page size that is not a power of 2.
    fn new() -> Result<Self, AllocError> {
        // SAFETY: `sysconf` is called with a constant name and no pointers.
        let reported = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };

        match reported {
            // The cast cannot lose the sign: the guard has already ruled out
            // anything below 1.
            bytes if bytes >= 1 && (bytes as usize).is_power_of_two() => Ok(Self {
                page_size: bytes as usize,
            }),
            _ => Err(AllocError),
        }
    }

    /// Rounds `n` up to a whole number of pages.
    ///
    /// A request for zero bytes is treated as a request for one page
    /// (wasteful, but simple). Returns `AllocError` if rounding up would
    /// overflow `usize`.
    fn fit_to_page_size(&self, n: usize) -> Result<usize, AllocError> {
        let page = self.page_size;
        let requested = if n == 0 { page } else { n };

        // `page` is a power of 2, so masking with `page - 1` yields the number
        // of bytes by which the request spills past the last page boundary.
        let overhang = requested & (page - 1);
        if overhang == 0 {
            return Ok(requested);
        }

        requested.checked_add(page - overhang).ok_or(AllocError)
    }
}

unsafe impl Allocator for MmapAllocator {
    /// Maps a fresh anonymous, private, read/write region large enough for
    /// `layout` and returns it as a byte slice whose length is the page-rounded
    /// size (so it may be larger than `layout.size()`).
    ///
    /// Fails with `AllocError` when the requested alignment exceeds the page
    /// size, when the rounded size overflows, or when `mmap` fails.
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        // Alignments beyond a page are rejected outright; presumably anything
        // up to a page is satisfied by the mapping's own alignment.
        if layout.align() > self.page_size {
            return Err(AllocError);
        }

        let mapped_len = self.fit_to_page_size(layout.size())?;

        // SAFETY: no address hint and no backing file descriptor are passed,
        // so the call does not touch any existing memory or file.
        let mapping = unsafe {
            libc::mmap(
                null_mut(),
                mapped_len as libc::size_t,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_PRIVATE | libc::MAP_ANON,
                -1,
                0,
            )
        };

        if mapping == libc::MAP_FAILED {
            return Err(AllocError);
        }

        match NonNull::new(mapping.cast::<u8>()) {
            Some(base) => Ok(NonNull::slice_from_raw_parts(base, mapped_len)),
            None => Err(AllocError),
        }
    }

    /// Unmaps a region previously returned by `allocate` for the same `layout`.
    ///
    /// The result of `munmap` is deliberately ignored.
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        // The same layout rounded successfully when it was allocated, so the
        // rounding cannot fail here.
        let mapped_len = self.fit_to_page_size(layout.size()).unwrap();
        let base = ptr.as_ptr().cast::<libc::c_void>();
        let _ = libc::munmap(base, mapped_len);
    }
}

/// A map asked to hold a single element, backed by an allocator that hands
/// out whole pages, should expose the capacity of the whole page and only
/// grow once that capacity is exhausted.
#[test]
fn test_tiny_allocation_gets_rounded_to_page_size() {
    let alloc = MmapAllocator::new().unwrap();
    let mut map: HashMap<usize, (), _, _> = HashMap::with_capacity_in(1, alloc);

    // Rough per-bucket footprint; three quarters of the resulting bucket count
    // should account for control bytes and also load factor, at least for
    // realistic page sizes (4096+).
    let approx_buckets_per_page = alloc.page_size / core::mem::size_of::<(usize, (), usize)>();
    let min_elems = approx_buckets_per_page / 4 * 3;
    let capacity = map.capacity();
    assert!(capacity > min_elems, "failed: {capacity} > {min_elems}");

    // Insert exactly `capacity` distinct keys.
    (0..capacity).for_each(|key| {
        map.insert(key, ());
    });

    // The map is now full, and filling it must not have changed the capacity.
    assert_eq!(capacity, map.len());
    assert_eq!(capacity, map.capacity());

    // One more element has to force a resize.
    map.insert(capacity, ());
    let grown = map.capacity();
    assert!(capacity < grown, "failed: {capacity} < {}", grown);
}
}
45 changes: 17 additions & 28 deletions src/raw/alloc.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
pub(crate) use self::inner::{do_alloc, Allocator, Global};
pub(crate) use self::inner::{Allocator, Global};
use crate::alloc::alloc::Layout;
use core::ptr::NonNull;

/// Asks `alloc` for a block matching `layout`.
///
/// On success the allocator's slice pointer is passed through unchanged, so
/// the caller can see the length of the block that was actually handed out.
/// Any allocator error is discarded and reported as `Err(())`.
#[allow(clippy::map_err_ignore)]
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<[u8]>, ()> {
    // Written without a closure; the error payload carries nothing the
    // callers use.
    if let Ok(block) = alloc.allocate(layout) {
        Ok(block)
    } else {
        Err(())
    }
}

// Nightly-case.
// Use unstable `allocator_api` feature.
// This is compatible with `allocator-api2` which can be enabled or not.
// This is used when building for `std`.
#[cfg(feature = "nightly")]
mod inner {
use crate::alloc::alloc::Layout;
// Re-export the allocator trait and the global allocator from the crate's
// `alloc` path so the rest of the crate can name them via `inner`.
pub use crate::alloc::alloc::{Allocator, Global};
use core::ptr::NonNull;

/// Allocates `layout` from `alloc` and returns only the start of the block.
///
/// The allocator's error value is discarded and replaced by `()`.
#[allow(clippy::map_err_ignore)]
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<u8>, ()> {
match alloc.allocate(layout) {
// `as_non_null_ptr` keeps the address of the returned slice pointer and
// drops its length, so callers cannot see any extra space.
Ok(ptr) => Ok(ptr.as_non_null_ptr()),
Err(_) => Err(()),
}
}
}

// Basic non-nightly case.
Expand All @@ -27,17 +27,7 @@ mod inner {
// `core::alloc::Allocator`.
#[cfg(all(not(feature = "nightly"), feature = "allocator-api2"))]
mod inner {
use crate::alloc::alloc::Layout;
// Re-export the allocator trait and the global allocator from the
// `allocator_api2` crate so the rest of the crate can name them via `inner`.
pub use allocator_api2::alloc::{Allocator, Global};
use core::ptr::NonNull;

/// Allocates `layout` from `alloc` and returns only the start of the block.
///
/// The allocator's error value is discarded and replaced by `()`.
#[allow(clippy::map_err_ignore)]
pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<u8>, ()> {
match alloc.allocate(layout) {
// Casting the slice pointer to `NonNull<u8>` keeps the address and drops
// the length, so callers cannot see any extra space.
Ok(ptr) => Ok(ptr.cast()),
Err(_) => Err(()),
}
}
}

// No-defaults case.
Expand All @@ -55,7 +45,7 @@ mod inner {

#[allow(clippy::missing_safety_doc)] // not exposed outside of this crate
pub unsafe trait Allocator {
fn allocate(&self, layout: Layout) -> Result<NonNull<u8>, ()>;
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, ()>;
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout);
}

Expand All @@ -64,8 +54,11 @@ mod inner {

unsafe impl Allocator for Global {
#[inline]
fn allocate(&self, layout: Layout) -> Result<NonNull<u8>, ()> {
unsafe { NonNull::new(alloc(layout)).ok_or(()) }
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, ()> {
match unsafe { NonNull::new(alloc(layout)) } {
Some(ptr) => Ok(NonNull::slice_from_raw_parts(ptr, layout.size())),
None => Err(()),
}
}
#[inline]
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
Expand All @@ -79,8 +72,4 @@ mod inner {
Global
}
}

pub(crate) fn do_alloc<A: Allocator>(alloc: &A, layout: Layout) -> Result<NonNull<u8>, ()> {
alloc.allocate(layout)
}
}
73 changes: 70 additions & 3 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1736,6 +1736,40 @@ impl RawTableInner {
}
}

/// Returns the largest power of 2 that is less than or equal to `z`.
///
/// A value that is already a power of 2 is returned unchanged.
///
/// `z` must be non-zero. Zero has no set bit, so there is no meaningful
/// answer: builds with debug assertions panic, and other builds return an
/// unspecified value. This is a logic error, not undefined behavior.
fn prev_pow2(z: usize) -> usize {
    debug_assert!(z != 0, "prev_pow2 is not defined for zero");
    // Position of the most significant set bit, counted from bit 0.
    let highest_set_bit = usize::BITS - 1 - z.leading_zeros();
    1 << highest_set_bit
}

/// Returns the largest power-of-2 bucket count whose table fits within an
/// allocation of `allocation_size` bytes, given the per-bucket size in
/// `table_layout` and `group_width` trailing control bytes.
fn maximum_buckets_in(
    allocation_size: usize,
    table_layout: TableLayout,
    group_width: usize,
) -> usize {
    // With
    //   x = number of buckets
    //   y = table_layout.size
    //   z = size of the allocation
    //   g = group width
    // a table fits when
    //   z >= x * y + x + g
    // so the largest x is
    //   x = (z - g) / (y + 1)
    //
    // That inequality leaves out the padding inserted to satisfy ctrl_align.
    // Because x is always a power of 2 and a type's layout size is a multiple
    // of its alignment, the padding can be ignored as long as
    //   x * y >= table_layout.ctrl_align
    // which, per the original note here, `capacity_to_buckets` takes care of.
    let usable_bytes = allocation_size - group_width;
    // One data slot plus one control byte per bucket. TODO: zero-sized types?
    let bytes_per_bucket = table_layout.size + 1;
    prev_pow2(usable_bytes / bytes_per_bucket)
}

impl RawTableInner {
/// Allocates a new [`RawTableInner`] with the given number of buckets.
/// The control bytes and buckets are left uninitialized.
Expand All @@ -1753,7 +1787,7 @@ impl RawTableInner {
unsafe fn new_uninitialized<A>(
alloc: &A,
table_layout: TableLayout,
buckets: usize,
mut buckets: usize,
fallibility: Fallibility,
) -> Result<Self, TryReserveError>
where
Expand All @@ -1762,13 +1796,29 @@ impl RawTableInner {
debug_assert!(buckets.is_power_of_two());

// Avoid `Option::ok_or_else` because it bloats LLVM IR.
let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) {
let (layout, mut ctrl_offset) = match table_layout.calculate_layout_for(buckets) {
Some(lco) => lco,
None => return Err(fallibility.capacity_overflow()),
};

let ptr: NonNull<u8> = match do_alloc(alloc, layout) {
Ok(block) => block.cast(),
Ok(block) => {
// Utilize over-sized allocations.
let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH);
debug_assert!(x >= buckets);
// Calculate the new ctrl_offset.
let (_oversized_layout, oversized_ctrl_offset) =
match table_layout.calculate_layout_for(x) {
Some(lco) => lco,
None => unsafe { hint::unreachable_unchecked() },
};
debug_assert!(_oversized_layout.size() <= block.len());
debug_assert!(oversized_ctrl_offset >= ctrl_offset);
ctrl_offset = oversized_ctrl_offset;
buckets = x;

block.cast()
}
Err(_) => return Err(fallibility.alloc_err(layout)),
};

Expand Down Expand Up @@ -4586,6 +4636,23 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
mod test_map {
use super::*;

/// Checks `prev_pow2` at each power of 2 below the top bit, and at the values
/// just above it and just below the next one.
#[test]
fn test_prev_pow2() {
    // Zero is skipped: `prev_pow2` is not defined for that input.
    // The top bit is excluded because doubling it would shift out to zero.
    let top_exponent = usize::BITS - 1;
    for exponent in 0..top_exponent {
        let current: usize = 1 << exponent;
        let following = current << 1;
        assert_eq!(current, prev_pow2(current));
        // For `current == 1` the neighbouring value 2 is itself a power of 2,
        // so it would not round down to 1; skip the neighbour checks there.
        if exponent > 0 {
            assert_eq!(current, prev_pow2(current + 1));
            assert_eq!(current, prev_pow2(following - 1));
        }
    }
}

#[test]
fn test_minimum_capacity_for_small_types() {
#[track_caller]
Expand Down

0 comments on commit 89f6d1f

Please sign in to comment.