When allocating a new block, trim it to size if the new block would result in a net free space reduction from the previous block.

Over a number of runs, this showed roughly a 5-8% time reduction in a benchmark that included proto parsing.

PiperOrigin-RevId: 731152420
protobuf-github-bot authored and copybara-github committed Feb 26, 2025
1 parent 0964d25 commit d7357c1
Showing 3 changed files with 123 additions and 42 deletions.
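
In short, _upb_Arena_SlowMalloc now compares the free space still left in the current block against the free space a normally doubled block would leave behind it. When growing would be a net loss, the request instead gets a block trimmed to its exact size, linked in behind the current block so later small allocations keep consuming the existing free tail. Below is a minimal standalone sketch of that decision only; MAX_BLOCK_SIZE, BLOCK_RESERVE, and next_block_size are illustrative stand-ins rather than upb internals, and the real code additionally consults a per-arena growth hint and only trims once a heap-allocated block is present.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_BLOCK_SIZE 32768 /* illustrative cap on naturally grown blocks */
#define BLOCK_RESERVE 16     /* illustrative per-block header overhead */

static size_t min_sz(size_t a, size_t b) { return a < b ? a : b; }
static size_t max_sz(size_t a, size_t b) { return a > b ? a : b; }

/* Decide how large the next block should be for `request` bytes, given the
 * size of the current block and how much of it is still free.  Sets
 * *trim_to_fit when the request should get an exact-sized block that is
 * linked in behind the current block instead of replacing it. */
static size_t next_block_size(size_t last_size, size_t current_free,
                              size_t request, bool* trim_to_fit) {
  size_t target = min_sz(last_size * 2, MAX_BLOCK_SIZE);
  size_t future_free = max_sz(request, target - BLOCK_RESERVE) - request;
  /* Growing would leave less usable space than we already have, so satisfy
   * this request exactly and keep bump-allocating from the old block. */
  *trim_to_fit = current_free >= future_free;
  return *trim_to_fit ? request + BLOCK_RESERVE
                      : max_sz(request + BLOCK_RESERVE, target);
}

int main(void) {
  bool trim;
  /* 4 KiB block with only 300 bytes free, 512-byte request: doubling to
   * 8 KiB leaves ~7.6 KiB free, a net gain, so grow normally. */
  size_t s1 = next_block_size(4096, 300, 512, &trim);
  printf("grow: %zu trim=%d\n", s1, trim);
  /* Max-sized block with 15000 bytes free, 20000-byte request: another max
   * block would leave only ~12.7 KiB free, a net loss, so trim to fit. */
  size_t s2 = next_block_size(32768, 15000, 20000, &trim);
  printf("trim: %zu trim=%d\n", s2, trim);
  return 0;
}

Running the sketch prints grow: 8192 trim=0 and trim: 20016 trim=1, which mirrors the two branches added to arena.c below.
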
118 changes: 92 additions & 26 deletions upb/mem/arena.c
@@ -29,7 +29,9 @@ void upb_Arena_SetMaxBlockSize(size_t max) {

typedef struct upb_MemBlock {
struct upb_MemBlock* next;
size_t size;
// If this block is the head of the list, tracks a growing hint of what the
// *next* block should be; otherwise tracks the size of the actual allocation.
size_t size_or_hint;
// Data follows.
} upb_MemBlock;

@@ -89,6 +91,12 @@ static upb_ArenaInternal* upb_Arena_Internal(const upb_Arena* a) {
return &((upb_ArenaState*)a)->body;
}

// Extracts the (upb_Arena*) from a (upb_ArenaInternal*)
static upb_Arena* upb_Arena_FromInternal(const upb_ArenaInternal* ai) {
ptrdiff_t offset = -offsetof(upb_ArenaState, body);
return UPB_PTR_AT(ai, offset, upb_Arena);
}

static bool _upb_Arena_IsTaggedRefcount(uintptr_t parent_or_count) {
return (parent_or_count & 1) == 1;
}
@@ -283,49 +291,85 @@ uint32_t upb_Arena_DebugRefCount(const upb_Arena* a) {
return (uint32_t)_upb_Arena_RefCountFromTagged(tagged);
}

// Adds an allocated block to the head of the list.
static void _upb_Arena_AddBlock(upb_Arena* a, void* ptr, size_t offset,
size_t block_size) {
upb_ArenaInternal* ai = upb_Arena_Internal(a);
upb_MemBlock* block = ptr;

block->size = block_size;
// Insert into linked list.
block->next = ai->blocks;
ai->blocks = block;

block->size_or_hint = block_size;
UPB_ASSERT(offset >= kUpb_MemblockReserve);
a->UPB_PRIVATE(ptr) = UPB_PTR_AT(block, offset, char);
char* start = UPB_PTR_AT(block, offset, char);
upb_MemBlock* head = ai->blocks;
if (head && head->next) {
// Fix up size to match actual allocation size
head->size_or_hint = a->UPB_PRIVATE(end) - (char*)head;
}
block->next = head;
ai->blocks = block;
a->UPB_PRIVATE(ptr) = start;
a->UPB_PRIVATE(end) = UPB_PTR_AT(block, block_size, char);

UPB_POISON_MEMORY_REGION(a->UPB_PRIVATE(ptr),
a->UPB_PRIVATE(end) - a->UPB_PRIVATE(ptr));
UPB_POISON_MEMORY_REGION(start, a->UPB_PRIVATE(end) - start);
UPB_ASSERT(UPB_PRIVATE(_upb_ArenaHas)(a) >= block_size - offset);
}

static bool _upb_Arena_AllocBlock(upb_Arena* a, size_t size) {
// Fulfills the allocation request by allocating a new block. Returns NULL on
// allocation failure.
void* UPB_PRIVATE(_upb_Arena_SlowMalloc)(upb_Arena* a, size_t size) {
upb_ArenaInternal* ai = upb_Arena_Internal(a);
if (!ai->block_alloc) return false;
if (!ai->block_alloc) return NULL;
size_t last_size = 128;
size_t current_free = 0;
upb_MemBlock* last_block = ai->blocks;
if (last_block) {
last_size = a->UPB_PRIVATE(end) - (char*)last_block;
current_free = a->UPB_PRIVATE(end) - a->UPB_PRIVATE(ptr);
}

// Relaxed order is safe here as we don't need any ordering with the setter.
size_t max_block_size =
upb_Atomic_Load(&g_max_block_size, memory_order_relaxed);

// Don't naturally grow beyond the max block size.
size_t clamped_size = UPB_MIN(last_size * 2, max_block_size);

size_t target_size = UPB_MIN(last_size * 2, max_block_size);
size_t future_free = UPB_MAX(size, target_size - kUpb_MemblockReserve) - size;
// We want to preserve exponential growth in block size without wasting too
// much unused space at the end of blocks. Once the head of our blocks list is
// large enough to always trigger a max-sized block for all subsequent
// allocations, allocate blocks that would net reduce free space behind it.
if (last_block && current_free > future_free &&
target_size < max_block_size) {
last_size = last_block->size_or_hint;
// Recalculate sizes with possibly larger last_size
target_size = UPB_MIN(last_size * 2, max_block_size);
future_free = UPB_MAX(size, target_size - kUpb_MemblockReserve) - size;
}
bool insert_after_head = false;
// Only insert after head if an allocated block is present; we don't want to
// continue allocating out of the initial block because we'll have no way of
// restoring the size of our allocated block if we add another.
if (last_block && current_free >= future_free) {
// If we're still going to net reduce free space with this new block, then
// only allocate the precise size requested and keep the current last block
// as the active block for future allocations.
insert_after_head = true;
target_size = size + kUpb_MemblockReserve;
// Add something to our previous size each time, so that eventually we
// will reach the max block size. Allocations larger than the max block size
// will always get their own backing allocation, so don't include them.
if (target_size <= max_block_size) {
last_block->size_or_hint =
UPB_MIN(last_block->size_or_hint + (size >> 1), max_block_size >> 1);
}
}
// We may need to exceed the max block size if the user requested a large
// allocation.
size_t block_size = UPB_MAX(kUpb_MemblockReserve + size, clamped_size);
size_t block_size = UPB_MAX(kUpb_MemblockReserve + size, target_size);

upb_MemBlock* block =
upb_malloc(_upb_ArenaInternal_BlockAlloc(ai), block_size);

if (!block) return false;
_upb_Arena_AddBlock(a, block, kUpb_MemblockReserve, block_size);
if (!block) return NULL;
// Atomic add not required here, as threads won't race allocating blocks, plus
// atomic fetch-add is slower than load/add/store on arm devices compiled
// targeting pre-v8.1. Relaxed order is safe as nothing depends on order of
@@ -335,13 +379,21 @@ static bool _upb_Arena_AllocBlock(upb_Arena* a, size_t size) {
upb_Atomic_Load(&ai->space_allocated, memory_order_relaxed);
upb_Atomic_Store(&ai->space_allocated, old_space_allocated + block_size,
memory_order_relaxed);
UPB_ASSERT(UPB_PRIVATE(_upb_ArenaHas)(a) >= size);
return true;
}

void* UPB_PRIVATE(_upb_Arena_SlowMalloc)(upb_Arena* a, size_t size) {
if (!_upb_Arena_AllocBlock(a, size)) return NULL; // OOM
return upb_Arena_Malloc(a, size - UPB_ASAN_GUARD_SIZE);
if (UPB_UNLIKELY(insert_after_head)) {
upb_ArenaInternal* ai = upb_Arena_Internal(a);
block->size_or_hint = block_size;
upb_MemBlock* head = ai->blocks;
block->next = head->next;
head->next = block;

char* allocated = UPB_PTR_AT(block, kUpb_MemblockReserve, char);
UPB_POISON_MEMORY_REGION(allocated + size, UPB_ASAN_GUARD_SIZE);
return allocated;
} else {
_upb_Arena_AddBlock(a, block, kUpb_MemblockReserve, block_size);
UPB_ASSERT(UPB_PRIVATE(_upb_ArenaHas)(a) >= size);
return upb_Arena_Malloc(a, size - UPB_ASAN_GUARD_SIZE);
}
}

static upb_Arena* _upb_Arena_InitSlow(upb_alloc* alloc, size_t first_size) {
@@ -420,18 +472,22 @@ static void _upb_Arena_DoFree(upb_ArenaInternal* ai) {
// Load first since arena itself is likely from one of its blocks.
upb_ArenaInternal* next_arena =
(upb_ArenaInternal*)upb_Atomic_Load(&ai->next, memory_order_acquire);
// Freeing may have memory barriers that confuse tsan, so assert immdiately
// Freeing may have memory barriers that confuse tsan, so assert immediately
// after load here
if (next_arena) {
UPB_TSAN_CHECK_PUBLISHED(next_arena);
}
upb_alloc* block_alloc = _upb_ArenaInternal_BlockAlloc(ai);
upb_MemBlock* block = ai->blocks;
if (block && block->next) {
block->size_or_hint =
upb_Arena_FromInternal(ai)->UPB_PRIVATE(end) - (char*)block;
}
upb_AllocCleanupFunc* alloc_cleanup = *ai->upb_alloc_cleanup;
while (block != NULL) {
// Load first since we are deleting block.
upb_MemBlock* next_block = block->next;
upb_free_sized(block_alloc, block, block->size);
upb_free_sized(block_alloc, block, block->size_or_hint);
block = next_block;
}
if (alloc_cleanup != NULL) {
@@ -723,3 +779,13 @@ void UPB_PRIVATE(_upb_Arena_SwapOut)(upb_Arena* des, const upb_Arena* src) {
*des = *src;
desi->blocks = srci->blocks;
}

bool _upb_Arena_WasLastAlloc(struct upb_Arena* a, void* ptr, size_t oldsize) {
upb_ArenaInternal* ai = upb_Arena_Internal(a);
upb_MemBlock* block = ai->blocks;
if (block == NULL) return false;
block = block->next;
if (block == NULL) return false;
char* start = UPB_PTR_AT(block, kUpb_MemblockReserve, char);
return ptr == start && oldsize == block->size_or_hint - kUpb_MemblockReserve;
}
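
The subtle part of the arena.c change is the reuse of the size field: at the head of the block list, size_or_hint carries a growth hint for the next block, so the head's true length has to be restored from the arena's end pointer before the head is replaced (in _upb_Arena_AddBlock) or freed (in _upb_Arena_DoFree). A simplified model of that bookkeeping follows; MiniArena, push_head, and insert_after_head are illustrative names, and the real code also bumps the head's hint on each trimmed allocation and skips the fix-up when nothing was inserted behind the head.

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

/* Toy model of the block list: the head's size_or_hint is a growth hint,
 * every other node's size_or_hint is the real allocation length. */
typedef struct Block {
  struct Block* next;
  size_t size_or_hint;
} Block;

typedef struct {
  Block* blocks; /* head = block currently being bump-allocated from */
  char* ptr;     /* bump pointer inside the head block */
  char* end;     /* one past the end of the head block */
} MiniArena;

/* Push a new head: restore the old head's real length first, since its
 * size_or_hint may have drifted while serving as a hint. */
static void push_head(MiniArena* a, Block* b, size_t block_size) {
  Block* old_head = a->blocks;
  if (old_head != NULL) {
    old_head->size_or_hint = (size_t)(a->end - (char*)old_head);
  }
  b->size_or_hint = block_size; /* doubles as the next growth hint */
  b->next = old_head;
  a->blocks = b;
  a->ptr = (char*)b + sizeof(Block);
  a->end = (char*)b + block_size;
}

/* Link an exact-sized block behind the head: the head stays active and the
 * new node records its true length immediately. */
static void insert_after_head(MiniArena* a, Block* b, size_t block_size) {
  assert(a->blocks != NULL);
  b->size_or_hint = block_size;
  b->next = a->blocks->next;
  a->blocks->next = b;
}

int main(void) {
  MiniArena a = {NULL, NULL, NULL};
  Block* b1 = malloc(1024);
  push_head(&a, b1, 1024);
  Block* b2 = malloc(sizeof(Block) + 256);
  insert_after_head(&a, b2, sizeof(Block) + 256);
  /* b1 is still the active block; b2 already knows its exact length. */
  assert(a.blocks == b1 && b1->next == b2);
  free(b2);
  free(b1);
  return 0;
}
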
22 changes: 12 additions & 10 deletions upb/mem/arena_test.cc
@@ -100,7 +100,9 @@ TEST(ArenaTest, SizedFree) {
alloc.delegate_alloc = &upb_alloc_global;
alloc.sizes = &sizes;

upb_Arena* arena = upb_Arena_Init(nullptr, 0, &alloc.alloc);
char initial_block[1000];

upb_Arena* arena = upb_Arena_Init(initial_block, 1000, &alloc.alloc);
(void)upb_Arena_Malloc(arena, 500);
void* to_resize = upb_Arena_Malloc(arena, 2000);
void* resized = upb_Arena_Realloc(arena, to_resize, 2000, 4000);
@@ -190,11 +192,11 @@ TEST(OverheadTest, SingleMassiveBlockThenLittle) {
}
if (!UPB_ASAN) {
#ifdef __ANDROID__
EXPECT_NEAR(test.WastePct(), 0.21, 0.025);
EXPECT_NEAR(test.AmortizedAlloc(), 0.05, 0.025);
EXPECT_NEAR(test.WastePct(), 0.075, 0.025);
EXPECT_NEAR(test.AmortizedAlloc(), 0.09, 0.025);
#else
EXPECT_NEAR(test.WastePct(), 0.6, 0.025);
EXPECT_NEAR(test.AmortizedAlloc(), 0.05, 0.025);
EXPECT_NEAR(test.WastePct(), 0.08, 0.025);
EXPECT_NEAR(test.AmortizedAlloc(), 0.09, 0.025);
#endif
}
}
@@ -206,8 +208,8 @@ TEST(OverheadTest, Overhead_AlternatingSmallLargeBlocks) {
test.Alloc(64);
}
if (!UPB_ASAN) {
EXPECT_NEAR(test.WastePct(), 0.45, 0.025);
EXPECT_NEAR(test.AmortizedAlloc(), 1, 0.025);
EXPECT_NEAR(test.WastePct(), 0.007, 0.0025);
EXPECT_NEAR(test.AmortizedAlloc(), 0.52, 0.025);
}
}

@@ -217,7 +219,7 @@ TEST(OverheadTest, PartialMaxBlocks) {
test.Alloc(2096 + i);
}
if (!UPB_ASAN) {
EXPECT_NEAR(test.WastePct(), 0.47, 0.025);
EXPECT_NEAR(test.WastePct(), 0.16, 0.025);
EXPECT_NEAR(test.AmortizedAlloc(), 1.1, 0.25);
}
}
@@ -245,7 +247,7 @@ TEST(OverheadTest, SmallBlocksLargerThanInitial_many) {
EXPECT_NEAR(test.WastePct(), 0.09, 0.025);
EXPECT_NEAR(test.AmortizedAlloc(), 0.12, 0.025);
#else
EXPECT_NEAR(test.WastePct(), 0.14, 0.025);
EXPECT_NEAR(test.WastePct(), 0.12, 0.03);
EXPECT_NEAR(test.AmortizedAlloc(), 0.08, 0.025);
#endif
}
@@ -257,7 +259,7 @@ TEST(OverheadTest, SmallBlocksLargerThanInitial_many) {
EXPECT_NEAR(test.WastePct(), 0.05, 0.03);
EXPECT_NEAR(test.AmortizedAlloc(), 0.08, 0.025);
#else
EXPECT_NEAR(test.WastePct(), 0.03, 0.025);
EXPECT_NEAR(test.WastePct(), 0.04, 0.025);
EXPECT_NEAR(test.AmortizedAlloc(), 0.05, 0.025);
#endif
}
25 changes: 19 additions & 6 deletions upb/mem/internal/arena.h
@@ -95,13 +95,26 @@ UPB_API_INLINE void* upb_Arena_Realloc(struct upb_Arena* a, void* ptr,
UPB_API_INLINE void upb_Arena_ShrinkLast(struct upb_Arena* a, void* ptr,
size_t oldsize, size_t size) {
UPB_TSAN_CHECK_WRITE(a->UPB_ONLYBITS(ptr));
oldsize = UPB_ALIGN_MALLOC(oldsize);
size = UPB_ALIGN_MALLOC(size);
// Must be the last alloc.
UPB_ASSERT((char*)ptr + oldsize ==
a->UPB_ONLYBITS(ptr) - UPB_ASAN_GUARD_SIZE);
UPB_ASSERT(size <= oldsize);
a->UPB_ONLYBITS(ptr) = (char*)ptr + size;
size = UPB_ALIGN_MALLOC(size) + UPB_ASAN_GUARD_SIZE;
oldsize = UPB_ALIGN_MALLOC(oldsize) + UPB_ASAN_GUARD_SIZE;
if (size == oldsize) {
return;
}
char* arena_ptr = a->UPB_ONLYBITS(ptr);
// If it's the last alloc in the last block, we can resize.
if ((char*)ptr + oldsize == arena_ptr) {
a->UPB_ONLYBITS(ptr) = (char*)ptr + size;
} else {
// If not, verify that it could have been a full-block alloc that did not
// replace the last block.
#ifndef NDEBUG
bool _upb_Arena_WasLastAlloc(struct upb_Arena * a, void* ptr,
size_t oldsize);
UPB_ASSERT(_upb_Arena_WasLastAlloc(a, ptr, oldsize));
#endif
}
UPB_POISON_MEMORY_REGION((char*)ptr + size, oldsize - size);
}

#ifdef __cplusplus
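
For callers, the visible effect of the header change is that upb_Arena_ShrinkLast keeps working when the shrunk allocation received its own exact-sized block behind the active one, which is what the new _upb_Arena_WasLastAlloc check in arena.c verifies in debug builds. A hedged usage sketch is below; read_message is a made-up caller, and only upb_Arena_Malloc and upb_Arena_ShrinkLast are real API from this header.

#include <stddef.h>
#include <string.h>

#include "upb/mem/arena.h"

/* Allocate pessimistically, then hand the unused tail back to the arena.
 * ShrinkLast only adjusts bookkeeping, so this is cheap even when the
 * allocation landed in its own trimmed block. */
static char* read_message(upb_Arena* arena, const char* src, size_t len) {
  char* buf = (char*)upb_Arena_Malloc(arena, len * 2); /* worst-case size */
  if (buf == NULL) return NULL;
  memcpy(buf, src, len); /* pretend decoding consumed only `len` bytes */
  upb_Arena_ShrinkLast(arena, buf, /*oldsize=*/len * 2, /*size=*/len);
  return buf;
}
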
