From 58634b5dbbfbd337a1ea382b606f4583f9ac0065 Mon Sep 17 00:00:00 2001
From: Dipin Hora
Date: Tue, 15 Oct 2024 17:43:43 -0400
Subject: [PATCH] Recycle actor heap chunks after GC instead of returning to
 pool

Before this commit, any chunks left unused after actor heap garbage
collection would be destroyed and returned to the memory pool
immediately for reuse by the runtime or any actor.

This commit changes things so that instead of destroying and returning
the chunks immediately, we assume the actor will likely need more
memory as it runs more behaviors, and we keep the recently unused
chunks around in case that happens. This is generally more efficient
than destroying a chunk and getting a new one from the memory pool,
because both destroying a chunk and allocating a new one involve
updating the pagemap for the chunk to indicate which actor owns it.
Updating the pagemap is an expensive operation that we can avoid by
recycling the chunks instead.

The main drawback is that, because actors no longer return chunks to
the memory pool immediately after a GC, the overall system might end
up using more memory: any freed chunks can only be reused by the actor
that owns them, so the runtime and other actors can no longer reuse
that memory as they previously could.
---
 src/libponyrt/mem/heap.c               | 128 +++++++++++++++++++------
 src/libponyrt/mem/heap.h               |   2 +
 src/libponyrt/mem/heap_chunk_sorting.h | 124 ++++++++++++++++++++++++
 3 files changed, 225 insertions(+), 29 deletions(-)
 create mode 100644 src/libponyrt/mem/heap_chunk_sorting.h

diff --git a/src/libponyrt/mem/heap.c b/src/libponyrt/mem/heap.c
index 83317104a4..f18d5e6002 100644
--- a/src/libponyrt/mem/heap.c
+++ b/src/libponyrt/mem/heap.c
@@ -40,6 +40,8 @@ typedef struct small_chunk_t
   struct small_chunk_t* next;
 } small_chunk_t;
 
+// include it after defining `large_chunk_t`
+#include "heap_chunk_sorting.h"
 
 #define CHUNK_TYPE_BITMASK (uintptr_t)0x1
 #define CHUNK_NEEDS_TO_BE_CLEARED_BITMASK (uintptr_t)0x2
@@ -547,8 +549,11 @@ void ponyint_heap_init(heap_t* heap)
 
 void ponyint_heap_destroy(heap_t* heap)
 {
+  large_chunk_list(destroy_large, heap->large_recyclable, 0);
   large_chunk_list(destroy_large, heap->large, 0);
 
+  small_chunk_list(destroy_small, heap->small_recyclable, 0);
+
   for(int i = 0; i < HEAP_SIZECLASSES; i++)
   {
     small_chunk_list(destroy_small, heap->small_free[i], 0);
@@ -622,9 +627,29 @@ void* ponyint_heap_alloc_small(pony_actor_t* actor, heap_t* heap,
       heap->small_full[sizeclass] = chunk;
     }
   } else {
-    small_chunk_t* n = (small_chunk_t*) POOL_ALLOC(small_chunk_t);
-    n->base.m = (char*) POOL_ALLOC(block_t);
+    small_chunk_t* n = NULL;
+
+    // recycle a small chunk if available because it avoids setting the pagemap
+    if ((NULL == n) && (NULL != heap->small_recyclable))
+    {
+      n = heap->small_recyclable;
+      heap->small_recyclable = n->next;
+    }
+
+    // If there are none in the recyclable list, get a new one.
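+    // a fresh chunk has to pay for a pagemap update (ponyint_pagemap_set
+    // below) to record which actor owns it; a recycled chunk skips that work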
+    if (NULL == n)
+    {
+      n = (small_chunk_t*) POOL_ALLOC(small_chunk_t);
+      n->base.m = (char*) POOL_ALLOC(block_t);
+      ponyint_pagemap_set(get_m((chunk_t*)n), (chunk_t*)n, actor);
+    }
+
+    // we should always have a chunk and m at this point
+    pony_assert(n != NULL);
+    pony_assert(n->base.m != NULL);
+
+    set_small_chunk_size(n, sizeclass);
+
 #ifdef USE_RUNTIMESTATS
     actor->actorstats.heap_mem_used += sizeof(small_chunk_t);
     actor->actorstats.heap_mem_allocated += POOL_ALLOC_SIZE(small_chunk_t);
@@ -640,10 +665,10 @@
     n->slots = sizeclass_init[sizeclass];
     n->next = NULL;
 
+    // this is required for the heap garbage collection as it needs to know
+    // that this chunk needs to be cleared
     set_chunk_needs_clearing((chunk_t*)n);
 
-    ponyint_pagemap_set(get_m((chunk_t*)n), (chunk_t*)n, actor);
-
     heap->small_free[sizeclass] = n;
     chunk = n;
 
@@ -672,9 +697,49 @@ void* ponyint_heap_alloc_large(pony_actor_t* actor, heap_t* heap, size_t size,
 
   size = ponyint_pool_adjust_size(size);
 
-  large_chunk_t* chunk = (large_chunk_t*) POOL_ALLOC(large_chunk_t);
-  chunk->size = size;
-  chunk->base.m = (char*) ponyint_pool_alloc_size(size);
+  large_chunk_t* chunk = NULL;
+
+  // recycle a large chunk if available because it avoids setting the pagemap
+  if ((NULL == chunk) && (NULL != heap->large_recyclable))
+  {
+    // keep a pointer to the link we arrived through so a match can be
+    // unlinked in place
+    large_chunk_t** prev = &heap->large_recyclable;
+    large_chunk_t* recycle = heap->large_recyclable;
+
+    // short circuit as soon as we see a chunk that is too big because this
+    // list is sorted by size
+    while ((NULL != recycle) && (recycle->size <= size))
+    {
+      // we only recycle if the size is exactly what is required
+      if (recycle->size == size)
+      {
+        *prev = recycle->next;
+        chunk = recycle;
+        chunk->next = NULL;
+        break;
+      } else {
+        prev = &recycle->next;
+        recycle = recycle->next;
+      }
+    }
+  }
+
+  // If there is no suitable one in the recyclable list, get a new one.
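+  // as with small chunks, only the fresh-allocation path below pays for
+  // pagemap updates (large_pagemap covers the full range the chunk spans)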
+  if (NULL == chunk)
+  {
+    chunk = (large_chunk_t*) POOL_ALLOC(large_chunk_t);
+    chunk->size = size;
+    chunk->base.m = (char*) ponyint_pool_alloc_size(size);
+    large_pagemap(get_m((chunk_t*)chunk), size, (chunk_t*)chunk, actor);
+  }
+
+  // we should always have a chunk and m of the right size at this point
+  pony_assert(chunk != NULL);
+  pony_assert(chunk->base.m != NULL);
+  pony_assert(chunk->size == size);
+
 #ifdef USE_RUNTIMESTATS
   actor->actorstats.heap_mem_used += sizeof(large_chunk_t);
   actor->actorstats.heap_mem_used += chunk->size;
@@ -685,13 +750,13 @@
 #endif
 
   set_large_chunk_slot(chunk, 0);
 
+  // this is required for the heap garbage collection as it needs to know
+  // that this chunk needs to be cleared
   set_chunk_needs_clearing((chunk_t*)chunk);
 
   // note if a finaliser needs to run or not
   set_large_chunk_finaliser(chunk, (track_finalisers_mask & 1));
 
-  large_pagemap(get_m((chunk_t*)chunk), size, (chunk_t*)chunk, actor);
-
   chunk->next = heap->large;
   heap->large = chunk;
   heap->used += chunk->size;
@@ -886,18 +951,18 @@ void ponyint_heap_endgc(heap_t* heap
   // make a copy of all the heap list pointers into a temporary heap
   heap_t theap = {};
   heap_t* temp_heap = &theap;
-  memcpy(temp_heap, heap, offsetof(heap_t, used));
+  memcpy(temp_heap, heap, offsetof(heap_t, small_recyclable));
 
   // set all the heap list pointers to NULL in the real heap to ensure that
   // any new allocations by finalisers during the GC process don't reuse memory
   // freed during the GC process; the recyclable lists are left intact because
   // their chunks were freed by a previous GC and are safe to reuse
-  memset(heap, 0, offsetof(heap_t, used));
+  memset(heap, 0, offsetof(heap_t, small_recyclable));
 
-  // lists of chunks to destroy
-  small_chunk_t* to_destroy_small = NULL;
-  large_chunk_t* to_destroy_large = NULL;
+  // lists of chunks to recycle
+  small_chunk_t** to_recycle_small = &temp_heap->small_recyclable;
+  large_chunk_t** to_recycle_large = &temp_heap->large_recyclable;
 
-  // sweep all the chunks in the temporary heap while accumulating chunks to destroy
+  // sweep all the chunks in the temporary heap while accumulating chunks to recycle
   // NOTE: the real heap will get new chunks allocated and added to it during this process
   // if a finaliser allocates memory... we have to make sure that finalisers don't
   // reuse memory being freed or we'll end up with a use-after-free bug due to
@@ -917,21 +982,21 @@
     uint32_t empty = sizeclass_empty[i];
 
 #ifdef USE_RUNTIMESTATS
-    used += sweep_small(list1, avail, full, &to_destroy_small, empty, size,
+    used += sweep_small(list1, avail, full, to_recycle_small, empty, size,
       &mem_allocated, &mem_used, &num_allocated);
-    used += sweep_small(list2, avail, full, &to_destroy_small, empty, size,
+    used += sweep_small(list2, avail, full, to_recycle_small, empty, size,
       &mem_allocated, &mem_used, &num_allocated);
 #else
-    used += sweep_small(list1, avail, full, &to_destroy_small, empty, size);
-    used += sweep_small(list2, avail, full, &to_destroy_small, empty, size);
+    used += sweep_small(list1, avail, full, to_recycle_small, empty, size);
+    used += sweep_small(list2, avail, full, to_recycle_small, empty, size);
 #endif
   }
 
 #ifdef USE_RUNTIMESTATS
-  temp_heap->large = sweep_large(temp_heap->large, &to_destroy_large, &used, &mem_allocated, &mem_used,
+  temp_heap->large = sweep_large(temp_heap->large, to_recycle_large, &used, &mem_allocated, &mem_used,
     &num_allocated);
 #else
-  temp_heap->large = sweep_large(temp_heap->large, &to_destroy_large, &used);
+  temp_heap->large = sweep_large(temp_heap->large, to_recycle_large, &used);
 #endif
 
   // we need to combine the temporary heap lists with the real heap lists
@@ -951,8 +1016,8 @@
     {
       small_chunk_t* temp = *sc_temp;
       *sc_temp = temp->next;
-      temp->next = to_destroy_small;
-      to_destroy_small = temp;
+      temp->next = *to_recycle_small;
+      *to_recycle_small = temp;
     } else {
       sc_temp = &(*sc_temp)->next;
     }
@@ -969,8 +1034,8 @@
     {
       small_chunk_t* temp = *sc_temp;
       *sc_temp = temp->next;
-      temp->next = to_destroy_small;
-      to_destroy_small = temp;
+      temp->next = *to_recycle_small;
+      *to_recycle_small = temp;
     } else {
       sc_temp = &(*sc_temp)->next;
    }
@@ -988,18 +1053,23 @@
     {
       large_chunk_t* temp = *lc_temp;
       *lc_temp = temp->next;
-      temp->next = to_destroy_large;
-      to_destroy_large = temp;
+      temp->next = *to_recycle_large;
+      *to_recycle_large = temp;
     } else {
       lc_temp = &(*lc_temp)->next;
     }
   }
 
   *lc_temp = temp_heap->large;
 
-  // destroy all the chunks that need to be destroyed
-  small_chunk_list(destroy_small, to_destroy_small, 0);
-  large_chunk_list(destroy_large, to_destroy_large, 0);
+  // destroy all the chunks that didn't get re-used since the last GC run so
+  // that other actors can re-use the memory from the pool
+  small_chunk_list(destroy_small, heap->small_recyclable, 0);
+  large_chunk_list(destroy_large, heap->large_recyclable, 0);
+  // save any chunks that can be recycled from this GC run and sort the
+  // large chunks by size so allocation can short circuit its search
+  heap->large_recyclable = sort_large_chunk_list_by_size(*to_recycle_large);
+  heap->small_recyclable = *to_recycle_small;
 
   // Foreign object sizes will have been added to heap->used already. Here we
   // add local object sizes as well and set the next gc point for when memory
diff --git a/src/libponyrt/mem/heap.h b/src/libponyrt/mem/heap.h
index 2496e033bb..3f8cea254d 100644
--- a/src/libponyrt/mem/heap.h
+++ b/src/libponyrt/mem/heap.h
@@ -24,6 +24,8 @@ typedef struct heap_t
   small_chunk_t* small_free[HEAP_SIZECLASSES];
   small_chunk_t* small_full[HEAP_SIZECLASSES];
   large_chunk_t* large;
+  small_chunk_t* small_recyclable;
+  large_chunk_t* large_recyclable;
 
   size_t used;
   size_t next_gc;
diff --git a/src/libponyrt/mem/heap_chunk_sorting.h b/src/libponyrt/mem/heap_chunk_sorting.h
new file mode 100644
index 0000000000..d3e93a8e41
--- /dev/null
+++ b/src/libponyrt/mem/heap_chunk_sorting.h
@@ -0,0 +1,124 @@
+#ifndef mem_heap_chunk_sorting_h
+#define mem_heap_chunk_sorting_h
+
+#include "heap.h"
+
+/*
+ * Shamelessly stolen/adapted from Simon Tatham from:
+ * https://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.c
+ *
+ * This file is copyright 2001 Simon Tatham.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+// bottom-up merge sort of a singly linked chunk list, ascending by size
+static large_chunk_t* sort_large_chunk_list_by_size(large_chunk_t* list)
+{
+  large_chunk_t *p, *q, *e, *tail;
+  int32_t insize, nmerges, psize, qsize, i;
+
+  /*
+   * Silly special case: if `list' was passed in as NULL, return
+   * NULL immediately.
+   */
+  if (NULL == list)
+    return NULL;
+
+  insize = 1;
+
+  while (true)
+  {
+    p = list;
+    list = NULL;
+    tail = NULL;
+
+    nmerges = 0; /* count number of merges we do in this pass */
+
+    while (NULL != p)
+    {
+      nmerges++; /* there exists a merge to be done */
+
+      /* step `insize' places along from p */
+      q = p;
+      psize = 0;
+      for (i = 0; i < insize; i++)
+      {
+        psize++;
+        q = q->next;
+        if (NULL == q)
+          break;
+      }
+
+      /* if q hasn't fallen off the end, we have two lists to merge */
+      qsize = insize;
+
+      /* now we have two lists; merge them */
+      while (psize > 0 || (qsize > 0 && (NULL != q)))
+      {
+        /* decide whether the next element of the merge comes from p or q */
+        if (0 == psize)
+        {
+          /* p is empty; e must come from q. */
+          e = q;
+          q = q->next;
+          qsize--;
+        } else if ((0 == qsize) || (NULL == q)) {
+          /* q is empty; e must come from p. */
+          e = p;
+          p = p->next;
+          psize--;
+        } else if (p->size <= q->size) {
+          /* First element of p is lower (or same); e must come from p. */
+          e = p;
+          p = p->next;
+          psize--;
+        } else {
+          /* First element of q is lower; e must come from q. */
+          e = q;
+          q = q->next;
+          qsize--;
+        }
+
+        /* add the next element to the merged list */
+        if (NULL != tail)
+          tail->next = e;
+        else
+          list = e;
+
+        tail = e;
+      }
+
+      /* now p has stepped `insize' places along, and q has too */
+      p = q;
+    }
+
+    tail->next = NULL;
+
+    /* If we have done only one merge, we're finished. */
+    if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
+      return list;
+
+    /* Otherwise repeat, merging lists twice the size */
+    insize *= 2;
+  }
+}
+
+#endif