Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce FORCE_DEFRAG compilation option to allow activedefrag run when allocator is not jemalloc #1303

Merged
merged 25 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5044dfc
compile time option to force activedefrag
ranshid Nov 13, 2024
c62beb2
compile time option to force activedefrag
ranshid Nov 13, 2024
31a04dc
Add Cmake support
ranshid Nov 14, 2024
a42e565
Make defrag always active when forced
ranshid Nov 14, 2024
1f26c5d
Simplify the FORCE_DEFRAG Cmake logic
ranshid Nov 14, 2024
f36ee08
order and fix some uneeded changes
ranshid Nov 14, 2024
27f4290
make changes to SERVER_FLAGS
ranshid Nov 14, 2024
c1862eb
Merge branch 'unstable', remote-tracking branch 'origin' into force-d…
ranshid Nov 14, 2024
4643d97
fix spell and format issues
ranshid Nov 14, 2024
5d65221
Merge remote-tracking branch 'origin/unstable' into force-defrag
ranshid Dec 8, 2024
1744aab
merge and fix some pr comments
ranshid Dec 9, 2024
14e102e
add daily workflow test
ranshid Dec 9, 2024
bd2b04a
fix compilation on x86
ranshid Dec 9, 2024
6606bb6
format fixes
ranshid Dec 9, 2024
ec6ad04
more formatt fighting
ranshid Dec 9, 2024
cf5841f
one last try to solve the format issue
ranshid Dec 9, 2024
329167f
format
ranshid Dec 9, 2024
05d3b03
format
ranshid Dec 9, 2024
0c9f535
Update src/zmalloc.c
madolson Dec 9, 2024
6cf641a
Update src/allocator_defrag.h
madolson Dec 9, 2024
51e83ad
Update .github/workflows/daily.yml
ranshid Dec 10, 2024
5a836a1
add debug-defrag flag for tests + debug_defarg::skip tag option for t…
ranshid Dec 10, 2024
cb1b2ce
fix bad global tag
ranshid Dec 10, 2024
0c1b7b0
preset debug_defrag global fleg
ranshid Dec 11, 2024
5ddb45e
Merge remote-tracking branch 'origin/unstable' into force-defrag
ranshid Dec 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .github/workflows/daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,56 @@ jobs:
if: true && !contains(github.event.inputs.skiptests, 'unittest')
run: ./src/valkey-unit-tests --accurate

test-sanitizer-force-defrag:
runs-on: ubuntu-latest
if: |
(github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'valkey-io/valkey') ||
(github.event_name == 'pull_request' && github.event.pull_request.base.ref != 'unstable')) &&
!contains(github.event.inputs.skipjobs, 'sanitizer')
timeout-minutes: 14400
strategy:
fail-fast: false
matrix:
compiler: [gcc, clang]
env:
CC: ${{ matrix.compiler }}
steps:
- name: prep
if: github.event_name == 'workflow_dispatch'
run: |
echo "GITHUB_REPOSITORY=${{github.event.inputs.use_repo}}" >> $GITHUB_ENV
echo "GITHUB_HEAD_REF=${{github.event.inputs.use_git_ref}}" >> $GITHUB_ENV
echo "skipjobs: ${{github.event.inputs.skipjobs}}"
echo "skiptests: ${{github.event.inputs.skiptests}}"
echo "test_args: ${{github.event.inputs.test_args}}"
echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}"
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
repository: ${{ env.GITHUB_REPOSITORY }}
ref: ${{ env.GITHUB_HEAD_REF }}
- name: make
run: make all-with-unit-tests OPT=-O3 SANITIZER=address DEBUG_FORCE_DEFRAG=yes USE_JEMALLOC=no SERVER_CFLAGS='-Werror'
- name: testprep
run: |
sudo apt-get update
sudo apt-get install tcl8.6 tclx -y
- name: test
if: true && !contains(github.event.inputs.skiptests, 'valkey')
run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}}
- name: module api test
if: true && !contains(github.event.inputs.skiptests, 'modules')
run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}}
- name: sentinel tests
if: true && !contains(github.event.inputs.skiptests, 'sentinel')
run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}}
- name: cluster tests
if: true && !contains(github.event.inputs.skiptests, 'cluster')
run: ./runtest-cluster ${{github.event.inputs.cluster_test_args}}
- name: unittest
if: true && !contains(github.event.inputs.skiptests, 'unittest')
run: ./src/valkey-unit-tests

test-rpm-distros-jemalloc:
if: |
(github.event_name == 'workflow_dispatch' ||
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ unset(BUILD_UNIT_TESTS CACHE)
unset(BUILD_TEST_MODULES CACHE)
unset(BUILD_EXAMPLE_MODULES CACHE)
unset(USE_TLS CACHE)
unset(DEBUG_FORCE_DEFRAG CACHE)
4 changes: 3 additions & 1 deletion deps/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
add_subdirectory(jemalloc)
if (USE_JEMALLOC)
add_subdirectory(jemalloc)
endif ()
add_subdirectory(lua)

# Set hiredis options. We need to disable the defaults set in the OPTION(..) we do this by setting them in the CACHE
Expand Down
6 changes: 6 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ if (VALKEY_RELEASE_BUILD)
set_property(TARGET valkey-server PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif ()

if (DEBUG_FORCE_DEFRAG)
message(STATUS "Forcing Active Defrag run on valkey-server")
target_compile_definitions(valkey-server PRIVATE DEBUG_FORCE_DEFRAG)
target_compile_definitions(valkey-server PRIVATE HAVE_DEFRAG)
endif ()

if (BUILD_SANITIZER)
# 'BUILD_SANITIZER' is defined in ValkeySetup module (based on user input)
# If defined, the variables 'VALKEY_SANITAIZER_CFLAGS' and 'VALKEY_SANITAIZER_LDFLAGS'
Expand Down
5 changes: 5 additions & 0 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ ifdef REDIS_LDFLAGS
SERVER_LDFLAGS := $(REDIS_LDFLAGS)
endif

# Special case of forcing defrag to run even though we have no Jemlloc support
ifeq ($(DEBUG_FORCE_DEFRAG), yes)
SERVER_CFLAGS +=-DHAVE_DEFRAG -DDEBUG_FORCE_DEFRAG
endif

FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(SERVER_CFLAGS)
FINAL_LDFLAGS=$(LDFLAGS) $(OPT) $(SERVER_LDFLAGS) $(DEBUG)
FINAL_LIBS=-lm
Expand Down
59 changes: 55 additions & 4 deletions src/allocator_defrag.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,10 @@
* the other component to ensure both are using the same allocator configuration.
*/

#include <stdio.h>
#include "server.h"
#include "serverassert.h"
#include "allocator_defrag.h"

#define UNUSED(x) (void)(x)

#if defined(HAVE_DEFRAG) && defined(USE_JEMALLOC)

#define STRINGIFY_(x) #x
Expand Down Expand Up @@ -402,8 +400,56 @@ int allocatorShouldDefrag(void *ptr) {
je_cb.bin_info[binind].nregs - SLAB_NFREE(out, 0));
}

#else
/* Utility function to get the fragmentation ratio from jemalloc.
* It is critical to do that by comparing only heap maps that belong to
* jemalloc, and skip ones the jemalloc keeps as spare. Since we use this
* fragmentation ratio in order to decide if a defrag action should be taken
* or not, a false detection can cause the defragmenter to waste a lot of CPU
* without the possibility of getting any results. */
float getAllocatorFragmentation(size_t *out_frag_bytes) {
size_t resident, active, allocated, frag_smallbins_bytes;
zmalloc_get_allocator_info(&allocated, &active, &resident, NULL, NULL);
frag_smallbins_bytes = allocatorDefragGetFragSmallbins();
/* Calculate the fragmentation ratio as the proportion of wasted memory in small
* bins (which are defraggable) relative to the total allocated memory (including large bins).
* This is because otherwise, if most of the memory usage is large bins, we may show high percentage,
* despite the fact it's not a lot of memory for the user. */
float frag_pct = (float)frag_smallbins_bytes / allocated * 100;
float rss_pct = ((float)resident / allocated) * 100 - 100;
size_t rss_bytes = resident - allocated;
if (out_frag_bytes) *out_frag_bytes = frag_smallbins_bytes;
serverLog(LL_DEBUG, "allocated=%zu, active=%zu, resident=%zu, frag=%.2f%% (%.2f%% rss), frag_bytes=%zu (%zu rss)",
allocated, active, resident, frag_pct, rss_pct, frag_smallbins_bytes, rss_bytes);
return frag_pct;
}

#elif defined(DEBUG_FORCE_DEFRAG)
int allocatorDefragInit(void) {
return 0;
}
void allocatorDefragFree(void *ptr, size_t size) {
UNUSED(size);
zfree(ptr);
}
__attribute__((malloc)) void *allocatorDefragAlloc(size_t size) {
return zmalloc(size);
return NULL;
}
unsigned long allocatorDefragGetFragSmallbins(void) {
return 0;
}

int allocatorShouldDefrag(void *ptr) {
UNUSED(ptr);
return 1;
}

float getAllocatorFragmentation(size_t *out_frag_bytes) {
*out_frag_bytes = server.active_defrag_ignore_bytes + 1;
return server.active_defrag_threshold_upper;
}

#else
int allocatorDefragInit(void) {
return -1;
}
Expand All @@ -423,4 +469,9 @@ int allocatorShouldDefrag(void *ptr) {
UNUSED(ptr);
return 0;
}

float getAllocatorFragmentation(size_t *out_frag_bytes) {
UNUSED(out_frag_bytes);
return 0;
}
#endif
10 changes: 6 additions & 4 deletions src/allocator_defrag.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
#include <jemalloc/jemalloc.h>
/* We can enable the server defrag capabilities only if we are using Jemalloc
* and the version that has the experimental.utilization namespace in mallctl . */
#if defined(JEMALLOC_VERSION_MAJOR) && \
(JEMALLOC_VERSION_MAJOR > 5 || \
(JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR > 2) || \
(JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR == 2 && JEMALLOC_VERSION_BUGFIX >= 1))
#if (defined(JEMALLOC_VERSION_MAJOR) && \
(JEMALLOC_VERSION_MAJOR > 5 || \
(JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR > 2) || \
(JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR == 2 && JEMALLOC_VERSION_BUGFIX >= 1))) || \
defined(DEBUG_FORCE_DEFRAG)
#define HAVE_DEFRAG
#endif
#endif
Expand All @@ -18,5 +19,6 @@ void allocatorDefragFree(void *ptr, size_t size);
__attribute__((malloc)) void *allocatorDefragAlloc(size_t size);
unsigned long allocatorDefragGetFragSmallbins(void);
int allocatorShouldDefrag(void *ptr);
float getAllocatorFragmentation(size_t *out_frag_bytes);

#endif /* __ALLOCATOR_DEFRAG_H */
2 changes: 1 addition & 1 deletion src/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -3190,7 +3190,7 @@ standardConfig static_configs[] = {
createBoolConfig("replica-read-only", "slave-read-only", DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_replica_ro, 1, NULL, NULL),
createBoolConfig("replica-ignore-maxmemory", "slave-ignore-maxmemory", MODIFIABLE_CONFIG, server.repl_replica_ignore_maxmemory, 1, NULL, NULL),
createBoolConfig("jemalloc-bg-thread", NULL, MODIFIABLE_CONFIG, server.jemalloc_bg_thread, 1, NULL, updateJemallocBgThread),
createBoolConfig("activedefrag", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.active_defrag_enabled, 0, isValidActiveDefrag, NULL),
createBoolConfig("activedefrag", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.active_defrag_enabled, CONFIG_ACTIVE_DEFRAG_DEFAULT, isValidActiveDefrag, NULL),
createBoolConfig("syslog-enabled", NULL, IMMUTABLE_CONFIG, server.syslog_enabled, 0, NULL, NULL),
createBoolConfig("cluster-enabled", NULL, IMMUTABLE_CONFIG, server.cluster_enabled, 0, NULL, NULL),
createBoolConfig("appendonly", NULL, MODIFIABLE_CONFIG | DENY_LOADING_CONFIG, server.aof_enabled, 0, NULL, updateAppendonly),
Expand Down
28 changes: 0 additions & 28 deletions src/defrag.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,6 @@ static_assert(offsetof(defragPubSubCtx, kvstate) == 0, "defragStageKvstoreHelper
static list *defrag_later;
static unsigned long defrag_later_cursor;


/* this method was added to jemalloc in order to help us understand which
* pointers are worthwhile moving and which aren't */
int je_get_defrag_hint(void *ptr);

/* Defrag function which allocates and copies memory if needed, but DOESN'T free the old block.
* It is the responsibility of the caller to free the old block if a non-NULL value (new block)
* is returned. (Returns NULL if no relocation was needed.)
Expand Down Expand Up @@ -828,29 +823,6 @@ static void dbKeysScanCallback(void *privdata, const dictEntry *de) {
server.stat_active_defrag_scanned++;
}

/* Utility function to get the fragmentation ratio from jemalloc.
* It is critical to do that by comparing only heap maps that belong to
* jemalloc, and skip ones the jemalloc keeps as spare. Since we use this
* fragmentation ratio in order to decide if a defrag action should be taken
* or not, a false detection can cause the defragmenter to waste a lot of CPU
* without the possibility of getting any results. */
static float getAllocatorFragmentation(size_t *out_frag_bytes) {
size_t resident, active, allocated, frag_smallbins_bytes;
zmalloc_get_allocator_info(&allocated, &active, &resident, NULL, NULL);
frag_smallbins_bytes = allocatorDefragGetFragSmallbins();
/* Calculate the fragmentation ratio as the proportion of wasted memory in small
* bins (which are defraggable) relative to the total allocated memory (including large bins).
* This is because otherwise, if most of the memory usage is large bins, we may show high percentage,
* despite the fact it's not a lot of memory for the user. */
float frag_pct = (float)frag_smallbins_bytes / allocated * 100;
float rss_pct = ((float)resident / allocated) * 100 - 100;
size_t rss_bytes = resident - allocated;
if (out_frag_bytes) *out_frag_bytes = frag_smallbins_bytes;
serverLog(LL_DEBUG, "allocated=%zu, active=%zu, resident=%zu, frag=%.2f%% (%.2f%% rss), frag_bytes=%zu (%zu rss)",
allocated, active, resident, frag_pct, rss_pct, frag_smallbins_bytes, rss_bytes);
return frag_pct;
}

/* Defrag scan callback for the pubsub dictionary. */
static void defragPubsubScanCallback(void *privdata, const dictEntry *de) {
defragPubSubCtx *ctx = privdata;
Expand Down
5 changes: 5 additions & 0 deletions src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ struct hdr_histogram;
#define DEFAULT_WAIT_BEFORE_RDB_CLIENT_FREE 60 /* Grace period in seconds for replica main \
* channel to establish psync. */
#define LOADING_PROCESS_EVENTS_INTERVAL_DEFAULT 100 /* Default: 0.1 seconds */
#if !defined(DEBUG_FORCE_DEFRAG)
#define CONFIG_ACTIVE_DEFRAG_DEFAULT 0
#else
#define CONFIG_ACTIVE_DEFRAG_DEFAULT 1
#endif

/* Bucket sizes for client eviction pools. Each bucket stores clients with
* memory usage of up to twice the size of the bucket below it. */
Expand Down
Loading