Skip to content

Commit

Permalink
Add adaptive replacement cache
Browse files Browse the repository at this point in the history
Current basic block management consumes a significant amount of memory,
which leads to unnecessary waste due to frequent map allocation and
release. Adaptive Replacement Cache (ARC) is a page replacement
algorithm with better performance than least recently used (LRU). After
the translated blocks are handled by ARC, better memory usage and hit
rates can be achieved by keeping track of frequently used and recently
used pages, as well as a recent eviction history for both.

According to the cache information obtained while running CoreMark, the
cache hit rate of ARC can reach over 99%.
  • Loading branch information
qwe661234 committed Mar 3, 2023
1 parent a713b4c commit 45ff941
Show file tree
Hide file tree
Showing 8 changed files with 776 additions and 7 deletions.
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ gdbstub-test: $(BIN)
$(Q)tests/gdbstub.sh && $(call notice, [OK])
endif

# Manage translated basic blocks with an adaptive replacement cache (ARC)
ENABLE_ARCACHE ?= 1
$(call set-feature, ARCACHE)

# Collect and print cache statistics (requests, hits, hit ratio)
ENABLE_ARCACHE_INFO ?= 1
$(call set-feature, ARCACHE_INFO)

# For tail-call elimination, we need a specific set of build flags applied.
# FIXME: On macOS + Apple Silicon, -fno-stack-protector might have a negative impact.
$(OUT)/emulate.o: CFLAGS += -fomit-frame-pointer -fno-stack-check -fno-stack-protector
Expand All @@ -93,6 +101,7 @@ OBJS := \
emulate.o \
riscv.o \
elf.o \
cache.o \
$(OBJS_EXT) \
main.o

Expand Down
230 changes: 230 additions & 0 deletions src/cache.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
#include "cache.h"

#include <inttypes.h>

/* Pick the smaller/larger of two values.
 * Arguments and the whole expansion are parenthesized so the macros compose
 * safely with operators of any precedence (the originals broke on inputs
 * like max(2, 1 | 2)).  Both macros still evaluate their arguments twice,
 * so avoid passing expressions with side effects.
 */
#define min(a, b) (((a) < (b)) ? (a) : (b))
#define max(a, b) (((a) > (b)) ? (a) : (b))

/* Hash-table geometry: SIZE buckets indexed by the top BITS bits of a
 * Fibonacci (multiplicative) hash. */
#define BITS 10
#define SIZE 1024
#define GOLDEN_RATIO_32 0x61C88647
/* With BITS = 10 the shift already yields a value in [0, SIZE); the final
 * modulo is a safety net in case BITS and SIZE ever diverge.  The whole
 * expansion is parenthesized so e.g. `2 * HASH(x)` behaves as expected. */
#define HASH(val) ((((val) *GOLDEN_RATIO_32) >> (32 - BITS)) % SIZE)

/* One cached key/value pair.  An entry lives on exactly one of the four
 * ARC lists (T1/T2/B1/B2) via `list`, and on one hash bucket chain via
 * `ht_list`, so it can be found both by recency order and by key. */
typedef struct arc_entry {
    void *value;          /* cached payload; ownership stays with the caller */
    uint32_t key;         /* lookup key (the emulator passes rv->PC) */
    arc_type_t arc_type;  /* which ARC list currently holds this entry */
    struct list_head list;    /* linkage on the T1/T2/B1/B2 list */
    struct list_head ht_list; /* linkage on the hash bucket chain */
} arc_entry_t;

/* Separate-chaining hash table: an array of SIZE bucket list heads.
 * Entries hang off a bucket through their `ht_list` member. */
typedef struct hashtable {
    struct list_head *ht_list_head;
} hashtable_t;

/* Allocate and initialize an empty ARC cache.
 *
 * Capacity c is fixed to SIZE; the adaptive target p for T1 starts at c/2.
 * Returns the new cache, or NULL if any allocation fails (the original
 * dereferenced unchecked malloc() results).
 */
cache_t *cache_create(void)
{
    /* calloc zeroes all pointers/counters so the failure path below can
     * free() unconditionally. */
    cache_t *cache = calloc(1, sizeof *cache);
    if (!cache)
        return NULL;

    for (int i = 0; i < 4; i++) {
        cache->list_table[i] = malloc(sizeof *cache->list_table[i]);
        if (!cache->list_table[i])
            goto fail;
        INIT_LIST_HEAD(cache->list_table[i]);
        cache->list_size[i] = 0;
    }

    cache->map = malloc(sizeof *cache->map);
    if (!cache->map)
        goto fail;
    cache->map->ht_list_head =
        malloc(SIZE * sizeof *cache->map->ht_list_head);
    if (!cache->map->ht_list_head)
        goto fail;
    for (int i = 0; i < SIZE; i++)
        INIT_LIST_HEAD(&cache->map->ht_list_head[i]);

    cache->c = SIZE;     /* total capacity of the resident lists T1 + T2 */
    cache->p = SIZE / 2; /* adaptive target size of T1 */
#if RV32_HAS(ARCACHE_INFO)
    cache->get_time = 0;
    cache->hit_time = 0;
#endif
    return cache;

fail:
    /* All unset pointers are NULL (calloc), so free() is safe everywhere. */
    free(cache->map);
    for (int i = 0; i < 4; i++)
        free(cache->list_table[i]);
    free(cache);
    return NULL;
}

/* Destroy the cache and everything it owns.
 *
 * release_entry is invoked on every stored value (including values still
 * referenced from the ghost lists B1/B2, which retain their payloads in
 * this implementation).  The arc_entry_t nodes themselves are freed too --
 * the original leaked every node by releasing only entry->value.
 */
void cache_free(cache_t *cache, void (*release_entry)(void *))
{
    for (int i = 0; i < 4; i++) {
        arc_entry_t *entry, *safe;
        list_for_each_entry_safe(entry, safe, cache->list_table[i], list)
        {
            release_entry(entry->value);
            free(entry); /* fix: node was previously leaked */
        }
        free(cache->list_table[i]);
    }
    free(cache->map->ht_list_head);
    free(cache->map);
    free(cache);
}

/* Rule of ARC
 * 1. size of T1 + size of T2 <= c
 * 2. size of T1 + size of B1 <= c
 * 3. size of T2 + size of B2 <= 2c
 * 4. size of T1 + size of T2 + size of B1 + size of B2 <= 2c
 */
#if RV32_HAS(ARCACHE_INFO)
/* Verify the four ARC size invariants above; aborts on violation. */
void assert_cache(cache_t *cache)
{
    const uint32_t t1 = cache->list_size[T1], b1 = cache->list_size[B1],
                   t2 = cache->list_size[T2], b2 = cache->list_size[B2];
    assert(t1 + t2 <= cache->c);
    assert(t1 + b1 <= cache->c);
    assert(t2 + b2 <= 2 * cache->c);
    assert(t1 + t2 + b1 + b2 <= 2 * cache->c);
}
#endif

/* Move an entry to the most-recently-used (head) position of the given
 * ARC list, updating the per-list size counters accordingly. */
void move_to_mru(cache_t *cache, arc_entry_t *entry, const arc_type_t arc_type)
{
    const arc_type_t from = entry->arc_type;
    cache->list_size[from]--;
    entry->arc_type = arc_type;
    cache->list_size[arc_type]++;
    list_move(&entry->list, cache->list_table[arc_type]);
}

/* Once T1 has reached its adaptive target size p, demote T1's LRU entry
 * into the ghost list B1; otherwise do nothing. */
void replaceT1(cache_t *cache)
{
    if (cache->list_size[T1] < cache->p)
        return;
    arc_entry_t *victim =
        list_last_entry(cache->list_table[T1], arc_entry_t, list);
    move_to_mru(cache, victim, B1);
}
/* Once T2 has reached its share of the capacity (c - p), demote T2's LRU
 * entry into the ghost list B2; otherwise do nothing. */
void replaceT2(cache_t *cache)
{
    if (cache->list_size[T2] < (cache->c - cache->p))
        return;
    arc_entry_t *victim =
        list_last_entry(cache->list_table[T2], arc_entry_t, list);
    move_to_mru(cache, victim, B2);
}

/* Look up `key` and apply the ARC policy to the entry's list membership.
 *
 * Returns the cached value on a T1/T2 hit or a B1/B2 ghost hit (ghosts
 * retain their payloads in this implementation and are promoted to T2);
 * returns NULL on a miss.
 *
 * Fixes over the original:
 *  - The four arc_type cases were non-exclusive `if`s: a T1 hit mutated
 *    arc_type to T2 and then ALSO executed the T2 branch, double-counting
 *    hit_time and moving the entry twice.  A switch makes them exclusive.
 *  - `max(p - 1, 0)` on unsigned p wrapped to UINT_MAX when p == 0;
 *    replaced with a guarded decrement.
 *  - After a full bucket traversal the list cursor points at the bucket
 *    head (not a valid entry), so `entry->key != key` read through an
 *    invalid pointer; a found-flag pattern avoids that.
 */
void *cache_get(cache_t *cache, uint32_t key)
{
    if (cache->c <= 0 || list_empty(&cache->map->ht_list_head[HASH(key)]))
        return NULL;

    /* Scan the hash bucket for the key. */
    arc_entry_t *entry = NULL, *curr;
    list_for_each_entry(curr, &cache->map->ht_list_head[HASH(key)], ht_list)
    {
        if (curr->key == key) {
            entry = curr;
            break;
        }
    }
#if RV32_HAS(ARCACHE_INFO)
    cache->get_time++;
#endif
    if (!entry)
        return NULL;

    switch (entry->arc_type) {
    case T1: /* recency hit: second access, promote into the frequent list */
#if RV32_HAS(ARCACHE_INFO)
        cache->hit_time++;
#endif
        replaceT2(cache);
        move_to_mru(cache, entry, T2);
        break;
    case T2: /* frequency hit: refresh the MRU position */
#if RV32_HAS(ARCACHE_INFO)
        cache->hit_time++;
#endif
        move_to_mru(cache, entry, T2);
        break;
    case B1: /* ghost hit: recency set was evicted too eagerly, grow p */
        cache->p = min(cache->p + 1, cache->c);
        replaceT2(cache);
        move_to_mru(cache, entry, T2);
        break;
    case B2: /* ghost hit: frequency set was evicted too eagerly, shrink p */
        if (cache->p > 0) /* guarded: unsigned p must not wrap below 0 */
            cache->p--;
        replaceT1(cache);
        move_to_mru(cache, entry, T2);
        break;
    }
#if RV32_HAS(ARCACHE_INFO)
    assert_cache(cache);
#endif
    return entry->value;
}

/* Insert `value` under `key`, evicting per the ARC replacement policy.
 *
 * Returns the payload of an entry that was fully discarded to make room
 * (the caller owns it and must release it), or NULL when nothing was
 * displaced.
 *
 * NOTE(review): assumes `key` is not already cached -- callers are expected
 * to try cache_get() first; verify at call sites.
 */
void *cache_put(cache_t *cache, uint32_t key, void *value)
{
#if RV32_HAS(ARCACHE_INFO)
    cache->get_time++;
#endif
    void *delete_value = NULL;
#if RV32_HAS(ARCACHE_INFO)
    assert(cache->list_size[T1] + cache->list_size[B1] <= cache->c);
#endif
    /* Before adding a new element to the cache, make room so the ARC size
     * invariants (see the "Rule of ARC" comment above) keep holding.
     */
    if ((cache->list_size[T1] + cache->list_size[B1]) == cache->c) {
        /* Case 1: the recency side (T1 + B1) is at capacity. */
        if (cache->list_size[T1] < cache->c) {
            /* B1 is non-empty: drop its LRU ghost, then let replaceT1()
             * demote T1's LRU entry into B1 if T1 is at its target size. */
            arc_entry_t *delete_target =
                list_last_entry(cache->list_table[B1], arc_entry_t, list);
            list_del_init(&delete_target->list);
            list_del_init(&delete_target->ht_list);
            delete_value = delete_target->value;
            free(delete_target);
            cache->list_size[B1]--;
            replaceT1(cache);
        } else {
            /* B1 is empty, T1 alone holds the full capacity: evict T1's
             * LRU entry outright. */
            arc_entry_t *delete_target =
                list_last_entry(cache->list_table[T1], arc_entry_t, list);
            list_del_init(&delete_target->list);
            list_del_init(&delete_target->ht_list);
            delete_value = delete_target->value;
            free(delete_target);
            cache->list_size[T1]--;
        }
    } else {
        /* Case 2: the recency side still has room. */
#if RV32_HAS(ARCACHE_INFO)
        assert(cache->list_size[T1] + cache->list_size[B1] < cache->c);
#endif
        uint32_t size = cache->list_size[T1] + cache->list_size[B1] +
                        cache->list_size[T2] + cache->list_size[B2];
        /* All four lists together may hold at most 2c entries; when that
         * bound is reached, drop B2's LRU ghost. */
        if (size == cache->c * 2) {
            arc_entry_t *delete_target =
                list_last_entry(cache->list_table[B2], arc_entry_t, list);
            list_del_init(&delete_target->list);
            list_del_init(&delete_target->ht_list);
            delete_value = delete_target->value;
            free(delete_target);
            cache->list_size[B2]--;
        }
        /* Keep T1 + T2 within capacity c: demote from whichever resident
         * list exceeds its adaptive share (p for T1, c - p for T2). */
        if (cache->list_size[T1] + cache->list_size[T2] >= cache->c &&
            cache->list_size[T1] < cache->p)
            replaceT2(cache);
        else
            replaceT1(cache);
    }
    /* A newly inserted entry always enters at the MRU end of T1. */
    arc_entry_t *new_entry = (arc_entry_t *) malloc(sizeof(arc_entry_t));
    new_entry->key = key;
    new_entry->value = value;
    new_entry->arc_type = T1;
    list_add(&new_entry->list, cache->list_table[T1]);
    list_add(&new_entry->ht_list, &cache->map->ht_list_head[HASH(key)]);
    cache->list_size[T1]++;
#if RV32_HAS(ARCACHE_INFO)
    assert_cache(cache);
#endif
    return delete_value;
}

#if RV32_HAS(ARCACHE_INFO)
/* Print total requests, hits and the hit ratio to stdout.
 *
 * Fixes over the original: get_time/hit_time are uint64_t, so "%lu" was
 * undefined behavior on targets where unsigned long is 32 bits -- use
 * PRIu64 from <inttypes.h>.  Also guard against dividing by zero when no
 * lookups have been recorded yet.
 */
void cache_print_stats(cache_t *cache)
{
    const double ratio =
        cache->get_time
            ? (double) cache->hit_time * 100.0 / (double) cache->get_time
            : 0.0;
    printf(
        "requests: %12" PRIu64 " \n"
        "hits: %12" PRIu64 " \n"
        "ratio: %lf%%\n",
        cache->get_time, cache->hit_time, ratio);
}
#endif
42 changes: 42 additions & 0 deletions src/cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* Public interface of the adaptive replacement cache (ARC) used to manage
 * translated basic blocks. */
#ifndef CACHE_H
#define CACHE_H

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "list.h"

/* This header uses RV32_HAS(); include feature.h directly so the header is
 * self-contained instead of relying on includer ordering. */
#include "feature.h"

/*
 * ARC list roles:
 *   T1: LRU list        -- resident entries seen once recently
 *   T2: LFU list        -- resident entries seen at least twice
 *   B1: LRU ghost list  -- eviction history of T1
 *   B2: LFU ghost list  -- eviction history of T2
 */
typedef enum { T1, B1, T2, B2 } arc_type_t;

struct hashtable; /* opaque; defined in cache.c */

typedef struct cache {
    struct list_head *list_table[4]; /* heads of T1/B1/T2/B2, indexed by
                                        arc_type_t */
    uint32_t list_size[4];           /* current length of each list */
    struct hashtable *map;           /* key -> entry lookup table */
    uint32_t c;                      /* capacity: max size of T1 + T2 */
    uint32_t p;                      /* adaptive target size of T1 */
#if RV32_HAS(ARCACHE_INFO)
    uint64_t get_time; /* total lookups/insertions recorded */
    uint64_t hit_time; /* lookups that hit in T1/T2 */
#endif
} cache_t;

/* Create an empty cache; returns NULL on allocation failure. */
cache_t *cache_create(void);

/* Destroy the cache; release_entry is invoked on every stored value. */
void cache_free(cache_t *cache, void (*release_entry)(void *));

/* Look up key; returns the cached value, or NULL on miss. */
void *cache_get(cache_t *cache, uint32_t key);

/* Insert value under key; returns an evicted value the caller must
 * release, or NULL when nothing was displaced. */
void *cache_put(cache_t *cache, uint32_t key, void *value);

#if RV32_HAS(ARCACHE_INFO)
/* Print request/hit counters and the hit ratio to stdout. */
void cache_print_stats(cache_t *cache);
#endif

#endif /* CACHE_H */
25 changes: 20 additions & 5 deletions src/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,7 @@ static bool insn_is_branch(uint8_t opcode)
}
return false;
}

#if !RV32_HAS(ARCACHE)
/* hash function is used when mapping address into the block map */
static uint32_t hash(size_t k)
{
Expand All @@ -1262,7 +1262,7 @@ static uint32_t hash(size_t k)
#endif
return k;
}

#endif
/* allocate a basic block */
static block_t *block_alloc(const uint8_t bits)
{
Expand All @@ -1273,7 +1273,7 @@ static block_t *block_alloc(const uint8_t bits)
block->ir = malloc(block->insn_capacity * sizeof(rv_insn_t));
return block;
}

#if !RV32_HAS(ARCACHE)
/* insert a block into block map */
static void block_insert(block_map_t *map, const block_t *block)
{
Expand Down Expand Up @@ -1309,7 +1309,7 @@ static block_t *block_find(const block_map_t *map, const uint32_t addr)
}
return NULL;
}

#endif
static void block_translate(riscv_t *rv, block_t *block)
{
block->pc_start = block->pc_end = rv->PC;
Expand Down Expand Up @@ -1342,24 +1342,39 @@ static void block_translate(riscv_t *rv, block_t *block)

static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
{
#if RV32_HAS(ARCACHE)
/* lookup the next block in the block cache */
block_t *next = (block_t *) cache_get(rv->cache, rv->PC);
#else
block_map_t *map = &rv->block_map;
/* lookup the next block in the block map */
block_t *next = block_find(map, rv->PC);

#endif
if (!next) {
#if !RV32_HAS(ARCACHE)
if (map->size * 1.25 > map->block_capacity) {
block_map_clear(map);
prev = NULL;
}
#endif

/* allocate a new block */
next = block_alloc(10);

/* translate the basic block */
block_translate(rv, next);

#if RV32_HAS(ARCACHE)
/* insert the block into block cache */
block_t *delete_target = cache_put(rv->cache, rv->PC, &(*next));
if (delete_target) {
free(delete_target->ir);
free(delete_target);
}
#else
/* insert the block into block map */
block_insert(&rv->block_map, next);
#endif

/* update the block prediction
* When we translate a new block, the block predictor may benefit,
Expand Down
10 changes: 10 additions & 0 deletions src/feature.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@
#define RV32_FEATURE_GDBSTUB 1
#endif

/* Manage translated basic blocks with an adaptive replacement cache (ARC);
 * set to 0 to fall back to the plain block map. */
#ifndef RV32_FEATURE_ARCACHE
#define RV32_FEATURE_ARCACHE 1
#endif

/* Collect and print cache statistics (requests, hits, hit ratio) */
#ifndef RV32_FEATURE_ARCACHE_INFO
#define RV32_FEATURE_ARCACHE_INFO 1
#endif

/* Feature test macro */
#define RV32_HAS(x) RV32_FEATURE_##x

Expand Down
Loading

0 comments on commit 45ff941

Please sign in to comment.