From 06521a7332660edb4be39080e909d12b9bce64c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20=F0=9F=91=A8=F0=9F=8F=BD=E2=80=8D=F0=9F=92=BB=20Copl?= =?UTF-8?q?an?= Date: Fri, 1 Nov 2024 17:17:50 -0700 Subject: [PATCH] @@@ --- bytecode-vm-compiler/Makefile | 6 +- bytecode-vm-compiler/src/compiler.c | 4 +- bytecode-vm-compiler/src/memory.c | 3 + bytecode-vm-compiler/src/object.c | 13 ++- bytecode-vm-compiler/src/object.h | 2 + bytecode-vm-compiler/src/value.c | 1 + bytecode-vm-compiler/src/vm.c | 5 +- bytecode-vm-compiler/src/vm.h | 2 +- bytecode-vm-compiler/test/test-table.c | 132 +++++++++++++++++++++++-- bytecode-vm-compiler/test/test-vm.c | 76 +++++++------- 10 files changed, 192 insertions(+), 52 deletions(-) diff --git a/bytecode-vm-compiler/Makefile b/bytecode-vm-compiler/Makefile index 6c4fdb4..fb6fdbb 100644 --- a/bytecode-vm-compiler/Makefile +++ b/bytecode-vm-compiler/Makefile @@ -2,7 +2,7 @@ SHELL=/bin/bash .SHELLFLAGS := -o pipefail -o errexit -o nounset -c CC ?= cc -CFLAGS ?= -std=gnu2x -g -Wall -Wextra -Wpedantic -Wimplicit-fallthrough -Werror -Wno-gnu-statement-expression -Wno-strict-prototypes +CFLAGS ?= -std=gnu2x -g -Wall -Wextra -Wpedantic -Wimplicit-fallthrough -Werror -Wno-gnu-statement-expression -Wno-strict-prototypes -Wno-unused-parameter SRC_FILES= src/chunk.c src/compiler.c src/debug.c src/memory.c src/object.c src/scanner.c src/stack.c src/value.c src/vm.c lox: src/main.c $(SRC_FILES) src/*.h @@ -16,7 +16,7 @@ test/test-vm: test/test-vm.c src/vm.h src/vm.c src/chunk.h src/chunk.c src/value $(CC) -fsanitize=address $(CFLAGS) -Wno-error test/test-vm.c $(SRC_FILES) -o test/test-vm test/test-vm || (mv test/test-vm test/test-vm.failed && false) test/test-stack: test/test-stack.c src/stack.h src/stack.c - $(CC) -fsanitize=address $(CFLAGS) test/test-stack.c $(SRC_FILES) -o test/test-stack + $(CC) -fsanitize=address $(CFLAGS) -Wno-error test/test-stack.c $(SRC_FILES) -o test/test-stack test/test-stack test/test-scanner: test/test-scanner.c src/scanner.h src/scanner.c src/lox_assert.h $(CC) -fsanitize=address $(CFLAGS) -Wno-error test/test-scanner.c $(SRC_FILES) -o test/test-scanner @@ -25,7 +25,7 @@ test/test-compiler: test/test-compiler.c src/compiler.h src/compiler.c src/scann $(CC) -DDEBUG_TRACE_EXECUTION -fsanitize=address $(CFLAGS) -Wno-error test/test-compiler.c $(SRC_FILES) -o test/test-compiler test/test-compiler || mv -v test/test-compiler test/test-compiler.failed test/test-table: test/test-table.c test/test-vm - $(CC) -fsanitize=address $(CFLAGS) -Wno-error test/test-table.c $(SRC_FILES) -o test/test-table + $(CC) $(CFLAGS) -fsanitize=address $(EXTRA_CFLAGS) -Wno-error test/test-table.c $(SRC_FILES) -o test/test-table test/test-table || mv test/test-table test/test-table.failed diff --git a/bytecode-vm-compiler/src/compiler.c b/bytecode-vm-compiler/src/compiler.c index 7ab425e..5487e1f 100644 --- a/bytecode-vm-compiler/src/compiler.c +++ b/bytecode-vm-compiler/src/compiler.c @@ -95,6 +95,7 @@ LoxString* fromCString(char const* cString, size_t length) { str->chars[length] = '\0'; str->length = length; + str->hash = computeHashOfCString(cString, length); return str; } @@ -273,12 +274,13 @@ static void String() { LOX_ASSERT(parser.previous.type == TOKEN_STRING); // TODO: support string escape sequences like '\n' + char const* strStart = parser.previous.start + 1; int strLength = parser.previous.length - 2; // subtract quotes // FIXME: This sucks. We need to embed the string into the bytecode // directly instead of allocating on the heap because now the bytecode // isn't portable. - LoxString* str = fromCString(parser.previous.start + 1, strLength); + LoxString* str = fromCString(strStart, strLength); Value value = {.type = VAL_OBJ, .as.obj = (LoxObj*)str}; diff --git a/bytecode-vm-compiler/src/memory.c b/bytecode-vm-compiler/src/memory.c index 05332e5..79e7d07 100644 --- a/bytecode-vm-compiler/src/memory.c +++ b/bytecode-vm-compiler/src/memory.c @@ -44,6 +44,9 @@ void freeObjects(VM* vm) { LoxObj* next = NULL; while (curr) { next = curr->next; +#ifdef DEBUG_TRACE_EXECUTION + printObject(OBJ_VAL(curr)); +#endif freeLoxObject(curr); curr = next; } diff --git a/bytecode-vm-compiler/src/object.c b/bytecode-vm-compiler/src/object.c index 2d777b5..cc53719 100644 --- a/bytecode-vm-compiler/src/object.c +++ b/bytecode-vm-compiler/src/object.c @@ -29,7 +29,8 @@ static LoxObj* allocateObj(VM* vm, size_t size, ObjType type) { } // TODO: refactor to use `newEmptyLoxString` -LoxString* newLoxStringFromCString(VM* vm, char const* cString, size_t length) { +LoxString* newLoxStringFromCString(VM* vm, char const* cString, size_t length, + uint32_t hash) { LoxString* str = (LoxString*)allocateObj( vm, sizeof(LoxString) + (length + 1) * sizeof(char), OBJ_STRING); @@ -39,6 +40,7 @@ LoxString* newLoxStringFromCString(VM* vm, char const* cString, size_t length) { str->chars[length] = '\0'; str->length = length; + str->hash = hash; return str; } @@ -53,3 +55,12 @@ LoxString* newEmptyLoxString(VM* vm, size_t length) { str->length = length; return str; } + +uint32_t computeHashOfCString(char const* chars, size_t length) { + uint32_t hash = 2166136261u; + for (size_t i = 0; i < length; ++i) { + hash ^= (uint8_t)chars[i]; + hash *= 16777619; + } + return hash; +} diff --git a/bytecode-vm-compiler/src/object.h b/bytecode-vm-compiler/src/object.h index a518916..d8710f4 100644 --- a/bytecode-vm-compiler/src/object.h +++ b/bytecode-vm-compiler/src/object.h @@ -24,6 +24,7 @@ typedef struct { LoxObj obj; /** same as return value of strlen */ size_t length; + uint32_t hash; char chars[]; } LoxString; @@ -32,3 +33,4 @@ void printObject(Value value); static inline bool isObjType(Value value, ObjType type) { return IS_OBJ(value) && AS_OBJ(value)->type == type; } +uint32_t computeHashOfCString(char const* chars, size_t length); diff --git a/bytecode-vm-compiler/src/value.c b/bytecode-vm-compiler/src/value.c index 16578cf..1151a60 100644 --- a/bytecode-vm-compiler/src/value.c +++ b/bytecode-vm-compiler/src/value.c @@ -59,6 +59,7 @@ void printValue(Value value) { } static bool compareLoxStrings(LoxString* a, LoxString* b) { + // TODO(perf): compare string.hash instead if (a == b) return true; if (a->length != b->length) diff --git a/bytecode-vm-compiler/src/vm.c b/bytecode-vm-compiler/src/vm.c index 25e22a5..fc1848f 100644 --- a/bytecode-vm-compiler/src/vm.c +++ b/bytecode-vm-compiler/src/vm.c @@ -61,7 +61,7 @@ void freeVM(void) { * q: does this copy the VM? Looking through the properties of the VM, the only * property that might actually be copied is `vm.stack.cap` */ -VM getVM_(void) { return vm; } +VM* getVM_(void) { return &vm; } InterpretResult run(void) { #define READ_BYTE() (*vm.ip++) @@ -185,6 +185,9 @@ InterpretResult run(void) { // This line does NOTHING. // See NOTE[why-LoxString-null-terminates] addedLoxStr->chars[finalStringLength] = '\0'; + uint32_t hash = computeHashOfCString(addedLoxStr->chars, + addedLoxStr->length); + addedLoxStr->hash = hash; stack_push(&vm.stack, OBJ_VAL(addedLoxStr)); } else { runtimeError( diff --git a/bytecode-vm-compiler/src/vm.h b/bytecode-vm-compiler/src/vm.h index 3fd535c..9e55698 100644 --- a/bytecode-vm-compiler/src/vm.h +++ b/bytecode-vm-compiler/src/vm.h @@ -23,7 +23,7 @@ typedef enum { void initVM(void); void freeVM(void); -VM getVM_(void); +VM* getVM_(void); InterpretResult interpret_bytecode_(Chunk* chunk); InterpretResult interpret(char const* source); diff --git a/bytecode-vm-compiler/test/test-table.c b/bytecode-vm-compiler/test/test-table.c index 1e89b01..be25348 100644 --- a/bytecode-vm-compiler/test/test-table.c +++ b/bytecode-vm-compiler/test/test-table.c @@ -1,7 +1,13 @@ +/** + * idea: make this its own library independent of Lox, with keys of type + * `{uint32_t hash}` and values of type `void*`. + * Then create a wrapper in Lox that takes keys with type `LoxString` and values + * of type `Value*` + */ #include "../src/lox_assert.h" #include "../src/vm.h" #include -LoxString* allocateString(VM* vm, char const* cString, size_t length); +LoxString* newLoxStringFromCString(VM* vm, char const* cString, size_t length); #include "../src/common.h" #include "../src/memory.h" @@ -31,19 +37,60 @@ void tableFree(LoxTable* table) { } FREE_ARRAY(Entry, table->entries, table->capacity); } + +static TableEntry* findEntry(LoxTable const* table, LoxString const* key) { + uint32_t index = key->hash % table->capacity; + while (true) { + // printf("index: %u\n", index); + TableEntry* curr = &table->entries[index]; + if (curr->key == NULL || curr->key->hash == key->hash) + return curr; + index = (index + 1) % (table->capacity); + } +} + +static void adjustCapacity(LoxTable* table, size_t newCapacity) { + TableEntry* newEntries = ALLOCATE(TableEntry, newCapacity); + + // init each entry + for (size_t i = 0; i < newCapacity; ++i) { + newEntries[i].key = NULL; + newEntries[i].value = NIL_VAL; + } + + table->capacity = newCapacity; + table->entries = newEntries; +} + bool tableSet(LoxTable* table, LoxString* key, Value value); -bool tableSet(LoxTable* table, LoxString* key, Value value) {} +#define TABLE_MAX_LOAD_RATIO 0.69 +bool tableSet(LoxTable* table, LoxString* key, Value value) { + // FIXME: this conversion looks weird. Probably should multiply and divide + // whole numbers instead + if (table->count + 1 > + (size_t)((int)table->capacity * TABLE_MAX_LOAD_RATIO)) { + size_t newCapacity = GROW_CAPACITY(table->capacity); + adjustCapacity(table, newCapacity); + } + TableEntry* entry = findEntry(table, key); + ++table->count; + + entry->key = key; + entry->value = value; + return true; +} Value* tableGet(LoxTable* table, LoxString* key); -Value* tableGet(LoxTable* table, LoxString* key) {} +Value* tableGet(LoxTable* table, LoxString* key) { + return &findEntry(table, key)->value; +} -int main(void) { - // basic +static void basic(void) { initVM(); - VM vm = getVM_(); + VM* vm = getVM_(); LoxTable table; tableInit(&table); char const* k = "key"; - LoxString* key = allocateString(&vm, "key", strlen(k)); + LoxString* key = newLoxStringFromCString(vm, "key", strlen(k)); Value want = NUMBER_VAL(42.0); tableSet(&table, key, want); @@ -51,4 +98,75 @@ int main(void) { LOX_ASSERT_VALUE_EQUALS(*res, want); tableFree(&table); + freeVM(); +} + +static void overwrite(void) { + initVM(); + VM* vm = getVM_(); + LoxTable table; + tableInit(&table); + char const* k = "key"; + LoxString* key = newLoxStringFromCString(vm, "key", strlen(k)); + Value one = NUMBER_VAL(1); + tableSet(&table, key, one); + + Value* res = tableGet(&table, key); + + LOX_ASSERT_VALUE_EQUALS(*res, one); + Value want = NUMBER_VAL(2); + tableSet(&table, key, want); + + res = tableGet(&table, key); + + LOX_ASSERT_VALUE_EQUALS(*res, want); + tableFree(&table); + freeVM(); +} + +/** + * This test no longer does what it was designed for. + * When I wrote this test, the hash function was hardcoded to always return 69, + * and this test passed. Now we have a better hash function. + */ +static void collision(void) { + initVM(); + VM* vm = getVM_(); + LoxTable table; + tableInit(&table); + char const* k1 = "costarring"; + LoxString* key1 = newLoxStringFromCString(vm, k1, strlen(k1)); + Value one = NUMBER_VAL(1); + tableSet(&table, key1, one); + + char const* k2 = "liquid"; + LoxString* key2 = newLoxStringFromCString(vm, k2, strlen(k2)); + Value two = NUMBER_VAL(2); + tableSet(&table, key2, two); + + Value* res1 = tableGet(&table, key1); + Value* res2 = tableGet(&table, key2); + + LOX_ASSERT_VALUE_EQUALS(*res1, one); + LOX_ASSERT_VALUE_EQUALS(*res2, two); + tableFree(&table); + freeVM(); +} + +int main(void) { + basic(); + overwrite(); + // delete(); + // grow(); + + // FIXME: how do I test collisions? + // ideas: + // 1. take the hash function as an argument + // 2. make a small hash table, and add hundreds of entries to it, + // statistically there'd be a collision somewhere in here... + // I prefer 2. + // + // Currently, when I want to test this function, I change the + // `computeHashOfCString` function to always return 69. That works too... + collision(); } diff --git a/bytecode-vm-compiler/test/test-vm.c b/bytecode-vm-compiler/test/test-vm.c index 1ecc4ff..93c2865 100644 --- a/bytecode-vm-compiler/test/test-vm.c +++ b/bytecode-vm-compiler/test/test-vm.c @@ -189,9 +189,9 @@ void addToStack(void) { writeChunkNoLine(&chunk, OP_RETURN); } interpret_bytecode_(&chunk); - VM vm = getVM_(); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 0), NUMBER_VAL(69)); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 1), NUMBER_VAL(42)); + VM* vm = getVM_(); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 0), NUMBER_VAL(69)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 1), NUMBER_VAL(42)); freeChunk(&chunk); freeVM(); } @@ -214,8 +214,8 @@ void addition(void) { writeChunkNoLine(&chunk, OP_RETURN); } interpret_bytecode_(&chunk); - VM vm = getVM_(); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 0), NUMBER_VAL(69)); + VM* vm = getVM_(); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 0), NUMBER_VAL(69)); freeChunk(&chunk); freeVM(); } @@ -231,11 +231,11 @@ void additionWithCompile(void) { LOX_ASSERT_EQUALS(result, INTERPRET_OK); - VM vm = getVM_(); + VM* vm = getVM_(); int want = 3; - Value go = peekStack(&vm, 0); + Value go = peekStack(vm, 0); LOX_ASSERT_VALUE_EQUALS(go, NUMBER_VAL(want)); freeChunk(&chunk); @@ -260,9 +260,9 @@ void subtraction(void) { writeChunkNoLine(&chunk, OP_RETURN); } interpret_bytecode_(&chunk); - VM vm = getVM_(); - LOX_ASSERT_EQUALS(peekStack(&vm, 0).type, VAL_NUMBER); - LOX_ASSERT_EQUALS(peekStack(&vm, 0).as.number, 4); + VM* vm = getVM_(); + LOX_ASSERT_EQUALS(peekStack(vm, 0).type, VAL_NUMBER); + LOX_ASSERT_EQUALS(peekStack(vm, 0).as.number, 4); freeChunk(&chunk); freeVM(); } @@ -285,9 +285,9 @@ void negation(void) { writeChunkNoLine(&chunk, OP_RETURN); } InterpretResult status = interpret_bytecode_(&chunk); - VM vm = getVM_(); + VM* vm = getVM_(); assert_vm_exit_ok(status); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 0), NUMBER_VAL(-7)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 0), NUMBER_VAL(-7)); freeChunk(&chunk); freeVM(); } @@ -310,8 +310,8 @@ void multiplication(void) { writeChunkNoLine(&chunk, OP_RETURN); } interpret_bytecode_(&chunk); - VM vm = getVM_(); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 0), NUMBER_VAL(420)); + VM* vm = getVM_(); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 0), NUMBER_VAL(420)); freeChunk(&chunk); freeVM(); } @@ -334,8 +334,8 @@ void division(void) { writeChunkNoLine(&chunk, OP_RETURN); } interpret_bytecode_(&chunk); - VM vm = getVM_(); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 0), NUMBER_VAL(42)); + VM* vm = getVM_(); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 0), NUMBER_VAL(42)); freeChunk(&chunk); freeVM(); } @@ -379,14 +379,14 @@ void not(void) { writeChunkNoLine(&chunk, OP_RETURN); } interpret_bytecode_(&chunk); - VM vm = getVM_(); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 6), BOOL_VAL(false)); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 5), BOOL_VAL(true)); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 4), BOOL_VAL(true)); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 3), BOOL_VAL(true)); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 2), BOOL_VAL(false)); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 1), BOOL_VAL(true)); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 0), BOOL_VAL(true)); + VM* vm = getVM_(); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 6), BOOL_VAL(false)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 5), BOOL_VAL(true)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 4), BOOL_VAL(true)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 3), BOOL_VAL(true)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 2), BOOL_VAL(false)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 1), BOOL_VAL(true)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 0), BOOL_VAL(true)); freeChunk(&chunk); freeVM(); } @@ -450,22 +450,22 @@ static void comparison() { } InterpretResult result = interpret_bytecode_(&chunk); LOX_ASSERT_EQUALS(result, INTERPRET_OK); - VM vm = getVM_(); + VM* vm = getVM_(); // true==true is true - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 6), BOOL_VAL(true)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 6), BOOL_VAL(true)); // 0==1 is false - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 5), BOOL_VAL(false)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 5), BOOL_VAL(false)); // 0==false is false - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 4), BOOL_VAL(false)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 4), BOOL_VAL(false)); // 0.1>1.5 is false - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 3), BOOL_VAL(false)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 3), BOOL_VAL(false)); // 69.42 > 42.69 is true - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 2), BOOL_VAL(true)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 2), BOOL_VAL(true)); // 1.5 < 0.1 is false - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 1), BOOL_VAL(false)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 1), BOOL_VAL(false)); // 42.69 < 69.42 is true - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 0), BOOL_VAL(true)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 0), BOOL_VAL(true)); freeChunk(&chunk); freeVM(); } @@ -482,9 +482,9 @@ static void stringComp() { LOX_ASSERT_EQUALS(result, INTERPRET_OK); - VM vm = getVM_(); + VM* vm = getVM_(); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 0), BOOL_VAL(true)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 0), BOOL_VAL(true)); freeChunk(&chunk); freeVM(); } @@ -500,9 +500,9 @@ static void stringComp() { LOX_ASSERT_EQUALS(result, INTERPRET_OK); - VM vm = getVM_(); + VM* vm = getVM_(); - LOX_ASSERT_VALUE_EQUALS(peekStack(&vm, 0), BOOL_VAL(false)); + LOX_ASSERT_VALUE_EQUALS(peekStack(vm, 0), BOOL_VAL(false)); } } @@ -516,11 +516,11 @@ static void stringAdd() { LOX_ASSERT_EQUALS(result, INTERPRET_OK); - VM vm = getVM_(); + VM* vm = getVM_(); char const* want = "string append"; - char* got = AS_CSTRING(peekStack(&vm, 0)); + char* got = AS_CSTRING(peekStack(vm, 0)); LOX_ASSERT_STRING_EQUALS(got, want); freeChunk(&chunk);