Skip to content

Commit

Permalink
Add support for LexicalScopes, Variables, and SourceCoordinates (#32)
Browse files Browse the repository at this point in the history
This PR adds infrastructure to enable inlining at the bytecode
level.

It introduces the notion of LexicalScopes, Variables, and
SourceCoordinates.
LexicalScopes encode the info needed about the program
structure/relationship between methods and blocks.
Variables encode the location of arguments and locals, in terms of
SourceCoordinates.

This PR also reduces the number of symbols that need to be created by
relying on std::strings for aspects that won't be exposed to the
language level.

There's also a bit of extra support here for debugging the GCs and for
interactive debugging, for instance printing/dumping of methods.

This is split out as a PR to be able to assess the performance impact.
  • Loading branch information
smarr authored Jul 31, 2024
2 parents eca9b14 + ed8667e commit f3758d7
Show file tree
Hide file tree
Showing 35 changed files with 451 additions and 231 deletions.
12 changes: 12 additions & 0 deletions SOM.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
0A1887451832C62100A2CBCA /* Smalltalk in CopyFiles */ = {isa = PBXBuildFile; fileRef = 0A18873E1832C62100A2CBCA /* Smalltalk */; };
0A1887471832C62100A2CBCA /* TestSuite in CopyFiles */ = {isa = PBXBuildFile; fileRef = 0A1887401832C62100A2CBCA /* TestSuite */; };
0A1C98582C3DD88500735850 /* unitTests/BytecodeGenerationTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0A1C98572C3DD88500735850 /* unitTests/BytecodeGenerationTest.cpp */; };
0A1C98602C43D6E200735850 /* Variable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0A1C985F2C43D6E200735850 /* Variable.cpp */; };
0A1C98612C43D6E200735850 /* Variable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0A1C985F2C43D6E200735850 /* Variable.cpp */; };
0A1C98672C4D340300735850 /* Symbols.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0A1C98652C4D340300735850 /* Symbols.cpp */; };
0A1C98682C4D340300735850 /* Symbols.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0A1C98652C4D340300735850 /* Symbols.cpp */; };
0A1C986B2C4D363A00735850 /* LogAllocation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0A1C986A2C4D363A00735850 /* LogAllocation.cpp */; };
Expand Down Expand Up @@ -196,6 +198,10 @@
0A1C98562C3DD87300735850 /* unitTests/BytecodeGenerationTest.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = unitTests/BytecodeGenerationTest.h; sourceTree = "<group>"; };
0A1C98572C3DD88500735850 /* unitTests/BytecodeGenerationTest.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = unitTests/BytecodeGenerationTest.cpp; sourceTree = "<group>"; };
0A1C98592C432E0E00735850 /* VectorUtil.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = VectorUtil.h; sourceTree = "<group>"; };
0A1C985A2C4346F900735850 /* LexicalScope.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LexicalScope.h; sourceTree = "<group>"; };
0A1C985E2C43D6D000735850 /* Variable.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Variable.h; sourceTree = "<group>"; };
0A1C985F2C43D6E200735850 /* Variable.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Variable.cpp; sourceTree = "<group>"; };
0A1C98622C499F9400735850 /* SourceCoordinate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SourceCoordinate.h; sourceTree = "<group>"; };
0A1C98642C4D33F300735850 /* Symbols.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Symbols.h; sourceTree = "<group>"; };
0A1C98652C4D340300735850 /* Symbols.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Symbols.cpp; sourceTree = "<group>"; };
0A1C98692C4D35BB00735850 /* LogAllocation.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LogAllocation.h; sourceTree = "<group>"; };
Expand Down Expand Up @@ -493,6 +499,10 @@
3F5202FC0FA6624C00E75857 /* Parser.h */,
3F5202FD0FA6624C00E75857 /* SourcecodeCompiler.cpp */,
3F5202FE0FA6624C00E75857 /* SourcecodeCompiler.h */,
0A1C985A2C4346F900735850 /* LexicalScope.h */,
0A1C985E2C43D6D000735850 /* Variable.h */,
0A1C985F2C43D6E200735850 /* Variable.cpp */,
0A1C98622C499F9400735850 /* SourceCoordinate.h */,
);
path = compiler;
sourceTree = "<group>";
Expand Down Expand Up @@ -877,6 +887,7 @@
0A3A3CB11A5D5475004CB03B /* PrimitiveLoader.cpp in Sources */,
0A1887001832BCFA00A2CBCA /* VMInteger.cpp in Sources */,
0A3A3C931A5D546D004CB03B /* Block.cpp in Sources */,
0A1C98602C43D6E200735850 /* Variable.cpp in Sources */,
0A70752A297DF9FE00EB9F59 /* ParseInteger.cpp in Sources */,
0A3A3CB01A5D5475004CB03B /* PrimitiveContainer.cpp in Sources */,
0A3A3C981A5D546D004CB03B /* Object.cpp in Sources */,
Expand Down Expand Up @@ -966,6 +977,7 @@
0A3A3CA81A5D546D004CB03B /* Primitive.cpp in Sources */,
0A67EA9619ACD84300830E3B /* Timer.cpp in Sources */,
0A67EA8D19ACD83200830E3B /* BytecodeGenerator.cpp in Sources */,
0A1C98612C43D6E200735850 /* Variable.cpp in Sources */,
0A67EA7619ACD43A00830E3B /* main.cpp in Sources */,
0A67EA8819ACD74800830E3B /* VMPrimitive.cpp in Sources */,
0A3A3CA21A5D546D004CB03B /* Block.cpp in Sources */,
Expand Down
26 changes: 13 additions & 13 deletions src/compiler/Disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,20 +105,20 @@ void Disassembler::Dump(VMClass* cl) {
/**
* Dump all Bytecode of a method.
*/
void Disassembler::DumpMethod(VMMethod* method, const char* indent) {
dumpMethod(method->GetBytecodes(), method->GetNumberOfBytecodes(), indent, method);
void Disassembler::DumpMethod(VMMethod* method, const char* indent, bool printObjects) {
dumpMethod(method->GetBytecodes(), method->GetNumberOfBytecodes(), indent, method, printObjects);
}

void Disassembler::DumpMethod(MethodGenerationContext* mgenc, const char* indent) {
auto bytecodes = mgenc->GetBytecodes();
dumpMethod(bytecodes.data(), bytecodes.size(), indent, nullptr);
dumpMethod(bytecodes.data(), bytecodes.size(), indent, nullptr, true);
}

void Disassembler::dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, const char* indent, VMMethod* method) {
void Disassembler::dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, const char* indent, VMMethod* method, bool printObjects) {
DebugPrint("(\n");
if (method != nullptr) { // output stack information
long locals = method->GetNumberOfLocals();
long max_stack = method->GetMaximumNumberOfStackElements();
size_t locals = method->GetNumberOfLocals();
size_t max_stack = method->GetMaximumNumberOfStackElements();
DebugDump("%s<%d locals, %d stack, %d bc_count>\n", indent, locals,
max_stack, method->GetNumberOfBytecodes());

Expand Down Expand Up @@ -163,7 +163,7 @@ void Disassembler::dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, cons
}
case BC_PUSH_FIELD: {
long fieldIdx = bytecodes[bc_idx+1];
if (method != nullptr) {
if (method != nullptr && printObjects) {
VMClass* holder = dynamic_cast<VMClass*>((VMObject*) method->GetHolder());
if (holder) {
VMSymbol* name = holder->GetInstanceFieldName(fieldIdx);
Expand All @@ -184,7 +184,7 @@ void Disassembler::dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, cons
char* nindent = new char[indent_size];
DebugPrint("block: (index: %d) ", bytecodes[bc_idx+1]);

if (method != nullptr) {
if (method != nullptr && printObjects) {
snprintf(nindent, indent_size, "%s\t", indent);
Disassembler::DumpMethod(static_cast<VMMethod*>(method->GetConstant(bc_idx)), nindent);
} else {
Expand All @@ -194,7 +194,7 @@ void Disassembler::dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, cons
break;
}
case BC_PUSH_CONSTANT: {
if (method != nullptr) {
if (method != nullptr && printObjects) {
vm_oop_t constant = method->GetConstant(bc_idx);
VMClass* cl = CLASS_OF(constant);
VMSymbol* cname = cl->GetName();
Expand All @@ -209,7 +209,7 @@ void Disassembler::dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, cons
break;
}
case BC_PUSH_GLOBAL: {
if (method != nullptr) {
if (method != nullptr && printObjects) {
vm_oop_t cst = method->GetConstant(bc_idx);
if (cst != nullptr) {
VMSymbol* name = static_cast<VMSymbol*>(cst);
Expand All @@ -233,7 +233,7 @@ void Disassembler::dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, cons
break;
case BC_POP_FIELD: {
long fieldIdx = bytecodes[bc_idx+1];
if (method != nullptr) {
if (method != nullptr && printObjects) {
VMClass* holder = dynamic_cast<VMClass*>((VMObject*) method->GetHolder());
if (holder) {
VMSymbol* name = holder->GetInstanceFieldName(fieldIdx);
Expand All @@ -247,7 +247,7 @@ void Disassembler::dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, cons
break;
}
case BC_SEND: {
if (method != nullptr) {
if (method != nullptr && printObjects) {
VMSymbol* name = static_cast<VMSymbol*>(method->GetConstant(bc_idx));
DebugPrint("(index: %d) signature: %s\n", bytecodes[bc_idx+1], name->GetStdString().c_str());
} else {
Expand All @@ -256,7 +256,7 @@ void Disassembler::dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, cons
break;
}
case BC_SUPER_SEND: {
if (method != nullptr) {
if (method != nullptr && printObjects) {
VMSymbol* name = static_cast<VMSymbol*>(method->GetConstant(bc_idx));
DebugPrint("(index: %d) signature: %s\n", bytecodes[bc_idx+1], name->GetStdString().c_str());
} else {
Expand Down
4 changes: 2 additions & 2 deletions src/compiler/Disassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@
class Disassembler {
public:
static void Dump(VMClass* cl);
static void DumpMethod(VMMethod* method, const char* indent);
static void DumpMethod(VMMethod* method, const char* indent, bool printObjects = true);
static void DumpMethod(MethodGenerationContext* mgenc, const char* indent);
static void DumpBytecode(VMFrame* frame, VMMethod* method, long bc_idx);
private:
static void dispatch(vm_oop_t o);

static void dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, const char* indent, VMMethod* method);
static void dumpMethod(uint8_t* bytecodes, size_t numberOfBytecodes, const char* indent, VMMethod* method, bool printObjects);
};
8 changes: 6 additions & 2 deletions src/compiler/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,18 +124,22 @@ class Lexer {

StdString GetCurrentLine();

size_t getCurrentColumn() {
size_t GetCurrentColumn() const {
return state.startBufp + 1 - state.text.length();
}

size_t GetCurrentLineNumber() {
size_t GetCurrentLineNumber() const {
return state.lineNumber;
}

bool GetPeekDone() const {
return peekDone;
}

SourceCoordinate GetCurrentSource() const {
return SourceCoordinate(GetCurrentLineNumber(), GetCurrentColumn());
}

private:
int64_t fillBuffer();
void skipWhiteSpace();
Expand Down
49 changes: 49 additions & 0 deletions src/compiler/LexicalScope.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#pragma once

#include <utility>

#include "../vmobjects/ObjectFormats.h"
#include "Variable.h"

/**
 * A LexicalScope holds the argument and local Variables of one scope
 * together with a link to its lexically enclosing (outer) scope.
 *
 * The outer scope is referenced through a raw pointer; this class never
 * deletes it, so the pointee has to be kept alive by whoever created it.
 */
class LexicalScope {
    friend class MethodGenerationContext;

public:
    /**
     * @param outer      the enclosing scope (presumably nullptr for an
     *                   outermost scope -- confirm against callers)
     * @param arguments  the arguments of this scope
     * @param locals     the locals of this scope
     *
     * Both vectors are taken by value and moved into the members, so a
     * caller passing temporaries does not pay for an additional copy.
     */
    LexicalScope(LexicalScope* outer, vector<Variable> arguments,
                 vector<Variable> locals)
        : outer(outer), arguments(std::move(arguments)),
          locals(std::move(locals)) {}

    /** @return the number of arguments recorded for this scope. */
    size_t GetNumberOfArguments() const {
        return arguments.size();
    }

    /** @return the number of locals recorded for this scope. */
    size_t GetNumberOfLocals() const {
        return locals.size();
    }

    /**
     * Append a local to this scope.
     *
     * The variable's index has to be the next free slot, i.e. equal to the
     * current number of locals; this is checked with an assert.
     */
    void AddInlinedLocal(Variable& var) {
        assert(var.GetIndex() == locals.size());
        locals.push_back(var);
    }

    /**
     * Look up a local by index, `ctxLevel` scopes outwards from this one.
     *
     * @param index     index into the locals of the target scope
     * @param ctxLevel  number of `outer` links to follow; 0 means this scope
     * @return pointer to the Variable stored in the target scope. It points
     *         into that scope's vector, so it is invalidated when the vector
     *         reallocates (e.g. via AddInlinedLocal).
     *
     * An out-of-range `index` makes `vector::at` throw `std::out_of_range`.
     */
    const Variable* GetLocal(size_t index, uint8_t ctxLevel) const {
        const LexicalScope* scope = this;
        for (; ctxLevel > 0; --ctxLevel) {
            // Walking past the outermost scope is a caller bug; fail loudly
            // in debug builds instead of dereferencing a null pointer.
            assert(scope->outer != nullptr);
            scope = scope->outer;
        }
        return &scope->locals.at(index);
    }

    /**
     * This removes the inlined scope from the chain.
     * Removal is done exactly once, after all embedded blocks
     * were adapted.
     *
     * Concretely, the direct outer scope is skipped: afterwards `outer`
     * refers to what previously was `outer->outer`. Both have to exist.
     */
    void DropInlinedScope() {
        assert(outer != nullptr);
        assert(outer->outer != nullptr);

        outer = outer->outer;
    }

private:
    LexicalScope* outer;         // enclosing scope, not owned
    vector<Variable> arguments;  // arguments of this scope
    vector<Variable> locals;     // locals of this scope, incl. inlined ones
};
Loading

0 comments on commit f3758d7

Please sign in to comment.