From 14df5ac892084a9ef59c3f8a43cab75257240364 Mon Sep 17 00:00:00 2001 From: Yunfei Date: Mon, 23 Sep 2024 14:53:23 +0800 Subject: [PATCH] add lang="en" and class="notranslate" for chrome translate --- site/404.html | 2 +- site/a-bytecode-virtual-machine.html | 2 +- site/a-map-of-the-territory.html | 6 +- site/a-tree-walk-interpreter.html | 2 +- site/a-virtual-machine.html | 124 ++++++++-------- site/acknowledgements.html | 2 +- site/appendix-i.html | 14 +- site/appendix-ii.html | 94 ++++++------ site/backmatter.html | 2 +- site/calls-and-functions.html | 210 +++++++++++++-------------- site/chunks-of-bytecode.html | 162 ++++++++++----------- site/classes-and-instances.html | 96 ++++++------ site/classes.html | 192 ++++++++++++------------ site/closures.html | 204 +++++++++++++------------- site/compiling-expressions.html | 148 +++++++++---------- site/contents.html | 2 +- site/control-flow.html | 84 +++++------ site/dedication.html | 2 +- site/evaluating-expressions.html | 102 ++++++------- site/functions.html | 144 +++++++++--------- site/garbage-collection.html | 158 ++++++++++---------- site/global-variables.html | 162 ++++++++++----------- site/hash-tables.html | 110 +++++++------- site/index.html | 2 +- site/inheritance.html | 84 +++++------ site/introduction.html | 4 +- site/jumping-back-and-forth.html | 108 +++++++------- site/local-variables.html | 98 ++++++------- site/methods-and-initializers.html | 158 ++++++++++---------- site/optimization.html | 76 +++++----- site/parsing-expressions.html | 116 +++++++-------- site/representing-code.html | 94 ++++++------ site/resolving-and-binding.html | 186 ++++++++++++------------ site/scanning-on-demand.html | 154 ++++++++++---------- site/scanning.html | 148 +++++++++---------- site/statements-and-state.html | 188 ++++++++++++------------ site/strings.html | 120 +++++++-------- site/superclasses.html | 70 ++++----- site/the-lox-language.html | 82 +++++------ site/types-of-values.html | 96 ++++++------ 
site/welcome.html | 2 +- 41 files changed, 1905 insertions(+), 1905 deletions(-) diff --git a/site/404.html b/site/404.html index 7e3dbe63e..d3851f45f 100644 --- a/site/404.html +++ b/site/404.html @@ -1,5 +1,5 @@ - + 404 Page Not Found · Crafting Interpreters diff --git a/site/a-bytecode-virtual-machine.html b/site/a-bytecode-virtual-machine.html index 14faed200..2ebb2d1ca 100644 --- a/site/a-bytecode-virtual-machine.html +++ b/site/a-bytecode-virtual-machine.html @@ -1,5 +1,5 @@ - + A Bytecode Virtual Machine · Crafting Interpreters diff --git a/site/a-map-of-the-territory.html b/site/a-map-of-the-territory.html index 4ad575fb9..f130d2943 100644 --- a/site/a-map-of-the-territory.html +++ b/site/a-map-of-the-territory.html @@ -1,5 +1,5 @@ - + A Map of the Territory · Crafting Interpreters @@ -268,10 +268,10 @@

2 . 1 .&#

A simple example is constant folding: if some expression always evaluates to the exact same value, we can do the evaluation at compile time and replace the code for the expression with its result. If the user typed in this:

-
pennyArea = 3.14159 * (0.75 / 2) * (0.75 / 2);
+
pennyArea = 3.14159 * (0.75 / 2) * (0.75 / 2);
 

we could do all of that arithmetic in the compiler and change the code to:

-
pennyArea = 0.4417860938;
+
pennyArea = 0.4417860938;
 

Optimization is a huge part of the programming language business. Many language hackers spend their entire careers here, squeezing every drop of performance diff --git a/site/a-tree-walk-interpreter.html b/site/a-tree-walk-interpreter.html index b8b8c19a8..03f51395b 100644 --- a/site/a-tree-walk-interpreter.html +++ b/site/a-tree-walk-interpreter.html @@ -1,5 +1,5 @@ - + A Tree-Walk Interpreter · Crafting Interpreters diff --git a/site/a-virtual-machine.html b/site/a-virtual-machine.html index 8377b5aa6..4e6caffee 100644 --- a/site/a-virtual-machine.html +++ b/site/a-virtual-machine.html @@ -1,5 +1,5 @@ - + A Virtual Machine · Crafting Interpreters @@ -112,9 +112,9 @@

—literally a Chunkand it runs it. The code and data structures for the VM reside in a new module.

-
vm.h
+
vm.h
create new file
-
#ifndef clox_vm_h
+
#ifndef clox_vm_h
 #define clox_vm_h
 
 #include "chunk.h"
@@ -135,9 +135,9 @@ 

vm.c
+
vm.c
create new file
-
#include "common.h"
+
#include "common.h"
 #include "vm.h"
 
 VM vm; 
@@ -172,7 +172,7 @@ 

int main(int argc, const char* argv[]) {
+
int main(int argc, const char* argv[]) {
 
main.c
in main()
  initVM();
@@ -183,7 +183,7 @@ 

  disassembleChunk(&chunk, "test chunk");
+
 
 

One last ceremonial obligation:

-
#include "debug.h"
+
#include "debug.h"
 
main.c
#include "vm.h"
 
@@ -206,7 +206,7 @@ 

15 . 1 . 1Executing instructions

The VM springs into action when we command it to interpret a chunk of bytecode.

-
  disassembleChunk(&chunk, "test chunk");
+
  disassembleChunk(&chunk, "test chunk");
 
main.c
in main()
  interpret(&chunk);
@@ -215,7 +215,7 @@ 

15̴
main.c, in main()

This function is the main entrypoint into the VM. It’s declared like so:

-
void freeVM();
+
void freeVM();
 
vm.h
add after freeVM()
InterpretResult interpret(Chunk* chunk);
@@ -226,7 +226,7 @@ 

15̴
vm.h, add after freeVM()

The VM runs the chunk and then responds with a value from this enum:

-
} VM;
+
} VM;
 
 
vm.h
add after struct VM
@@ -245,9 +245,9 @@

15̴ errors and a VM that detects runtime errors, the interpreter will use this to know how to set the exit code of the process.

We’re inching towards some actual implementation.

-
vm.c
+
vm.c
add after freeVM()
-
InterpretResult interpret(Chunk* chunk) {
+
InterpretResult interpret(Chunk* chunk) {
   vm.chunk = chunk;
   vm.ip = vm.chunk->code;
   return run();
@@ -266,7 +266,7 @@ 

15̴ interpreter, we would store ip in a local variable. It gets modified so often during execution that we want the C compiler to keep it in a register.

-
typedef struct {
+
typedef struct {
   Chunk* chunk;
 
vm.h
in struct VM
@@ -291,9 +291,9 @@

15̴ to be executed. This will be true during the entire time the VM is running: the IP always points to the next instruction, not the one currently being handled.

The real fun happens in run().

-
vm.c
+
vm.c
add after freeVM()
-
static InterpretResult run() {
+
static InterpretResult run() {
 #define READ_BYTE() (*vm.ip++)
 
   for (;;) {
@@ -350,7 +350,7 @@ 

15̴ return from the current Lox function, but we don’t have functions yet, so we’ll repurpose it temporarily to end the execution.

Let’s go ahead and support our one other instruction.

-
    switch (instruction = READ_BYTE()) {
+
    switch (instruction = READ_BYTE()) {
 
vm.c
in run()
      case OP_CONSTANT: {
@@ -366,7 +366,7 @@ 

15̴

We don’t have enough machinery in place yet to do anything useful with a constant. For now, we’ll just print it out so we interpreter hackers can see what’s going on inside our VM. That call to printf() necessitates an include.

-
vm.c
+
vm.c
add to top of file
#include <stdio.h>
 
@@ -375,7 +375,7 @@ 

15̴
vm.c, add to top of file

We also have a new macro to define.

-
#define READ_BYTE() (*vm.ip++)
+
#define READ_BYTE() (*vm.ip++)
 
vm.c
in run()
#define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
@@ -393,7 +393,7 @@ 

15̴ run(). To make that scoping more explicit, the macro definitions themselves are confined to that function. We define them at the beginning andbecause we careundefine them at the end.

-
#undef READ_BYTE
+
#undef READ_BYTE
 
vm.c
in run()
#undef READ_CONSTANT
@@ -416,7 +416,7 @@ 

15 .  VM like we did with chunks themselves. In fact, we’ll even reuse the same code. We don’t want this logging enabled all the timeit’s just for us VM hackers, not Lox usersso first we create a flag to hide it behind.

-
#include <stdint.h>
+
#include <stdint.h>
 
common.h
 
@@ -430,7 +430,7 @@ 

15 . 

When this flag is defined, the VM disassembles and prints each instruction right before executing it. Where our previous disassembler walked an entire chunk once, statically, this disassembles instructions dynamically, on the fly.

-
  for (;;) {
+
  for (;;) {
 
vm.c
in run()
#ifdef DEBUG_TRACE_EXECUTION
@@ -448,7 +448,7 @@ 

15 .  bytecode. Then we disassemble the instruction that begins at that byte.

As ever, we need to bring in the declaration of the function before we can call it.

-
#include "common.h"
+
#include "common.h"
 
vm.c
#include "debug.h"
 
#include "vm.h"
@@ -465,7 +465,7 @@ 

1

In addition to imperative side effects, Lox has expressions that produce, modify, and consume values. Thus, our compiled bytecode needs a way to shuttle values around between the different instructions that need them. For example:

-
print 3 - 2;
+
print 3 - 2;
 

We obviously need instructions for the constants 3 and 2, the print statement, and the subtraction. But how does the subtraction instruction know that 3 is @@ -477,7 +477,7 @@

1 and “subtrahend” might be some sort of underground Paleolithic monument.

To put a finer point on it, look at this thing right here:

-
fun echo(n) {
+
fun echo(n) {
   print n;
   return n;
 }
@@ -508,7 +508,7 @@ 

1 statement, with numbers marking the order that the nodes are evaluated." />

Given left-to-right evaluation, and the way the expressions are nested, any correct Lox implementation must print these numbers in this order:

-
1  // from echo(1)
+
1  // from echo(1)
 2  // from echo(2)
 3  // from echo(1 + 2)
 4  // from echo(4)
@@ -593,7 +593,7 @@ 

15 . 2  generate much faster native code on the fly.

Alrighty, it’s codin’ time! Here’s the stack:

-
typedef struct {
+
typedef struct {
   Chunk* chunk;
   uint8_t* ip;
 
vm.h
@@ -637,7 +637,7 @@

15 . 2 

I remember it like this: stackTop points to where the next value to be pushed will go. The maximum number of values we can store on the stack (for now, at least) is:

-
#include "chunk.h"
+
#include "chunk.h"
 
vm.h
 
@@ -652,7 +652,7 @@ 

15 . 2  instructions to push too many values and run out of stack spacethe classic “stack overflow”. We could grow the stack dynamically as needed, but for now we’ll keep it simple. Since VM uses Value, we need to include its declaration.

-
#include "chunk.h"
+
#include "chunk.h"
 
vm.h
#include "value.h"
 
@@ -662,7 +662,7 @@ 

15 . 2 
vm.h

Now that VM has some interesting state, we get to initialize it.

-
void initVM() {
+
void initVM() {
 
vm.c
in initVM()
  resetStack();
@@ -671,9 +671,9 @@ 

15 . 2 
vm.c, in initVM()

That uses this helper function:

-
vm.c
+
vm.c
add after variable vm
-
static void resetStack() {
+
static void resetStack() {
   vm.stackTop = vm.stack;
 }
 
@@ -685,7 +685,7 @@

15 . 2  them. The only initialization we need is to set stackTop to point to the beginning of the array to indicate that the stack is empty.

The stack protocol supports two operations:

-
InterpretResult interpret(Chunk* chunk);
+
InterpretResult interpret(Chunk* chunk);
 
vm.h
add after interpret()
void push(Value value);
@@ -698,9 +698,9 @@ 

15 . 2 

You can push a new value onto the top of the stack, and you can pop the most recently pushed value back off. Here’s the first function:

-
vm.c
+
vm.c
add after freeVM()
-
void push(Value value) {
+
void push(Value value) {
   *vm.stackTop = value;
   vm.stackTop++;
 }
@@ -714,9 +714,9 @@ 

15 . 2  itself to point to the next unused slot in the array now that the previous slot is occupied.

Popping is the mirror image.

-
vm.c
+
vm.c
add after push()
-
Value pop() {
+
Value pop() {
   vm.stackTop--;
   return *vm.stackTop;
 }
@@ -734,7 +734,7 @@ 

15 . 2  make our lives as VM hackers easier if we had some visibility into the stack.

To that end, whenever we’re tracing execution, we’ll also show the current contents of the stack before we interpret each instruction.

-
#ifdef DEBUG_TRACE_EXECUTION
+
#ifdef DEBUG_TRACE_EXECUTION
 
vm.c
in run()
    printf("          ");
@@ -753,7 +753,7 @@ 

15 . 2  instruction on the stack. The output is pretty verbose, but it’s useful when we’re surgically extracting a nasty bug from the bowels of the interpreter.

Stack in hand, let’s revisit our two instructions. First up:

-
      case OP_CONSTANT: {
+
      case OP_CONSTANT: {
         Value constant = READ_CONSTANT();
 
vm.c
in run()
@@ -766,7 +766,7 @@

15 . 2 

In the last chapter, I was hand-wavey about how the OP_CONSTANT instruction “loads” a constant. Now that we have a stack you know what it means to actually produce a value: it gets pushed onto the stack.

-
      case OP_RETURN: {
+
      case OP_RETURN: {
 
vm.c
in run()
        printValue(pop());
@@ -786,13 +786,13 @@ 

15& with only the two rudimentary instructions we have so far. So let’s teach our interpreter to do arithmetic.

We’ll start with the simplest arithmetic operation, unary negation.

-
var a = 1.2;
+
var a = 1.2;
 print -a; // -1.2.
 

The prefix - operator takes one operand, the value to negate. It produces a single result. We aren’t fussing with a parser yet, but we can add the bytecode instruction that the above syntax will compile to.

-
  OP_CONSTANT,
+
  OP_CONSTANT,
 
chunk.h
in enum OpCode
  OP_NEGATE,
@@ -801,7 +801,7 @@ 

15&
chunk.h, in enum OpCode

We execute it like so:

-
      }
+
      }
 
vm.c
in run()
      case OP_NEGATE:   push(-pop()); break;
@@ -812,7 +812,7 @@ 

15&

The instruction needs a value to operate on, which it gets by popping from the stack. It negates that, then pushes the result back on for later instructions to use. Doesn’t get much easier than that. We can disassemble it too.

-
    case OP_CONSTANT:
+
    case OP_CONSTANT:
       return constantInstruction("OP_CONSTANT", chunk, offset);
 
debug.c
in disassembleInstruction()
@@ -823,7 +823,7 @@

15&
debug.c, in disassembleInstruction()

And we can try it out in our test chunk.

-
  writeChunk(&chunk, constant, 123);
+
  writeChunk(&chunk, constant, 123);
 
main.c
in main()
  writeChunk(&chunk, OP_NEGATE, 123);
@@ -836,7 +836,7 @@ 

15&

After loading the constant, but before returning, we execute the negate instruction. That replaces the constant on the stack with its negation. Then the return instruction prints that out:

-
-1.2
+
-1.2
 

Magical!

15 . 3 . 1Binary operators

@@ -849,7 +849,7 @@

15 . 3&

Lox has some other binary operatorscomparison and equalitybut those don’t produce numbers as a result, so we aren’t ready for them yet.

-
  OP_CONSTANT,
+
  OP_CONSTANT,
 
chunk.h
in enum OpCode
  OP_ADD,
@@ -861,7 +861,7 @@ 

15 . 3&
chunk.h, in enum OpCode

Back in the bytecode loop, they are executed like this:

-
      }
+
      }
 
vm.c
in run()
      case OP_ADD:      BINARY_OP(+); break;
@@ -877,7 +877,7 @@ 

15 . 3& arithmetic expression is some boilerplate code to pull values off the stack and push the result. When we later add dynamic typing, that boilerplate will grow. To avoid repeating that code four times, I wrapped it up in a macro.

-
#define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
+
#define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
 
vm.c
in run()
#define BINARY_OP(op) \
@@ -906,21 +906,21 @@ 

15 . 3& probably looks really weird. This macro needs to expand to a series of statements. To be careful macro authors, we want to ensure those statements all end up in the same scope when the macro is expanded. Imagine if you defined:

-
#define WAKE_UP() makeCoffee(); drinkCoffee();
+
#define WAKE_UP() makeCoffee(); drinkCoffee();
 

And then used it like:

-
if (morning) WAKE_UP();
+
if (morning) WAKE_UP();
 

The intent is to execute both statements of the macro body only if morning is true. But it expands to:

-
if (morning) makeCoffee(); drinkCoffee();;
+
if (morning) makeCoffee(); drinkCoffee();;
 

Oops. The if attaches only to the first statement. You might think you could fix this using a block.

-
#define WAKE_UP() { makeCoffee(); drinkCoffee(); }
+
#define WAKE_UP() { makeCoffee(); drinkCoffee(); }
 

That’s better, but you still risk:

-
if (morning)
+
if (morning)
   WAKE_UP();
 else
   sleepIn();
@@ -944,7 +944,7 @@ 

15 . 3& their order." />

As we did with the other macros inside run(), we clean up after ourselves at the end of the function.

-
#undef READ_CONSTANT
+
#undef READ_CONSTANT
 
vm.c
in run()
#undef BINARY_OP
@@ -953,7 +953,7 @@ 

15 . 3&
vm.c, in run()

Last is disassembler support.

-
    case OP_CONSTANT:
+
    case OP_CONSTANT:
       return constantInstruction("OP_CONSTANT", chunk, offset);
 
debug.c
in disassembleInstruction()
@@ -978,7 +978,7 @@

15 . 3& evaluated: -((1.2 + 3.4) / 5.6)" />

Building on our existing example chunk, here’s the additional instructions we need to hand-compile that AST to bytecode.

-
  int constant = addConstant(&chunk, 1.2);
+
  int constant = addConstant(&chunk, 1.2);
   writeChunk(&chunk, OP_CONSTANT, 123);
   writeChunk(&chunk, constant, 123);
 
main.c
@@ -1026,7 +1026,7 @@

Challenges

  • What bytecode instruction sequences would you generate for the following expressions:

    -
    1 * 2 + 3
    +
    1 * 2 + 3
     1 + 2 * 3
     3 - 2 - 1
     1 + 2 * 3 - 4 / -5
    @@ -1038,7 +1038,7 @@ 

    Challenges

    If we really wanted a minimal instruction set, we could eliminate either OP_NEGATE or OP_SUBTRACT. Show the bytecode instruction sequence you would generate for:

    -
    4 - 3 * -2
    +
    4 - 3 * -2
     

    First, without using OP_NEGATE. Then, without using OP_SUBTRACT.

    Given the above, do you think it makes sense to have both instructions? Why @@ -1082,12 +1082,12 @@

    Design Note: Register-Based Bytecode instructions can read their inputs from anywhere in the stack and can store their outputs into specific stack slots.

    Take this little Lox script:

    -
    var a = 1;
    +
    var a = 1;
     var b = 2;
     var c = a + b;
     

    In our stack-based VM, the last statement will get compiled to something like:

    -
    load <a>  // Read local variable a and push onto stack.
    +
    load <a>  // Read local variable a and push onto stack.
     load <b>  // Read local variable b and push onto stack.
     add       // Pop two values, add, push result.
     store <c> // Pop value and store in local variable c.
    @@ -1102,7 +1102,7 @@ 

    Design Note: Register-Based Bytecode

    In a register-based instruction set, instructions can read from and store directly into local variables. The bytecode for the last statement above looks like:

    -
    add <a> <b> <c> // Read values from a and b, add, store in c.
    +
    add <a> <b> <c> // Read values from a and b, add, store in c.
     

    The add instruction is biggerit has three instruction operands that define where in the stack it reads its inputs from and writes the result to. But since diff --git a/site/acknowledgements.html b/site/acknowledgements.html index 9f02491ff..39a0f1c81 100644 --- a/site/acknowledgements.html +++ b/site/acknowledgements.html @@ -1,5 +1,5 @@ - + Acknowledgements · Crafting Interpreters diff --git a/site/appendix-i.html b/site/appendix-i.html index 09cc10713..428295f9b 100644 --- a/site/appendix-i.html +++ b/site/appendix-i.html @@ -1,5 +1,5 @@ - + Appendix I · Crafting Interpreters @@ -89,12 +89,12 @@

    A1 . 1The syntactic grammar is used to parse the linear sequence of tokens into the nested syntax tree structure. It starts with the first rule that matches an entire Lox program (or a single REPL entry).

    -
    programdeclaration* EOF ;
    +
    programdeclaration* EOF ;
     

    A1 . 1 . 1Declarations

    A program is a series of declarations, which are the statements that bind new identifiers or any of the other statement types.

    -
    declarationclassDecl
    +
    declarationclassDecl
                    | funDecl
                    | varDecl
                    | statement ;
    @@ -107,7 +107,7 @@ 

    A1 . 1 .&

    A1 . 1 . 2Statements

    The remaining statement rules produce side effects, but do not introduce bindings.

    -
    statementexprStmt
    +
    statementexprStmt
                    | forStmt
                    | ifStmt
                    | printStmt
    @@ -133,7 +133,7 @@ 

    A1 . 1 . different levels of precedence. Some grammars for languages do not directly encode the precedence relationships and specify that elsewhere. Here, we use a separate rule for each precedence level to make it explicit.

    -
    expressionassignment ;
    +
    expressionassignment ;
     
     assignment     → ( call "." )? IDENTIFIER "=" assignment
                    | logic_or ;
    @@ -154,7 +154,7 @@ 

    A1 . 1 .

    A1 . 1 . 4Utility rules

    In order to keep the above rules a little cleaner, some of the grammar is split out into a few reused helper rules.

    -
    functionIDENTIFIER "(" parameters? ")" block ;
    +
    functionIDENTIFIER "(" parameters? ")" block ;
     parametersIDENTIFIER ( "," IDENTIFIER )* ;
     argumentsexpression ( "," expression )* ;
     
    @@ -162,7 +162,7 @@

    A1 . 2The lexical grammar is used by the scanner to group characters into tokens. Where the syntax is context free, the lexical grammar is regularnote that there are no recursive rules.

    -
    NUMBERDIGIT+ ( "." DIGIT+ )? ;
    +
    NUMBERDIGIT+ ( "." DIGIT+ )? ;
     STRING"\"" <any char except "\"">* "\"" ;
     IDENTIFIERALPHA ( ALPHA | DIGIT )* ;
     ALPHA"a" ... "z" | "A" ... "Z" | "_" ;
    diff --git a/site/appendix-ii.html b/site/appendix-ii.html
    index 1ba742f2d..cd3844486 100644
    --- a/site/appendix-ii.html
    +++ b/site/appendix-ii.html
    @@ -1,5 +1,5 @@
     
    -
    +
     
     
     Appendix II · Crafting Interpreters
    @@ -86,9 +86,9 @@ 

    A2 . 1Exp Code”. The main Expr class defines the visitor interface used to dispatch against the specific expression types, and contains the other expression subclasses as nested classes.

    -
    lox/Expr.java
    +
    lox/Expr.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     import java.util.List;
     
    @@ -118,9 +118,9 @@ 

    A2 . 1Exp

    A2 . 1 . 1Assign expression

    Variable assignment is introduced in “Statements and State”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Assign extends Expr {
    +
      static class Assign extends Expr {
         Assign(Token name, Expr value) {
           this.name = name;
           this.value = value;
    @@ -140,9 +140,9 @@ 

    A2 . 

    A2 . 1 . 2Binary expression

    Binary operators are introduced in “Representing Code”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Binary extends Expr {
    +
      static class Binary extends Expr {
         Binary(Expr left, Token operator, Expr right) {
           this.left = left;
           this.operator = operator;
    @@ -164,9 +164,9 @@ 

    A2 . 

    A2 . 1 . 3Call expression

    Function call expressions are introduced in “Functions”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Call extends Expr {
    +
      static class Call extends Expr {
         Call(Expr callee, Token paren, List<Expr> arguments) {
           this.callee = callee;
           this.paren = paren;
    @@ -188,9 +188,9 @@ 

    A2 . 1

    A2 . 1 . 4Get expression

    Property access, or “get” expressions are introduced in “Classes”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Get extends Expr {
    +
      static class Get extends Expr {
         Get(Expr object, Token name) {
           this.object = object;
           this.name = name;
    @@ -210,9 +210,9 @@ 

    A2 . 1̴

    A2 . 1 . 5Grouping expression

    Using parentheses to group expressions is introduced in “Representing Code”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Grouping extends Expr {
    +
      static class Grouping extends Expr {
         Grouping(Expr expression) {
           this.expression = expression;
         }
    @@ -230,9 +230,9 @@ 

    A2 .

    A2 . 1 . 6Literal expression

    Literal value expressions are introduced in “Representing Code”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Literal extends Expr {
    +
      static class Literal extends Expr {
         Literal(Object value) {
           this.value = value;
         }
    @@ -250,9 +250,9 @@ 

    A2 .̴

    A2 . 1 . 7Logical expression

    The logical and and or operators are introduced in “Control Flow”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Logical extends Expr {
    +
      static class Logical extends Expr {
         Logical(Expr left, Token operator, Expr right) {
           this.left = left;
           this.operator = operator;
    @@ -274,9 +274,9 @@ 

    A2 .̴

    A2 . 1 . 8Set expression

    Property assignment, or “set” expressions are introduced in “Classes”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Set extends Expr {
    +
      static class Set extends Expr {
         Set(Expr object, Token name, Expr value) {
           this.object = object;
           this.name = name;
    @@ -298,9 +298,9 @@ 

    A2 . 1̴

    A2 . 1 . 9Super expression

    The super expression is introduced in “Inheritance”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Super extends Expr {
    +
      static class Super extends Expr {
         Super(Token keyword, Token method) {
           this.keyword = keyword;
           this.method = method;
    @@ -319,9 +319,9 @@ 

    A2 . 1&

    A2 . 1 . 10This expression

    The this expression is introduced in “Classes”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class This extends Expr {
    +
      static class This extends Expr {
         This(Token keyword) {
           this.keyword = keyword;
         }
    @@ -338,9 +338,9 @@ 

    A2 . 1

    A2 . 1 . 11Unary expression

    Unary operators are introduced in “Representing Code”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Unary extends Expr {
    +
      static class Unary extends Expr {
         Unary(Token operator, Expr right) {
           this.operator = operator;
           this.right = right;
    @@ -360,9 +360,9 @@ 

    A2 . 1&

    A2 . 1 . 12Variable expression

    Variable access expressions are introduced in “Statements and State”.

    -
    lox/Expr.java
    +
    lox/Expr.java
    nest inside class Expr
    -
      static class Variable extends Expr {
    +
      static class Variable extends Expr {
         Variable(Token name) {
           this.name = name;
         }
    @@ -381,9 +381,9 @@ 

    A2 . 2State

    Statements form a second hierarchy of syntax tree nodes independent of expressions. We add the first couple of them in “Statements and State”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     import java.util.List;
     
    @@ -410,9 +410,9 @@ 

    A2 . 2State

    A2 . 2 . 1Block statement

    The curly-braced block statement that defines a local scope is introduced in “Statements and State”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    nest inside class Stmt
    -
      static class Block extends Stmt {
    +
      static class Block extends Stmt {
         Block(List<Stmt> statements) {
           this.statements = statements;
         }
    @@ -430,9 +430,9 @@ 

    A2 . 2

    A2 . 2 . 2Class statement

    Class declarations are introduced in, unsurprisingly, “Classes”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    nest inside class Stmt
    -
      static class Class extends Stmt {
    +
      static class Class extends Stmt {
         Class(Token name,
               Expr.Variable superclass,
               List<Stmt.Function> methods) {
    @@ -456,9 +456,9 @@ 

    A2 . 2

    A2 . 2 . 3Expression statement

    The expression statement is introduced in “Statements and State”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    nest inside class Stmt
    -
      static class Expression extends Stmt {
    +
      static class Expression extends Stmt {
         Expression(Expr expression) {
           this.expression = expression;
         }
    @@ -476,9 +476,9 @@ 

    A2 .&

    A2 . 2 . 4Function statement

    Function declarations are introduced in, you guessed it, “Functions”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    nest inside class Stmt
    -
      static class Function extends Stmt {
    +
      static class Function extends Stmt {
         Function(Token name, List<Token> params, List<Stmt> body) {
           this.name = name;
           this.params = params;
    @@ -500,9 +500,9 @@ 

    A2 .̴

    A2 . 2 . 5If statement

    The if statement is introduced in “Control Flow”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    nest inside class Stmt
    -
      static class If extends Stmt {
    +
      static class If extends Stmt {
         If(Expr condition, Stmt thenBranch, Stmt elseBranch) {
           this.condition = condition;
           this.thenBranch = thenBranch;
    @@ -524,9 +524,9 @@ 

    A2 . 2 .&

    A2 . 2 . 6Print statement

    The print statement is introduced in “Statements and State”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    nest inside class Stmt
    -
      static class Print extends Stmt {
    +
      static class Print extends Stmt {
         Print(Expr expression) {
           this.expression = expression;
         }
    @@ -544,9 +544,9 @@ 

    A2 . 2

    A2 . 2 . 7Return statement

    You need a function to return from, so return statements are introduced in “Functions”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    nest inside class Stmt
    -
      static class Return extends Stmt {
    +
      static class Return extends Stmt {
         Return(Token keyword, Expr value) {
           this.keyword = keyword;
           this.value = value;
    @@ -566,9 +566,9 @@ 

    A2 . 2&

    A2 . 2 . 8Variable statement

    Variable declarations are introduced in “Statements and State”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    nest inside class Stmt
    -
      static class Var extends Stmt {
    +
      static class Var extends Stmt {
         Var(Token name, Expr initializer) {
           this.name = name;
           this.initializer = initializer;
    @@ -588,9 +588,9 @@ 

    A2 .̴

    A2 . 2 . 9While statement

    The while statement is introduced in “Control Flow”.

    -
    lox/Stmt.java
    +
    lox/Stmt.java
    nest inside class Stmt
    -
      static class While extends Stmt {
    +
      static class While extends Stmt {
         While(Expr condition, Stmt body) {
           this.condition = condition;
           this.body = body;
    diff --git a/site/backmatter.html b/site/backmatter.html
    index 5f64047ce..fe44beb8b 100644
    --- a/site/backmatter.html
    +++ b/site/backmatter.html
    @@ -1,5 +1,5 @@
     
    -
    +
     
     
     Backmatter · Crafting Interpreters
    diff --git a/site/calls-and-functions.html b/site/calls-and-functions.html
    index b3131ef36..a5c6751c5 100644
    --- a/site/calls-and-functions.html
    +++ b/site/calls-and-functions.html
    @@ -1,5 +1,5 @@
     
    -
    +
     
     
     Calls and Functions · Crafting Interpreters
    @@ -128,7 +128,7 @@ 

    24 . 1< little higher level. I think a cleaner model is to give each function its own Chunk. We’ll want some other metadata too, so let’s go ahead and stuff it all in a struct now.

    -
      struct Obj* next;
    +
      struct Obj* next;
     };
     
    object.h
    add after struct Obj
    @@ -157,7 +157,7 @@

    24 . 1<

    This is the first time the “object” module has needed to reference Chunk, so we get an include.

    -
    #include "common.h"
    +
    #include "common.h"
     
    object.h
    #include "chunk.h"
     
    #include "value.h"
    @@ -167,7 +167,7 @@ 

    24 . 1<

    Like we did with strings, we define some accessories to make Lox functions easier to work with in C. Sort of a poor man’s object orientation. First, we’ll declare a C function to create a new Lox function.

    -
      uint32_t hash;
    +
      uint32_t hash;
     };
     
     
    object.h
    @@ -178,9 +178,9 @@

    24 . 1<
    object.h, add after struct ObjString

    The implementation is over here:

    -
    object.c
    +
    object.c
    add after allocateObject()
    -
    ObjFunction* newFunction() {
    +
    ObjFunction* newFunction() {
       ObjFunction* function = ALLOCATE_OBJ(ObjFunction, OBJ_FUNCTION);
       function->arity = 0;
       function->name = NULL;
    @@ -196,7 +196,7 @@ 

    24 . 1< set the function up in a sort of blank statezero arity, no name, and no code. That will get filled in later after the function is created.

    Since we have a new kind of object, we need a new object type in the enum.

    -
    typedef enum {
    +
    typedef enum {
     
    object.h
    in enum ObjType
      OBJ_FUNCTION,
    @@ -207,7 +207,7 @@ 

    24 . 1<

    When we’re done with a function object, we must return the bits it borrowed back to the operating system.

    -
      switch (object->type) {
    +
      switch (object->type) {
     
    memory.c
    in freeObject()
        case OBJ_FUNCTION: {
    @@ -230,7 +230,7 @@ 

    24 . 1<

    Lox lets you print any object, and functions are first-class objects, so we need to handle them too.

    -
      switch (OBJ_TYPE(value)) {
    +
      switch (OBJ_TYPE(value)) {
     
    object.c
    in printObject()
        case OBJ_FUNCTION:
    @@ -241,9 +241,9 @@ 

    24 . 1<
    object.c, in printObject()

    This calls out to:

    -
    object.c
    +
    object.c
    add after copyString()
    -
    static void printFunction(ObjFunction* function) {
    +
    static void printFunction(ObjFunction* function) {
       printf("<fn %s>", function->name->chars);
     }
     
    @@ -252,7 +252,7 @@

    24 . 1<

    Since a function knows its name, it may as well say it.

    Finally, we have a couple of macros for converting values to functions. First, make sure your value actually is a function.

    -
    #define OBJ_TYPE(value)        (AS_OBJ(value)->type)
    +
    #define OBJ_TYPE(value)        (AS_OBJ(value)->type)
     
     
    object.h
    #define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)
    @@ -262,7 +262,7 @@ 

    24 . 1<

    Assuming that evaluates to true, you can then safely cast the Value to an ObjFunction pointer using this:

    -
    #define IS_STRING(value)       isObjType(value, OBJ_STRING)
    +
    #define IS_STRING(value)       isObjType(value, OBJ_STRING)
     
     
    object.h
    #define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))
    @@ -294,7 +294,7 @@ 

    support that implicit top-level function. It starts with the Compiler struct. Instead of pointing directly to a Chunk that the compiler writes to, it instead has a reference to the function object being built.

    -
    typedef struct {
    +
    typedef struct {
     
    compiler.c
    in struct Compiler
      ObjFunction* function;
    @@ -308,9 +308,9 @@ 

    compiling top-level code versus the body of a function. Most of the compiler doesn’t care about thisthat’s why it’s a useful abstractionbut in one or two places the distinction is meaningful. We’ll get to one later.

    -
    compiler.c
    +
    compiler.c
    add after struct Local
    -
    typedef enum {
    +
    typedef enum {
       TYPE_FUNCTION,
       TYPE_SCRIPT
     } FunctionType;
    @@ -327,7 +327,7 @@ 

    we’d need to change the code later. But, really, it’s because I wrote all the code for the book before any of the text.

    -
    Compiler* current = NULL;
    +
    Compiler* current = NULL;
     
    compiler.c
    add after variable current
    replace 5 lines
    @@ -349,7 +349,7 @@

    24 . 2 . 1Creating functions at compile time

    We start threading this through in compile(), which is the main entry point into the compiler.

    -
      Compiler compiler;
    +
      Compiler compiler;
     
    compiler.c
    in compile()
    replace 2 lines
    @@ -362,7 +362,7 @@

    compiler.c
    +

    Then we allocate a new function object to compile into.

    -
      compiler->scopeDepth = 0;
    +
      compiler->scopeDepth = 0;
     
    compiler.c
    in initCompiler()
      compiler->function = newFunction();
    @@ -400,7 +400,7 @@ 

      current = compiler;
    +
      current = compiler;
     
    compiler.c
    in initCompiler()
    @@ -420,7 +420,7 @@ 

    compiler.c
    +
    compiler.c
    function endCompiler()
    replace 1 line
    static ObjFunction* endCompiler() {
    @@ -431,7 +431,7 @@ 

      emitReturn();
    +
     
     

    And then return it to compile() like so:

    -
    #endif
    +
    #endif
     
    compiler.c
    in endCompiler()
    @@ -455,7 +455,7 @@ 

    #ifdef DEBUG_PRINT_CODE
    +
    #ifdef DEBUG_PRINT_CODE
       if (!parser.hadError) {
     
    compiler.c
    in endCompiler()
    @@ -471,7 +471,7 @@

    static void printFunction(ObjFunction* function) {
    +
    static void printFunction(ObjFunction* function) {
     
    object.c
    in printFunction()
      if (function->name == NULL) {
    @@ -490,7 +490,7 @@ 

    #include "vm.h"
    +
    #include "vm.h"
     
     
    compiler.h
    function compile()
    @@ -504,7 +504,7 @@

    compiler.c
    +
    compiler.c
    function compile()
    replace 1 line
    ObjFunction* compile(const char* source) {
    @@ -514,7 +514,7 @@ 

      while (!match(TOKEN_EOF)) {
    +
      while (!match(TOKEN_EOF)) {
         declaration();
       }
     
    @@ -569,7 +569,7 @@ 

    -
    fun first() {
    +
    fun first() {
       var a = 1;
       second();
       var b = 2;
    @@ -599,7 +599,7 @@ 

    -
    fun first() {
    +
    fun first() {
       var a = 1;
       second();
       var b = 2;
    @@ -666,7 +666,7 @@ 

    24 . 3̴ need to track where on the stack that function’s locals begin, and where the caller should resume. We’ll put this, along with some other stuff, in a new struct.

    -
    #define STACK_MAX 256
    +
    #define STACK_MAX 256
     
    vm.h
     
    @@ -704,7 +704,7 @@ 

    24 . 3̴

    So over in the VM, we create an array of these CallFrame structs up front and treat it as a stack, like we do with the value array.

    -
    typedef struct {
    +
    typedef struct {
     
    vm.h
    in struct VM
    replace 2 lines
    @@ -722,7 +722,7 @@

    24 . 3̴ stackthe number of ongoing function calls. To keep clox simple, the array’s capacity is fixed. This means, as in many language implementations, there is a maximum call depth we can handle. For clox, it’s defined here:

    -
    #include "value.h"
    +
    #include "value.h"
     
     
    vm.h
    replace 1 line
    @@ -742,7 +742,7 @@

    24 . 3̴ temporaries in addition to locals. A robust implementation would guard against this, but I’m trying to keep things simple.

    -
      vm.stackTop = vm.stack;
    +
      vm.stackTop = vm.stack;
     
    vm.c
    in resetStack()
      vm.frameCount = 0;
    @@ -751,7 +751,7 @@ 

    24 . 3̴
    vm.c, in resetStack()

    The “vm.h” header needs access to ObjFunction, so we add an include.

    -
    #define clox_vm_h
    +
    #define clox_vm_h
     
     
    vm.h
    replace 1 line
    @@ -766,7 +766,7 @@

    24 . 3̴ handle that. Also, the instructions that access local variables by stack slot need to be updated to do so relative to the current CallFrame’s slots field.

    We’ll start at the top and plow through it.

    -
    static InterpretResult run() {
    +
    static InterpretResult run() {
     
    vm.c
    in run()
    replace 4 lines
    @@ -797,7 +797,7 @@

    24 . 3̴ will do this, but there’s a good chance it will.

    Now onto each instruction that needs a little tender loving care.

    -
          case OP_GET_LOCAL: {
    +
          case OP_GET_LOCAL: {
             uint8_t slot = READ_BYTE();
     
    vm.c
    in run()
    @@ -812,7 +812,7 @@

    24 . 3̴ stack. Now, it accesses the current frame’s slots array, which means it accesses the given numbered slot relative to the beginning of that frame.

    Setting a local variable works the same way.

    -
          case OP_SET_LOCAL: {
    +
          case OP_SET_LOCAL: {
             uint8_t slot = READ_BYTE();
     
    vm.c
    in run()
    @@ -824,7 +824,7 @@

    24 . 3̴

    The jump instructions used to modify the VM’s ip field. Now, they do the same for the current frame’s ip.

    -
          case OP_JUMP: {
    +
          case OP_JUMP: {
             uint16_t offset = READ_SHORT();
     
    vm.c
    in run()
    @@ -835,7 +835,7 @@

    24 . 3̴
    vm.c, in run(), replace 1 line

    Same with the conditional jump:

    -
          case OP_JUMP_IF_FALSE: {
    +
          case OP_JUMP_IF_FALSE: {
             uint16_t offset = READ_SHORT();
     
    vm.c
    in run()
    @@ -846,7 +846,7 @@

    24 . 3̴
    vm.c, in run(), replace 1 line

    And our backward-jumping loop instruction:

    -
          case OP_LOOP: {
    +
          case OP_LOOP: {
             uint16_t offset = READ_SHORT();
     
    vm.c
    in run()
    @@ -858,7 +858,7 @@

    24 . 3̴

    We have some diagnostic code that prints each instruction as it executes to help us debug our VM. That needs to work with the new structure too.

    -
        printf("\n");
    +
        printf("\n");
     
    vm.c
    in run()
    replace 2 lines
    @@ -873,7 +873,7 @@

    24 . 3̴

    You know, that wasn’t too bad, actually. Most instructions just use the macros so didn’t need to be touched. Next, we jump up a level to the code that calls run().

    -
    InterpretResult interpret(const char* source) {
    +
    InterpretResult interpret(const char* source) {
     
    vm.c
    in interpret()
    replace 10 lines
    @@ -904,7 +904,7 @@

    24 . 3̴

    This gets the interpreter ready to start executing code. After finishing, the VM used to free the hardcoded chunk. Now that the ObjFunction owns that code, we don’t need to do that anymore, so the end of interpret() is simply this:

    -
      frame->slots = vm.stack;
    +
      frame->slots = vm.stack;
     
     
    vm.c
    in interpret()
    @@ -916,7 +916,7 @@

    24 . 3̴

    The last piece of code referring to the old VM fields is runtimeError(). We’ll revisit that later in the chapter, but for now let’s change it to this:

    -
      fputs("\n", stderr);
    +
      fputs("\n", stderr);
     
     
    vm.c
    in runtimeError()
    @@ -943,7 +943,7 @@

    24 

    Yes, I am going to make a dumb joke about the fun keyword every time it comes up.

    -
    static void declaration() {
    +
    static void declaration() {
     
    compiler.c
    in declaration()
    replace 1 line
    @@ -955,9 +955,9 @@

    24 
    compiler.c, in declaration(), replace 1 line

    That passes control to here:

    -
    compiler.c
    +
    compiler.c
    add after block()
    -
    static void funDeclaration() {
    +
    static void funDeclaration() {
       uint8_t global = parseVariable("Expect function name.");
       markInitialized();
       function(TYPE_FUNCTION);
    @@ -984,7 +984,7 @@ 

    24  soon as we compile the name, before we compile the body. That way the name can be referenced inside the body without generating an error.

    We do need one check, though.

    -
    static void markInitialized() {
    +
    static void markInitialized() {
     
    compiler.c
    in markInitialized()
      if (current->scopeDepth == 0) return;
    @@ -1004,9 +1004,9 @@ 

    24 

    I split out the code to compile the parameters and body because we’ll reuse it later for parsing method declarations inside classes. Let’s build it incrementally, starting with this:

    -
    compiler.c
    +
    compiler.c
    add after block()
    -
    static void function(FunctionType type) {
    +
    static void function(FunctionType type) {
       Compiler compiler;
       initCompiler(&compiler, type);
       beginScope(); 
    @@ -1056,7 +1056,7 @@ 

    24 .& the Value and CallFrame stacks in the VM, we won’t use an array. Instead, we use a linked list. Each Compiler points back to the Compiler for the function that encloses it, all the way back to the root Compiler for the top-level code.

    -
    } FunctionType;
    +
    } FunctionType;
     
     
    compiler.c
    add after enum FunctionType
    @@ -1072,7 +1072,7 @@

    24 .& and use that for the field’s type. C is weird.

    When initializing a new Compiler, we capture the about-to-no-longer-be-current one in that pointer.

    -
    static void initCompiler(Compiler* compiler, FunctionType type) {
    +
    static void initCompiler(Compiler* compiler, FunctionType type) {
     
    compiler.c
    in initCompiler()
      compiler->enclosing = current;
    @@ -1082,7 +1082,7 @@ 

    24 .&

    Then when a Compiler finishes, it pops itself off the stack by restoring the previous compiler to be the new current one.

    -
    #endif
    +
    #endif
     
     
    compiler.c
    in endCompiler()
    @@ -1107,7 +1107,7 @@

    24 .&

    24 . 4 . 2Function parameters

    Functions aren’t very useful if you can’t pass arguments to them, so let’s do parameters next.

    -
      consume(TOKEN_LEFT_PAREN, "Expect '(' after function name.");
    +
      consume(TOKEN_LEFT_PAREN, "Expect '(' after function name.");
     
    compiler.c
    in function()
      if (!check(TOKEN_RIGHT_PAREN)) {
    @@ -1135,7 +1135,7 @@ 

    24 . compiling a function declaration, we call initCompiler() right after we parse the function’s name. That means we can grab the name right then from the previous token.

    -
      current = compiler;
    +
      current = compiler;
     
    compiler.c
    in initCompiler()
      if (type != TYPE_SCRIPT) {
    @@ -1154,7 +1154,7 @@ 

    24 . compiler outlives the compiler and persists until runtime. So it needs its own heap-allocated name string that it can keep around.

    Rad. Now we can compile function declarations, like this:

    -
    fun areWeHavingItYet() {
    +
    fun areWeHavingItYet() {
       print "Yes we are!";
     }
     
    @@ -1173,7 +1173,7 @@ 

    24 . 5) to wrap it up at the end.

    That odd grammatical perspective explains how to hook the syntax into our parsing table.

    -
    ParseRule rules[] = {
    +
    ParseRule rules[] = {
     
    compiler.c
    add after unary()
    replace 1 line
    @@ -1184,9 +1184,9 @@

    24 . 5When the parser encounters a left parenthesis following an expression, it dispatches to a new parser function.

    -
    compiler.c
    +
    compiler.c
    add after binary()
    -
    static void call(bool canAssign) {
    +
    static void call(bool canAssign) {
       uint8_t argCount = argumentList();
       emitBytes(OP_CALL, argCount);
     }
    @@ -1199,9 +1199,9 @@ 

    24 . 5OP_CALL instruction to invoke the function, using the argument count as an operand.

    We compile the arguments using this friend:

    -
    compiler.c
    +
    compiler.c
    add after defineVariable()
    -
    static uint8_t argumentList() {
    +
    static uint8_t argumentList() {
       uint8_t argCount = 0;
       if (!check(TOKEN_RIGHT_PAREN)) {
         do {
    @@ -1223,7 +1223,7 @@ 

    24 . 5—since we stuff the argument count into the bytecode as a single-byte operand, we can only go up to 255. We need to verify that in this compiler too.

    -
          expression();
    +
          expression();
     
    compiler.c
    in argumentList()
          if (argCount == 255) {
    @@ -1235,7 +1235,7 @@ 

    24 . 5That’s the front end. Let’s skip over to the back end, with a quick stop in the middle to declare the new instruction.

    -
      OP_LOOP,
    +
      OP_LOOP,
     
    chunk.h
    in enum OpCode
      OP_CALL,
    @@ -1248,7 +1248,7 @@ 

    fun sum(a, b, c) {
    +
    fun sum(a, b, c) {
       return a + b + c;
     }
     
    @@ -1287,7 +1287,7 @@ 

          }
    +
          }
     
    vm.c
    in run()
          case OP_CALL: {
    @@ -1310,7 +1310,7 @@ 

              return INTERPRET_RUNTIME_ERROR;
    +
              return INTERPRET_RUNTIME_ERROR;
             }
     
    vm.c
    in run()
    @@ -1323,9 +1323,9 @@

    var notAFunction = 123;
    +
    var notAFunction = 123;
     notAFunction();
     

    If that happens, the runtime needs to safely report an error and halt. So the first thing we do is check the type of the value that we’re trying to call. If it’s not a function, we error out. Otherwise, the actual call happens here:

    -
    vm.c
    +
    vm.c
    add after peek()
    -
    static bool call(ObjFunction* function, int argCount) {
    +
    static bool call(ObjFunction* function, int argCount) {
       CallFrame* frame = &vm.frames[vm.frameCount++];
       frame->function = function;
       frame->ip = function->chunk.code;
    @@ -1374,7 +1374,7 @@ 

          return jumpInstruction("OP_LOOP", -1, chunk, offset);
    +
          return jumpInstruction("OP_LOOP", -1, chunk, offset);
     
    debug.c
    in disassembleInstruction()
        case OP_CALL:
    @@ -1386,7 +1386,7 @@ 

      push(OBJ_VAL(function));
    +
      push(OBJ_VAL(function));
     
    vm.c
    in interpret()
    replace 4 lines
    @@ -1404,7 +1404,7 @@

    24̴ Lox ain’t statically typed, a foolish user could pass too many or too few arguments. In Lox, we’ve defined that to be a runtime error, which we report like so:

    -
    static bool call(ObjFunction* function, int argCount) {
    +
    static bool call(ObjFunction* function, int argCount) {
     
    vm.c
    in call()
      if (argCount != function->arity) {
    @@ -1422,7 +1422,7 @@ 

    24̴

    There’s another error we need to report that’s less to do with the user’s foolishness than our own. Because the CallFrame array has a fixed size, we need to ensure a deep call chain doesn’t overflow it.

    -
      }
    +
      }
     
     
    vm.c
    in call()
    @@ -1448,7 +1448,7 @@

    24  and we’ve conveniently stored each function’s name, we can show that entire stack when a runtime error disrupts the harmony of the user’s existence. It looks like this:

    -
      fputs("\n", stderr);
    +
      fputs("\n", stderr);
     
     
    vm.c
    in runtimeError()
    @@ -1495,7 +1495,7 @@

    24  do that.

    For example, if you run this broken program:

    -
    fun a() { b(); }
    +
     

    It prints out:

    -
    Expected 0 arguments but got 2.
    +
    Expected 0 arguments but got 2.
     [line 4] in c()
     [line 2] in b()
     [line 1] in a()
    @@ -1517,7 +1517,7 @@ 

    24& some time, but it’s always had some kind of temporary code hanging out in it just to get us out of the bytecode loop. The time has arrived for a real implementation.

    -
          case OP_RETURN: {
    +
          case OP_RETURN: {
     
    vm.c
    in run()
    replace 2 lines
    @@ -1554,7 +1554,7 @@

    24& right where it left off, immediately after the OP_CALL instruction.

    Each step of the return process: popping the return value, discarding the call frame, pushing the return value.

    Note that we assume here that the function did actually return a value, but a function can implicitly return by reaching the end of its body:

    -
    fun noReturn() {
    +
     

    We need to handle that correctly too. The language is specified to implicitly return nil in that case. To make that happen, we add this:

    -
    static void emitReturn() {
    +
    static void emitReturn() {
     
    compiler.c
    in emitReturn()
      emitByte(OP_NIL);
    @@ -1579,7 +1579,7 @@ 

    24&

    24 . 6Return Statements

    If you want a function that returns something other than the implicit nil, you need a return statement. Let’s get that working.

    -
        ifStatement();
    +
        ifStatement();
     
    compiler.c
    in statement()
      } else if (match(TOKEN_RETURN)) {
    @@ -1589,9 +1589,9 @@ 

    24 . 
    compiler.c, in statement()

    When the compiler sees a return keyword, it goes here:

    -
    compiler.c
    +
    compiler.c
    add after printStatement()
    -
    static void returnStatement() {
    +
    static void returnStatement() {
       if (match(TOKEN_SEMICOLON)) {
         emitReturn();
       } else {
    @@ -1623,7 +1623,7 @@ 

    24 .  compile error to worry about. Returns are useful for returning from functions but the top level of a Lox program is imperative code too. You shouldn’t be able to return from there.

    -
    return "What?!";
    +
    return "What?!";
     

    We’ve specified that it’s a compile error to have a return statement outside of any function, which we implement like so:

    -
    static void returnStatement() {
    +
    static void returnStatement() {
     
    compiler.c
    in returnStatement()
      if (current->type == TYPE_SCRIPT) {
    @@ -1672,7 +1672,7 @@ 

    24 . 7< a piece of native C code.

    We handle this in clox by defining native functions as an entirely different object type.

    -
    } ObjFunction;
    +
    } ObjFunction;
     
    object.h
    add after struct ObjFunction
    @@ -1696,7 +1696,7 @@ 

    24 . 7< returns the result value.

    As always, a new object type carries some accoutrements with it. To create an ObjNative, we declare a constructor-like function.

    -
    ObjFunction* newFunction();
    +
    ObjFunction* newFunction();
     
    object.h
    add after newFunction()
    ObjNative* newNative(NativeFn function);
    @@ -1705,9 +1705,9 @@ 

    24 . 7<
    object.h, add after newFunction()

    We implement that like so:

    -
    object.c
    +
    object.c
    add after newFunction()
    -
    ObjNative* newNative(NativeFn function) {
    +
    ObjNative* newNative(NativeFn function) {
       ObjNative* native = ALLOCATE_OBJ(ObjNative, OBJ_NATIVE);
       native->function = function;
       return native;
    @@ -1718,7 +1718,7 @@ 

    24 . 7<

    The constructor takes a C function pointer to wrap in an ObjNative. It sets up the object header and stores the function. For the header, we need a new object type.

    -
    typedef enum {
    +
    typedef enum {
       OBJ_FUNCTION,
     
    object.h
    in enum ObjType
    @@ -1729,7 +1729,7 @@

    24 . 7<
    object.h, in enum ObjType

    The VM also needs to know how to deallocate a native function object.

    -
        }
    +
        }
     
    memory.c
    in freeObject()
        case OBJ_NATIVE:
    @@ -1741,7 +1741,7 @@ 

    24 . 7<

    There isn’t much here since ObjNative doesn’t own any extra memory. The other capability all Lox objects support is being printed.

    -
          break;
    +
          break;
     
    object.c
    in printObject()
        case OBJ_NATIVE:
    @@ -1753,7 +1753,7 @@ 

    24 . 7<

    In order to support dynamic typing, we have a macro to see if a value is a native function.

    -
    #define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)
    +
    #define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)
     
    object.h
    #define IS_NATIVE(value)       isObjType(value, OBJ_NATIVE)
     
    #define IS_STRING(value)       isObjType(value, OBJ_STRING)
    @@ -1762,7 +1762,7 @@ 

    24 . 7<

    Assuming that returns true, this macro extracts the C function pointer from a Value representing a native function:

    -
    #define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))
    +
    #define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))
     
    object.h
    #define AS_NATIVE(value) \
         (((ObjNative*)AS_OBJ(value))->function)
    @@ -1775,7 +1775,7 @@ 

    24 . 7< etc. Of course, the operation we actually care about is calling themusing one as the left-hand operand in a call expression.

    Over in callValue() we add another type case.

    -
          case OBJ_FUNCTION: 
    +
          case OBJ_FUNCTION: 
             return call(AS_FUNCTION(callee), argCount);
     
    vm.c
    in callValue()
    @@ -1798,9 +1798,9 @@

    24 . 7< to call. Without something like a foreign function interface, users can’t define their own native functions. That’s our job as VM implementers. We’ll start with a helper to define a new native function exposed to Lox programs.

    -
    vm.c
    +
    vm.c
    add after runtimeError()
    -
    static void defineNative(const char* name, NativeFn function) {
    +
    static void defineNative(const char* name, NativeFn function) {
       push(OBJ_VAL(copyString(name, (int)strlen(name))));
       push(OBJ_VAL(newNative(function)));
       tableSet(&vm.globals, AS_STRING(vm.stack[0]), vm.stack[1]);
    @@ -1827,9 +1827,9 @@ 

    24 . 7<

    It feels silly, but after all of that work, we’re going to add only one little native function.

    -
    vm.c
    +
    vm.c
    add after variable vm
    -
    static Value clockNative(int argCount, Value* args) {
    +
    static Value clockNative(int argCount, Value* args) {
       return NUMBER_VAL((double)clock() / CLOCKS_PER_SEC);
     }
     
    @@ -1837,7 +1837,7 @@

    24 . 7<

    This returns the elapsed time since the program started running, in seconds. It’s handy for benchmarking Lox programs. In Lox, we’ll name it clock().

    -
      initTable(&vm.strings);
    +
      initTable(&vm.strings);
     
    vm.c
    in initVM()
    @@ -1849,7 +1849,7 @@ 

    24 . 7<

    To get to the C standard library clock() function, the “vm” module needs an include.

    -
    #include <string.h>
    +
    #include <string.h>
     
    vm.c
    #include <time.h>
     
    @@ -1860,7 +1860,7 @@ 

    24 . 7<

    That was a lot of material to work through, but we did it! Type this in and try it out:

    -
    fun fib(n) {
    +
    fun fib(n) {
       if (n < 2) return n;
       return fib(n - 2) + fib(n - 1);
     }
    diff --git a/site/chunks-of-bytecode.html b/site/chunks-of-bytecode.html
    index fbf4f8997..89318dfb9 100644
    --- a/site/chunks-of-bytecode.html
    +++ b/site/chunks-of-bytecode.html
    @@ -1,5 +1,5 @@
     
    -
    +
     
     
     Chunks of Bytecode · Crafting Interpreters
    @@ -120,7 +120,7 @@ 

    Chunks of Bytecode

    slow. A tree-walk interpreter is fine for some kinds of high-level, declarative languages. But for a general-purpose, imperative languageeven a “scripting” language like Loxit won’t fly. Take this little script:

    -
    fun fib(n) {
    +
    fun fib(n) {
       if (n < 2) return n;
       return fib(n - 1) + fib(n - 2); 
     }
    @@ -302,9 +302,9 @@ 

    14 . 2Now is a good time to stretch, maybe crack your knuckles. A little montage music wouldn’t hurt either.

    -
    main.c
    +
    main.c
    create new file
    -
    #include "common.h"
    +
    #include "common.h"
     
     int main(int argc, const char* argv[]) {
       return 0;
    @@ -315,9 +315,9 @@ 

    14 . 2From this tiny seed, we will grow our entire VM. Since C provides us with so little, we first need to spend some time amending the soil. Some of that goes into this header:

    -
    common.h
    +
    common.h
    create new file
    -
    #ifndef clox_common_h
    +
    #ifndef clox_common_h
     #define clox_common_h
     
     #include <stdbool.h>
    @@ -335,9 +335,9 @@ 

    14̴

    Next, we need a module to define our code representation. I’ve been using “chunk” to refer to sequences of bytecode, so let’s make that the official name for that module.

    -
    chunk.h
    +
    chunk.h
    create new file
    -
    #ifndef clox_chunk_h
    +
    #ifndef clox_chunk_h
     #define clox_chunk_h
     
     #include "common.h"
    @@ -350,7 +350,7 @@ 

    14̴ (universally shortened to opcode). That number controls what kind of instruction we’re dealing withadd, subtract, look up variable, etc. We define those here:

    -
    #include "common.h"
    +
    #include "common.h"
     
    chunk.h
     
    @@ -371,7 +371,7 @@ 

    } OpCode;
    +
    } OpCode;
     
    chunk.h
    add after enum OpCode
    @@ -410,7 +410,7 @@ 

    typedef struct {
    +
    typedef struct {
     
    chunk.h
    in struct Chunk
      int count;
    @@ -449,7 +449,7 @@ 

    } Chunk;
    +
     
     

    And implement it thusly:

    -
    chunk.c
    +
    chunk.c
    create new file
    -
    #include <stdlib.h>
    +
    #include <stdlib.h>
     
     #include "chunk.h"
     
    @@ -478,7 +478,7 @@ 

    void initChunk(Chunk* chunk);
    +
     
     

    This is where the interesting work happens.

    -
    chunk.c
    +
    chunk.c
    add after initChunk()
    -
    void writeChunk(Chunk* chunk, uint8_t byte) {
    +
    void writeChunk(Chunk* chunk, uint8_t byte) {
       if (chunk->capacity < chunk->count + 1) {
         int oldCapacity = chunk->capacity;
         chunk->capacity = GROW_CAPACITY(oldCapacity);
    @@ -512,7 +512,7 @@ 

    #include "chunk.h"
    +
     
     

    This is enough to get us started.

    -
    memory.h
    +
    memory.h
    create new file
    -
    #ifndef clox_memory_h
    +
    #ifndef clox_memory_h
     #define clox_memory_h
     
     #include "common.h"
    @@ -552,7 +552,7 @@ 

    #define GROW_CAPACITY(capacity) \
    +
    #define GROW_CAPACITY(capacity) \
         ((capacity) < 8 ? 8 : (capacity) * 2)
     
    memory.h
    @@ -609,9 +609,9 @@ 

    memory.c
    +
    memory.c
    create new file
    -
    #include <stdlib.h>
    +
    #include <stdlib.h>
     
     #include "memory.h"
     
    @@ -646,7 +646,7 @@ 

      void* result = realloc(pointer, newSize);
    +
      void* result = realloc(pointer, newSize);
     
    memory.c
    in reallocate()
      if (result == NULL) exit(1);
    @@ -671,7 +671,7 @@ 

    void initChunk(Chunk* chunk);
    +
     
     

    The implementation is:

    -
    chunk.c
    +
    chunk.c
    add after initChunk()
    -
    void freeChunk(Chunk* chunk) {
    +
    void freeChunk(Chunk* chunk) {
       FREE_ARRAY(uint8_t, chunk->code, chunk->capacity);
       initChunk(chunk);
     }
    @@ -692,7 +692,7 @@ 

    #define GROW_ARRAY(type, pointer, oldCount, newCount) \
    +
    #define GROW_ARRAY(type, pointer, oldCount, newCount) \
         (type*)reallocate(pointer, sizeof(type) * (oldCount), \
             sizeof(type) * (newCount))
     
    memory.h
    @@ -714,7 +714,7 @@

    14 . 4Disassembling Chunks

    Now we have a little module for creating chunks of bytecode. Let’s try it out by hand-building a sample chunk.

    -
    int main(int argc, const char* argv[]) {
    +
    int main(int argc, const char* argv[]) {
     
    main.c
    in main()
      Chunk chunk;
    @@ -726,7 +726,7 @@ 

    14 .&
    main.c, in main()

    Don’t forget the include.

    -
    #include "common.h"
    +
    #include "common.h"
     
    main.c
    #include "chunk.h"
     
    @@ -751,7 +751,7 @@ 

    14 .&

    In jlox, our analogous tool was the AstPrinter class.

    In main(), after we create the chunk, we pass it to the disassembler.

    -
      initChunk(&chunk);
    +
      initChunk(&chunk);
       writeChunk(&chunk, OP_RETURN);
     
    main.c
    in main()
    @@ -766,7 +766,7 @@

    14 .& -
    #include "chunk.h"
    +
    #include "chunk.h"
     
    main.c
    #include "debug.h"
     
    @@ -776,9 +776,9 @@ 

    14 .&
    main.c

    Here’s that header:

    -
    debug.h
    +
    debug.h
    create new file
    -
    #ifndef clox_debug_h
    +
    #ifndef clox_debug_h
     #define clox_debug_h
     
     #include "chunk.h"
    @@ -795,9 +795,9 @@ 

    14 .& just disassembles a single instruction. It shows up here in the header because we’ll call it from the VM in later chapters.

    Here’s a start at the implementation file:

    -
    debug.c
    +
    debug.c
    create new file
    -
    #include <stdio.h>
    +
    #include <stdio.h>
     
     #include "debug.h"
     
    @@ -819,9 +819,9 @@ 

    14 .& offset, it returns the offset of the next instruction. This is because, as we’ll see later, instructions can have different sizes.

    The core of the “debug” module is this function:

    -
    debug.c
    +
    debug.c
    add after disassembleChunk()
    -
    int disassembleInstruction(Chunk* chunk, int offset) {
    +
    int disassembleInstruction(Chunk* chunk, int offset) {
       printf("%04d ", offset);
     
       uint8_t instruction = chunk->code[offset];
    @@ -849,9 +849,9 @@ 

    14 .&

    We have only one instruction right now, but this switch will grow throughout the rest of the book.

    -
    debug.c
    +
    debug.c
    add after disassembleChunk()
    -
    static int simpleInstruction(const char* name, int offset) {
    +
    static int simpleInstruction(const char* name, int offset) {
       printf("%s\n", name);
       return offset + 1;
     }
    @@ -862,7 +862,7 @@ 

    14 .& the opcode, then return the next byte offset past this instruction. Other instructions will have more going on.

    If we run our nascent interpreter now, it actually prints something:

    -
    == test chunk ==
    +
    == test chunk ==
     0000 OP_RETURN
     

    It worked! This is sort of the “Hello, world!” of our code representation. We @@ -873,7 +873,7 @@

    14 . 5Constan

    Now that we have a rudimentary chunk structure working, let’s start making it more useful. We can store code in chunks, but what about data? Many values the interpreter works with are created at runtime as the result of operations.

    -
    1 + 2;
    +
    1 + 2;
     

    The value 3 appears nowhere in the code here. However, the literals 1 and 2 do. To compile that statement to bytecode, we need some sort of instruction that @@ -887,9 +887,9 @@

    14 .

    For now, we’re going to start as simple as possible—we’ll support only double-precision, floating-point numbers. This will obviously expand over time, so we’ll set up a new module to give ourselves room to grow.

    -
    value.h
    +
    value.h
    create new file
    -
    #ifndef clox_value_h
    +
    #ifndef clox_value_h
     #define clox_value_h
     
     #include "common.h"
    @@ -937,7 +937,7 @@ 

    14 . 5 .& macros to fake generics, but that’s overkill for clox. We won’t need many more of these.

    -
    typedef double Value;
    +
    typedef double Value;
     
    value.h
     
    @@ -955,7 +955,7 @@ 

    14 . 5 .&

    As with the bytecode array in Chunk, this struct wraps a pointer to an array along with its allocated capacity and the number of elements in use. We also need the same three functions to work with value arrays.

    -
    } ValueArray;
    +
    } ValueArray;
     
    value.h
    add after struct ValueArray
    @@ -970,9 +970,9 @@ 

    14 . 5 .&
    value.h, add after struct ValueArray

    The implementations will probably give you déjà vu. First, to create a new one:

    -
    value.c
    +
    value.c
    create new file
    -
    #include <stdio.h>
    +
    #include <stdio.h>
     
     #include "memory.h"
     #include "value.h"
    @@ -990,9 +990,9 @@ 

    14 . 5 .& -
    value.c
    +
    value.c
    add after initValueArray()
    -
    void writeValueArray(ValueArray* array, Value value) {
    +
    void writeValueArray(ValueArray* array, Value value) {
       if (array->capacity < array->count + 1) {
         int oldCapacity = array->capacity;
         array->capacity = GROW_CAPACITY(oldCapacity);
    @@ -1009,9 +1009,9 @@ 

    14 . 5 .&

    The memory-management macros we wrote earlier do let us reuse some of the logic from the code array, so this isn’t too bad. Finally, to release all memory used by the array:

    -
    value.c
    +
    value.c
    add after writeValueArray()
    -
    void freeValueArray(ValueArray* array) {
    +
    void freeValueArray(ValueArray* array) {
       FREE_ARRAY(Value, array->values, array->capacity);
       initValueArray(array);
     }
    @@ -1020,7 +1020,7 @@ 

    14 . 5 .&

    Now that we have growable arrays of values, we can add one to Chunk to store the chunk’s constants.

    -
      uint8_t* code;
    +
      uint8_t* code;
     
    chunk.h
    in struct Chunk
      ValueArray constants;
    @@ -1029,7 +1029,7 @@ 

    14 . 5 .&
    chunk.h, in struct Chunk

    Don’t forget the include.

    -
    #include "common.h"
    +
    #include "common.h"
     
    chunk.h
    #include "value.h"
     
    @@ -1040,7 +1040,7 @@ 

    14 . 5 .&

    Ah, C, and its Stone Age modularity story. Where were we? Right. When we initialize a new chunk, we initialize its constant list too.

    -
      chunk->code = NULL;
    +
      chunk->code = NULL;
     
    chunk.c
    in initChunk()
      initValueArray(&chunk->constants);
    @@ -1049,7 +1049,7 @@ 

    14 . 5 .&
    chunk.c, in initChunk()

    Likewise, we free the constants when we free the chunk.

    -
      FREE_ARRAY(uint8_t, chunk->code, chunk->capacity);
    +
      FREE_ARRAY(uint8_t, chunk->code, chunk->capacity);
     
    chunk.c
    in freeChunk()
      freeValueArray(&chunk->constants);
    @@ -1061,7 +1061,7 @@ 

    14 . 5 .& yet-to-be-written compiler could write to the constant array inside Chunk directly—it’s not like C has private fields or anything—but it’s a little nicer to add an explicit function.

    -
    void writeChunk(Chunk* chunk, uint8_t byte);
    +
    void writeChunk(Chunk* chunk, uint8_t byte);
     
    chunk.h
    add after writeChunk()
    int addConstant(Chunk* chunk, Value value);
    @@ -1072,9 +1072,9 @@ 

    14 . 5 .&
    chunk.h, add after writeChunk()

    Then we implement it.

    -
    chunk.c
    +
    chunk.c
    add after writeChunk()
    -
    int addConstant(Chunk* chunk, Value value) {
    +
    int addConstant(Chunk* chunk, Value value) {
       writeValueArray(&chunk->constants, value);
       return chunk->constants.count - 1;
     }
    @@ -1086,13 +1086,13 @@ 

    14 . 5 .&

    14 . 5 . 3Constant instructions

    We can store constants in chunks, but we also need to execute them. In a piece of code like:

    -
    print 1;
    +
    print 1;
     print 2;
     

    The compiled chunk needs to not only contain the values 1 and 2, but know when to produce them so that they are printed in the right order. Thus, we need an instruction that produces a particular constant.

    -
    typedef enum {
    +
    typedef enum {
     
    chunk.h
    in enum OpCode
      OP_CONSTANT,
    @@ -1130,7 +1130,7 @@ 

    14 

    In this case, OP_CONSTANT takes a single byte operand that specifies which constant to load from the chunk’s constant array. Since we don’t have a compiler yet, we “hand-compile” an instruction in our test chunk.

    -
      initChunk(&chunk);
    +
      initChunk(&chunk);
     
    main.c
    in main()
    @@ -1150,7 +1150,7 @@ 

    14  bytes as far as that function is concerned.

    If we try to run this now, the disassembler is going to yell at us because it doesn’t know how to decode the new instruction. Let’s fix that.

    -
      switch (instruction) {
    +
      switch (instruction) {
     
    debug.c
    in disassembleInstruction()
        case OP_CONSTANT:
    @@ -1161,9 +1161,9 @@ 

    14 

    This instruction has a different instruction format, so we write a new helper function to disassemble it.

    -
    debug.c
    +
    debug.c
    add after disassembleChunk()
    -
    static int constantInstruction(const char* name, Chunk* chunk,
    +
    static int constantInstruction(const char* name, Chunk* chunk,
                                    int offset) {
       uint8_t constant = chunk->code[offset + 1];
       printf("%-16s %4d '", name, constant);
    @@ -1180,7 +1180,7 @@ 

    14  compile time after all—and display the value itself too.

    This requires some way to print a clox Value. That function will live in the “value” module, so we include that.

    -
    #include "debug.h"
    +
    #include "debug.h"
     
    debug.c
    #include "value.h"
     
    @@ -1190,7 +1190,7 @@ 

    14 
    debug.c

    Over in that header, we declare:

    -
    void freeValueArray(ValueArray* array);
    +
    void freeValueArray(ValueArray* array);
     
    value.h
    add after freeValueArray()
    void printValue(Value value);
    @@ -1201,9 +1201,9 @@ 

    14 
    value.h, add after freeValueArray()

    And here’s an implementation:

    -
    value.c
    +
    value.c
    add after freeValueArray()
    -
    void printValue(Value value) {
    +
    void printValue(Value value) {
       printf("%g", value);
     }
     
    @@ -1212,7 +1212,7 @@

    14 

    Magnificent, right? As you can imagine, this is going to get more complex once we add dynamic typing to Lox and have values of different types.

    Back in constantInstruction(), the only remaining piece is the return value.

    -
      printf("'\n");
    +
      printf("'\n");
     
    debug.c
    in constantInstruction()
      return offset + 2;
    @@ -1252,7 +1252,7 @@ 

    14 . 6< operands it cares about.

    To implement this, we add another array to Chunk.

    -
      uint8_t* code;
    +
      uint8_t* code;
     
    chunk.h
    in struct Chunk
      int* lines;
    @@ -1263,7 +1263,7 @@ 

    14 . 6<

    Since it exactly parallels the bytecode array, we don’t need a separate count or capacity. Every time we touch the code array, we make a corresponding change to the line number array, starting with initialization.

    -
      chunk->code = NULL;
    +
      chunk->code = NULL;
     
    chunk.c
    in initChunk()
      chunk->lines = NULL;
    @@ -1272,7 +1272,7 @@ 

    14 . 6<
    chunk.c, in initChunk()

    And likewise deallocation:

    -
      FREE_ARRAY(uint8_t, chunk->code, chunk->capacity);
    +
      FREE_ARRAY(uint8_t, chunk->code, chunk->capacity);
     
    chunk.c
    in freeChunk()
      FREE_ARRAY(int, chunk->lines, chunk->capacity);
    @@ -1282,7 +1282,7 @@ 

    14 . 6<

    When we write a byte of code to the chunk, we need to know what source line it came from, so we add an extra parameter in the declaration of writeChunk().

    -
    void freeChunk(Chunk* chunk);
    +
    void freeChunk(Chunk* chunk);
     
    chunk.h
    function writeChunk()
    replace 1 line
    @@ -1292,7 +1292,7 @@

    14 . 6<
    chunk.h, function writeChunk(), replace 1 line

    And in the implementation:

    -
    chunk.c
    +
    chunk.c
    function writeChunk()
    replace 1 line
    void writeChunk(Chunk* chunk, uint8_t byte, int line) {
    @@ -1301,7 +1301,7 @@ 

    14 . 6<
    chunk.c, function writeChunk(), replace 1 line

    When we allocate or grow the code array, we do the same for the line info too.

    -
        chunk->code = GROW_ARRAY(uint8_t, chunk->code,
    +
        chunk->code = GROW_ARRAY(uint8_t, chunk->code,
             oldCapacity, chunk->capacity);
     
    chunk.c
    in writeChunk()
    @@ -1312,7 +1312,7 @@

    14 . 6<
    chunk.c, in writeChunk()

    Finally, we store the line number in the array.

    -
      chunk->code[chunk->count] = byte;
    +
      chunk->code[chunk->count] = byte;
     
    chunk.c
    in writeChunk()
      chunk->lines[chunk->count] = line;
    @@ -1324,7 +1324,7 @@ 

    —arbitrary at this point—line number.

    -
      int constant = addConstant(&chunk, 1.2);
    +
      int constant = addConstant(&chunk, 1.2);
     
    main.c
    in main()
    replace 4 lines
    @@ -1346,7 +1346,7 @@

    —the number of bytes from the beginning of the chunk—we show its source line.

    -
    int disassembleInstruction(Chunk* chunk, int offset) {
    +
    int disassembleInstruction(Chunk* chunk, int offset) {
       printf("%04d ", offset);
     
    debug.c
    in disassembleInstruction()
    @@ -1367,7 +1367,7 @@

    == test chunk ==
    +
    == test chunk ==
     0000  123 OP_CONSTANT         0 '1.2'
     0002    | OP_RETURN
     
    @@ -1416,7 +1416,7 @@

    Challenges

    define a second OP_CONSTANT_LONG instruction. It stores the operand as a 24-bit number, which should be plenty.

    Implement this function:

    -
    void writeConstant(Chunk* chunk, Value value, int line) {
    +
    void writeConstant(Chunk* chunk, Value value, int line) {
       // Implement me...
     }
     
    diff --git a/site/classes-and-instances.html b/site/classes-and-instances.html index 2ae840618..ffbcc32a1 100644 --- a/site/classes-and-instances.html +++ b/site/classes-and-instances.html @@ -1,5 +1,5 @@ - + Classes and Instances · Crafting Interpreters @@ -125,7 +125,7 @@

    27 . 1

    By this point, we’re well-acquainted with the process of adding a new object type to the VM. We start with a struct.

    -
    } ObjClosure;
    +
    } ObjClosure;
     
    object.h
    add after struct ObjClosure
    @@ -144,7 +144,7 @@ 

    27 . 1

    The new type needs a corresponding case in the ObjType enum.

    -
    typedef enum {
    +
    typedef enum {
     
    object.h
    in enum ObjType
      OBJ_CLASS,
    @@ -154,7 +154,7 @@ 

    27 . 1And that type gets a corresponding pair of macros. First, for testing an object’s type:

    -
    #define OBJ_TYPE(value)        (AS_OBJ(value)->type)
    +
     
     

    And then for casting a Value to an ObjClass pointer:

    -
    #define IS_STRING(value)       isObjType(value, OBJ_STRING)
    +
     
     

    The VM creates new class objects using this function:

    -
    } ObjClass;
    +
     
     

    The implementation lives over here:

    -
    object.c
    +
    object.c
    add after allocateObject()
    -
    ObjClass* newClass(ObjString* name) {
    +
    ObjClass* newClass(ObjString* name) {
       ObjClass* klass = ALLOCATE_OBJ(ObjClass, OBJ_CLASS);
       klass->name = name; 
       return klass;
    @@ -201,7 +201,7 @@ 

    27 . 1

    When the VM no longer needs a class, it frees it like so:

    -
      switch (object->type) {
    +
      switch (object->type) {
     
    memory.c
    in freeObject()
        case OBJ_CLASS: {
    @@ -218,7 +218,7 @@ 

    27 . 1

    We have a memory manager now, so we also need to support tracing through class objects.

    -
      switch (object->type) {
    +
      switch (object->type) {
     
    memory.c
    in blackenObject()
        case OBJ_CLASS: {
    @@ -233,7 +233,7 @@ 

    27 . 1When the GC reaches a class object, it marks the class’s name to keep that string alive too.

    The last operation the VM can perform on a class is printing it.

    -
      switch (OBJ_TYPE(value)) {
    +
      switch (OBJ_TYPE(value)) {
     
    object.c
    in printObject()
        case OBJ_CLASS:
    @@ -247,7 +247,7 @@ 

    27 . 127 . 2Class Declarations

    Runtime representation in hand, we are ready to add support for classes to the language. Next, we move into the parser.

    -
    static void declaration() {
    +
    static void declaration() {
     
    compiler.c
    in declaration()
    replace 1 line
    @@ -260,9 +260,9 @@

    27 .̴

    Class declarations are statements, and the parser recognizes one by the leading class keyword. The rest of the compilation happens over here:

    -
    compiler.c
    +
    compiler.c
    add after function()
    -
    static void classDeclaration() {
    +
    static void classDeclaration() {
       consume(TOKEN_IDENTIFIER, "Expect class name.");
       uint8_t nameConstant = identifierConstant(&parser.previous);
       declareVariable();
    @@ -287,7 +287,7 @@ 

    27 .̴

    There you have it, our VM supports classes now. You can run this:

    -
    class Brioche {}
    +
    class Brioche {}
     print Brioche;
     

    Unfortunately, printing is about all you can do with classes, so next is @@ -372,7 +372,7 @@

    27 .&

    We won’t get to methods until the next chapter, so for now we will only worry about the first part. Before classes can create instances, we need a representation for them.

    -
    } ObjClass;
    +
    } ObjClass;
     
    object.h
    add after struct ObjClass
    @@ -409,7 +409,7 @@ 

    27 .& accessing a field is as fast as offsetting a pointer by an integer constant.

    We only need to add an include, and we’ve got it.

    -
    #include "chunk.h"
    +
    #include "chunk.h"
     
    object.h
    #include "table.h"
     
    #include "value.h"
    @@ -417,7 +417,7 @@ 

    27 .&
    object.h

    This new struct gets a new object type.

    -
      OBJ_FUNCTION,
    +
      OBJ_FUNCTION,
     
    object.h
    in enum ObjType
      OBJ_INSTANCE,
    @@ -437,7 +437,7 @@ 

    27 .& class it is an instance of, is an ObjInstance. That one VM object type covers instances of all classes. The two worlds map to each other something like this:

    A set of class declarations and instances, and the runtime representations each maps to.

    Got it? OK, back to the implementation. We also get our usual macros.

    -
    #define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)
    +
    #define IS_FUNCTION(value)     isObjType(value, OBJ_FUNCTION)
     
    object.h
    #define IS_INSTANCE(value)     isObjType(value, OBJ_INSTANCE)
     
    #define IS_NATIVE(value)       isObjType(value, OBJ_NATIVE)
    @@ -445,7 +445,7 @@ 

    27 .&
    object.h

    And:

    -
    #define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))
    +
    #define AS_FUNCTION(value)     ((ObjFunction*)AS_OBJ(value))
     
    object.h
    #define AS_INSTANCE(value)     ((ObjInstance*)AS_OBJ(value))
     
    #define AS_NATIVE(value) \
    @@ -454,7 +454,7 @@ 

    27 .&

    Since fields are added after the instance is created, the “constructor” function only needs to know the class.

    -
    ObjFunction* newFunction();
    +
    ObjFunction* newFunction();
     
    object.h
    add after newFunction()
    ObjInstance* newInstance(ObjClass* klass);
    @@ -463,9 +463,9 @@ 

    27 .&
    object.h, add after newFunction()

    We implement that function here:

    -
    object.c
    +
    object.c
    add after newFunction()
    -
    ObjInstance* newInstance(ObjClass* klass) {
    +
    ObjInstance* newInstance(ObjClass* klass) {
       ObjInstance* instance = ALLOCATE_OBJ(ObjInstance, OBJ_INSTANCE);
       instance->klass = klass;
       initTable(&instance->fields);
    @@ -477,7 +477,7 @@ 

    27 .&

    We store a reference to the instance’s class. Then we initialize the field table to an empty hash table. A new baby object is born!

    At the sadder end of the instance’s lifespan, it gets freed.

    -
          FREE(ObjFunction, object);
    +
          FREE(ObjFunction, object);
           break;
         }
     
    memory.c
    @@ -498,7 +498,7 @@

    27 .& those for us. Here we free only the entry array of the table itself.

    Speaking of the garbage collector, it needs support for tracing through instances.

    -
          markArray(&function->chunk.constants);
    +
          markArray(&function->chunk.constants);
           break;
         }
     
    memory.c
    @@ -519,7 +519,7 @@

    27 .& field. Fortunately, we already have a nice markTable() function to make tracing them easy.

    Less critical but still important is printing.

    -
          break;
    +
          break;
     
    object.c
    in printObject()
        case OBJ_INSTANCE:
    @@ -545,7 +545,7 @@ 

    27 .& type of object being called to make sure the user doesn’t try to invoke a number or other invalid type.

    We extend that runtime checking with a new case.

    -
        switch (OBJ_TYPE(callee)) {
    +
        switch (OBJ_TYPE(callee)) {
     
    vm.c
    in callValue()
          case OBJ_CLASS: {
    @@ -566,7 +566,7 @@ 

    27 .& the next chapter when we add support for initializers.

    We’re one step farther. Now we can define classes and create instances of them.

    -
    class Brioche {}
    +
    class Brioche {}
     print Brioche();
     

    Note the parentheses after Brioche on the second line now. This prints @@ -576,7 +576,7 @@

    27 remains is exposing that functionality to the user. Fields are accessed and modified using get and set expressions. Not one to break with tradition, Lox uses the classic “dot” syntax:

    -
    eclair.filling = "pastry creme";
    +
    eclair.filling = "pastry creme";
     print eclair.filling;
     

    The period—full stop for my English friends—works 27 but a single identifier whose semantics are handled by the get or set expression itself. It’s really closer to a postfix expression.

    -
      [TOKEN_COMMA]         = {NULL,     NULL,   PREC_NONE},
    +
      [TOKEN_COMMA]         = {NULL,     NULL,   PREC_NONE},
     
    compiler.c
    replace 1 line
      [TOKEN_DOT]           = {NULL,     dot,    PREC_CALL},
    @@ -600,9 +600,9 @@ 

    27

    As in other languages, the . operator binds tightly, with precedence as high as the parentheses in a function call. After the parser consumes the dot token, it dispatches to a new parse function.

    -
    compiler.c
    +
    compiler.c
    add after call()
    -
    static void dot(bool canAssign) {
    +
    static void dot(bool canAssign) {
       consume(TOKEN_IDENTIFIER, "Expect property name after '.'.");
       uint8_t name = identifierConstant(&parser.previous);
     
    @@ -629,13 +629,13 @@ 

    27 function handles. If we see an equals sign after the field name, it must be a set expression that is assigning to a field. But we don’t always allow an equals sign after the field to be compiled. Consider:

    -
    a + b.c = 3
    +
    a + b.c = 3
     

    This is syntactically invalid according to Lox’s grammar, which means our Lox implementation is obligated to detect and report the error. If dot() silently parsed the = 3 part, we would incorrectly interpret the code as if the user had written:

    -
    a + (b.c = 3)
    +
    a + (b.c = 3)
     

    The problem is that the = side of a set expression has much lower precedence than the . part. The parser may call dot() in a context that is too high @@ -657,7 +657,7 @@

    27 instruction.

    Now is a good time to define these two new instructions.

    -
      OP_SET_UPVALUE,
    +
      OP_SET_UPVALUE,
     
    chunk.h
    in enum OpCode
      OP_GET_PROPERTY,
    @@ -667,7 +667,7 @@ 

    27
    chunk.h, in enum OpCode

    And add support for disassembling them:

    -
          return byteInstruction("OP_SET_UPVALUE", chunk, offset);
    +
          return byteInstruction("OP_SET_UPVALUE", chunk, offset);
     
    debug.c
    in disassembleInstruction()
        case OP_GET_PROPERTY:
    @@ -681,7 +681,7 @@ 

    27

    27 . 4 . 1Interpreting getter and setter expressions

    Sliding over to the runtime, we’ll start with get expressions since those are a little simpler.

    -
          }
    +
          }
     
    vm.c
    in run()
          case OP_GET_PROPERTY: {
    @@ -706,7 +706,7 @@ 

              push(value);
    +
              push(value);
               break;
             }
     
    vm.c
    @@ -724,7 +724,7 @@

    var obj = "not an instance";
    +
    var obj = "not an instance";
     print obj.field;
     

    The user’s program is wrong, but the VM still has to handle it with some grace. @@ -743,7 +743,7 @@

          case OP_GET_PROPERTY: {
    +
          case OP_GET_PROPERTY: {
     
    vm.c
    in run()
            if (!IS_INSTANCE(peek(0))) {
    @@ -759,7 +759,7 @@ 

            return INTERPRET_RUNTIME_ERROR;
    +
            return INTERPRET_RUNTIME_ERROR;
           }
     
    vm.c
    in run()
    @@ -788,7 +788,7 @@

    The stack operations go like this:

    Popping two values and then pushing the first value back on the stack. -
    class Toast {}
    +
    class Toast {}
     var toast = Toast();
     print toast.jam = "grape"; // Prints "grape".
     
    @@ -796,7 +796,7 @@

          case OP_SET_PROPERTY: {
    +
          case OP_SET_PROPERTY: {
     
    vm.c
    in run()
            if (!IS_INSTANCE(peek(1))) {
    @@ -811,7 +811,7 @@ 

    class Pair {}
    +
    class Pair {}
     
     var pair = Pair();
     pair.first = 1;
    diff --git a/site/classes.html b/site/classes.html
    index dfef1a0f5..cad2ba55e 100644
    --- a/site/classes.html
    +++ b/site/classes.html
    @@ -1,5 +1,5 @@
     
    -
    +
     
     
     Classes · Crafting Interpreters
    @@ -165,7 +165,7 @@ 

    12 . 112 . 2Class Declarations

    Like we do, we’re gonna start with syntax. A class statement introduces a new name, so it lives in the declaration grammar rule.

    -
    declarationclassDecl
    +
    declarationclassDecl
                    | funDecl
                    | varDecl
                    | statement ;
    @@ -174,7 +174,7 @@ 

    12 .̴

    The new classDecl rule relies on the function rule we defined earlier. To refresh your memory:

    -
    functionIDENTIFIER "(" parameters? ")" block ;
    +
    functionIDENTIFIER "(" parameters? ")" block ;
     parametersIDENTIFIER ( "," IDENTIFIER )* ;
     

    In plain English, a class declaration is the class keyword, followed by the @@ -185,7 +185,7 @@

    12 .̴ -
    class Breakfast {
    +
    class Breakfast {
       cook() {
         print "Eggs a-fryin'!";
       }
    @@ -200,7 +200,7 @@ 

    12 .̴ fields to them as you see fit using normal imperative code.

    Over in our AST generator, the classDecl grammar rule gets its own statement node.

    -
          "Block      : List<Stmt> statements",
    +
          "Block      : List<Stmt> statements",
     
    tool/GenerateAst.java
    in main()
          "Class      : Token name, List<Stmt.Function> methods",
    @@ -217,7 +217,7 @@ 

    12 .̴ method: name, parameter list, and body.

    A class can appear anywhere a named declaration is allowed, triggered by the leading class keyword.

    -
        try {
    +
        try {
     
    lox/Parser.java
    in declaration()
          if (match(CLASS)) return classDeclaration();
    @@ -226,9 +226,9 @@ 

    12 .̴
    lox/Parser.java, in declaration()

    That calls out to:

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after declaration()
    -
      private Stmt classDeclaration() {
    +
      private Stmt classDeclaration() {
         Token name = consume(IDENTIFIER, "Expect class name.");
         consume(LEFT_BRACE, "Expect '{' before class body.");
     
    @@ -258,9 +258,9 @@ 

    12 .̴

    We wrap the name and list of methods into a Stmt.Class node and we’re done. Previously, we would jump straight into the interpreter, but now we need to plumb the node through the resolver first.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitBlockStmt()
    -
      @Override
    +
      @Override
       public Void visitClassStmt(Stmt.Class stmt) {
         declare(stmt.name);
         define(stmt.name);
    @@ -274,9 +274,9 @@ 

    12 .̴ declare a class as a local variable, but Lox permits it, so we need to handle it correctly.

    Now we interpret the class declaration.

    -
    lox/Interpreter.java
    +
    lox/Interpreter.java
    add after visitBlockStmt()
    -
      @Override
    +
      @Override
       public Void visitClassStmt(Stmt.Class stmt) {
         environment.define(stmt.name.lexeme, null);
         LoxClass klass = new LoxClass(stmt.name.lexeme);
    @@ -293,9 +293,9 @@ 

    12 .̴ variable binding process allows references to the class inside its own methods.

    We will refine it throughout the chapter, but the first draft of LoxClass looks like this:

    -
    lox/LoxClass.java
    +
    lox/LoxClass.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     import java.util.List;
     import java.util.Map;
    @@ -318,7 +318,7 @@ 

    12 .̴

    Literally a wrapper around a name. We don’t even store the methods yet. Not super useful, but it does have a toString() method so we can write a trivial script and test that class objects are actually being parsed and executed.

    -
    class DevonshireCream {
    +
    class DevonshireCream {
       serveOn() {
         return "Scones";
       }
    @@ -349,13 +349,13 @@ 

    12 .̴ introduce syntax like new. Therefore, we can skip past the front end straight into the runtime.

    Right now, if you try this:

    -
    class Bagel {}
    +
    class Bagel {}
     Bagel();
     

    You get a runtime error. visitCallExpr() checks to see if the called object implements LoxCallable and reports an error since LoxClass doesn’t. Not yet, that is.

    -
    import java.util.Map;
    +
    import java.util.Map;
     
     
    lox/LoxClass.java
    replace 1 line
    @@ -365,9 +365,9 @@

    12 .̴
    lox/LoxClass.java, replace 1 line

    Implementing that interface requires two methods.

    -
    lox/LoxClass.java
    +
    lox/LoxClass.java
    add after toString()
    -
      @Override
    +
      @Override
       public Object call(Interpreter interpreter,
                          List<Object> arguments) {
         LoxInstance instance = new LoxInstance(this);
    @@ -388,9 +388,9 @@ 

    12 .̴ constructors, we’ll revisit this.

    That leads us to LoxInstance, the runtime representation of an instance of a Lox class. Again, our first implementation starts small.

    -
    lox/LoxInstance.java
    +
    lox/LoxInstance.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     import java.util.HashMap;
     import java.util.Map;
    @@ -412,7 +412,7 @@ 

    12 .̴

    Like LoxClass, it’s pretty bare bones, but we’re only getting started. If you want to give it a try, here’s a script to run:

    -
    class Bagel {}
    +
    class Bagel {}
     var bagel = Bagel();
     print bagel; // Prints "Bagel instance".
     
    @@ -436,20 +436,20 @@

    12 on the current object.

    Lox, for better or worse, isn’t quite so pious about its OOP faith.

    -
    someObject.someProperty
    +
    someObject.someProperty
     

    An expression followed by . and an identifier reads the property with that name from the object the expression evaluates to. That dot has the same precedence as the parentheses in a function call expression, so we slot it into the grammar by replacing the existing call rule with:

    -
    callprimary ( "(" arguments? ")" | "." IDENTIFIER )* ;
    +
    callprimary ( "(" arguments? ")" | "." IDENTIFIER )* ;
     

    After a primary expression, we allow a series of any mixture of parenthesized calls and dotted property accesses. “Property access” is a mouthful, so from here on out, we’ll call these “get expressions”.

    12 . 4 . 1Get expressions

    The syntax tree node is:

    -
          "Call     : Expr callee, Token paren, List<Expr> arguments",
    +
          "Call     : Expr callee, Token paren, List<Expr> arguments",
     
    tool/GenerateAst.java
    in main()
          "Get      : Expr object, Token name",
    @@ -462,7 +462,7 @@ 

    12 . 4

    Following the grammar, the new parsing code goes in our existing call() method.

    -
        while (true) { 
    +
        while (true) { 
           if (match(LEFT_PAREN)) {
             expr = finishCall(expr);
     
    lox/Parser.java
    @@ -482,9 +482,9 @@

    12 . 4 along the tokens building up a chain of calls and gets as we find parentheses and dots, like so:

    Parsing a series of '.' and '()' expressions to an AST.

    Instances of the new Expr.Get node feed into the resolver.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitCallExpr()
    -
      @Override
    +
      @Override
       public Void visitGetExpr(Expr.Get expr) {
         resolve(expr.object);
         return null;
    @@ -500,9 +500,9 @@ 

    12 . 4

    You can literally see that property dispatch in Lox is dynamic since we don’t process the property name during the static resolution pass.

    -
    lox/Interpreter.java
    +
    lox/Interpreter.java
    add after visitCallExpr()
    -
      @Override
    +
      @Override
       public Object visitGetExpr(Expr.Get expr) {
         Object object = evaluate(expr.object);
         if (object instanceof LoxInstance) {
    @@ -520,7 +520,7 @@ 

    12 . 4 number, invoking a getter on it is a runtime error.

    If the object is a LoxInstance, then we ask it to look up the property. It must be time to give LoxInstance some actual state. A map will do fine.

    -
      private LoxClass klass;
    +
      private LoxClass klass;
     
    lox/LoxInstance.java
    in class LoxInstance
      private final Map<String, Object> fields = new HashMap<>();
    @@ -532,9 +532,9 @@ 

    12 . 4

    Each key in the map is a property name and the corresponding value is the property’s value. To look up a property on an instance:

    -
    lox/LoxInstance.java
    +
    lox/LoxInstance.java
    add after LoxInstance()
    -
      Object get(Token name) {
    +
      Object get(Token name) {
         if (fields.containsKey(name.lexeme)) {
           return fields.get(name.lexeme);
         }
    @@ -575,11 +575,11 @@ 

    12 . 4

    12 . 4 . 2Set expressions

    Setters use the same syntax as getters, except they appear on the left side of an assignment.

    -
    someObject.someProperty = value;
    +
    someObject.someProperty = value;
     

    In grammar land, we extend the rule for assignment to allow dotted identifiers on the left-hand side.

    -
    assignment     → ( call "." )? IDENTIFIER "=" assignment
    +
    assignment     → ( call "." )? IDENTIFIER "=" assignment
                    | logic_or ;
     

    Unlike getters, setters don’t chain. However, the reference to call allows any @@ -590,7 +590,7 @@

    12 . 4

    Just as we have two separate AST nodes for variable access and variable assignment, we need a second setter node to complement our getter node.

    -
          "Logical  : Expr left, Token operator, Expr right",
    +
          "Logical  : Expr left, Token operator, Expr right",
     
    tool/GenerateAst.java
    in main()
          "Set      : Expr object, Token name, Expr value",
    @@ -613,7 +613,7 @@ 

    12 . 4 assignment.

    We add another clause to that transformation to handle turning an Expr.Get expression on the left into the corresponding Expr.Set.

    -
            return new Expr.Assign(name, value);
    +
            return new Expr.Assign(name, value);
     
    lox/Parser.java
    in assignment()
          } else if (expr instanceof Expr.Get) {
    @@ -624,9 +624,9 @@ 

    12 . 4
    lox/Parser.java, in assignment()

    That’s parsing our syntax. We push that node through into the resolver.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitLogicalExpr()
    -
      @Override
    +
      @Override
       public Void visitSetExpr(Expr.Set expr) {
         resolve(expr.value);
         resolve(expr.object);
    @@ -640,9 +640,9 @@ 

    12 . 4 subexpressions of Expr.Set, the object whose property is being set, and the value it’s being set to.

    That leads us to the interpreter.

    -
    lox/Interpreter.java
    +
    lox/Interpreter.java
    add after visitLogicalExpr()
    -
      @Override
    +
      @Override
       public Object visitSetExpr(Expr.Set expr) {
         Object object = evaluate(expr.object);
     
    @@ -679,9 +679,9 @@ 

    12 . 4 to carefully specify it and ensure our implementations do these in the same order.

    -
    lox/LoxInstance.java
    +
    lox/LoxInstance.java
    add after get()
    -
      void set(Token name, Object value) {
    +
      void set(Token name, Object value) {
         fields.put(name.lexeme, value);
       }
     
    @@ -703,13 +703,13 @@

    12 .̴ pulled apart? Assuming that method in this example is a method on the class of object and not a field on the instance, what should the following piece of code do?

    -
    var m = object.method;
    +
    var m = object.method;
     m(argument);
     

    This program “looks up” the method and stores the resultwhatever that isin a variable and then calls that object later. Is this allowed? Can you treat a method like it’s a function on the instance?

    What about the other direction?

    -
    class Box {}
    +
    class Box {}
     
     fun notMethod(argument) {
       print "called function with " + argument;
    @@ -730,10 +730,10 @@ 

    12 .̴

    The first example is more obscure. One motivation is that users generally expect to be able to hoist a subexpression out into a local variable without changing the meaning of the program. You can take this:

    -
    breakfast(omelette.filledWith(cheese), sausage);
    +
    breakfast(omelette.filledWith(cheese), sausage);
     

    And turn it into this:

    -
    var eggs = omelette.filledWith(cheese);
    +
    var eggs = omelette.filledWith(cheese);
     breakfast(eggs, sausage);
     

    And it does the same thing. Likewise, since the . and the () in a method @@ -747,17 +747,17 @@

    12 .̴ body simply invokes a method on some object. Being able to look up the method and pass it directly saves you the chore of manually declaring a function to wrap it. Compare this:

    -
    fun callback(a, b, c) {
    +
    fun callback(a, b, c) {
       object.method(a, b, c);
     }
     
     takeCallback(callback);
     

    With this:

    -
    takeCallback(object.method);
    +
    takeCallback(object.method);
     
    -
    class Person {
    +
    class Person {
       sayName() {
         print this.name;
       }
    @@ -773,7 +773,7 @@ 

    12 .̴ “remember” the instance it was pulled off from? Does this inside the method still refer to that original object?

    Here’s a more pathological example to bend your brain:

    -
    class Person {
    +
    class Person {
       sayName() {
         print this.name;
       }
    @@ -809,7 +809,7 @@ 

    12 .̴ cases for a bit. We’ll get back to those. For now, let’s get basic method calls working. We’re already parsing the method declarations inside the class body, so the next step is to resolve them.

    -
        define(stmt.name);
    +
        define(stmt.name);
     
    lox/Resolver.java
    in visitClassStmt()
    @@ -830,7 +830,7 @@ 

    12 .̴

    We iterate through the methods in the class body and call the resolveFunction() method we wrote for handling function declarations already. The only difference is that we pass in a new FunctionType enum value.

    -
        NONE,
    +
        NONE,
     
        FUNCTION,
     
    lox/Resolver.java
    in enum FunctionType
    @@ -842,7 +842,7 @@

    12 .̴

    That’s going to be important when we resolve this expressions. For now, don’t worry about it. The interesting stuff is in the interpreter.

    -
        environment.define(stmt.name.lexeme, null);
    +
        environment.define(stmt.name.lexeme, null);
     
    lox/Interpreter.java
    in visitClassStmt()
    replace 1 line
    @@ -865,7 +865,7 @@

    12 .̴ method declaration blossoms into a LoxFunction object.

    We take all of those and wrap them up into a map, keyed by the method names. That gets stored in LoxClass.

    -
      final String name;
    +
      final String name;
     
    lox/LoxClass.java
    in class LoxClass
    replace 4 lines
    @@ -885,7 +885,7 @@

    12 .̴

    Where an instance stores state, the class stores behavior. LoxInstance has its map of fields, and LoxClass gets a map of methods. Even though methods are owned by the class, they are still accessed through instances of that class.

    -
      Object get(Token name) {
    +
      Object get(Token name) {
         if (fields.containsKey(name.lexeme)) {
           return fields.get(name.lexeme);
         }
    @@ -911,9 +911,9 @@ 

    12 .̴

    Looking for a field first implies that fields shadow methods, a subtle but important semantic point.

    -
    lox/LoxClass.java
    +
    lox/LoxClass.java
    add after LoxClass()
    -
      LoxFunction findMethod(String name) {
    +
      LoxFunction findMethod(String name) {
         if (methods.containsKey(name)) {
           return methods.get(name);
         }
    @@ -927,7 +927,7 @@ 

    12 .̴ now, a simple map lookup on the class’s method table is enough to get us started. Give it a try:

    -
    class Bacon {
    +
    class Bacon {
       eat() {
         print "Crunch crunch crunch!";
       }
    @@ -957,7 +957,7 @@ 

    12 . 6This

    called on. Or, more specifically, since methods are accessed and then invoked as two steps, it will refer to the object that the method was accessed from.

    That makes our job harder. Peep at:

    -
    class Egotist {
    +
    class Egotist {
       speak() {
         print this;
       }
    @@ -979,7 +979,7 @@ 

    12 . 6This

    the body would be able to find it later. LoxFunction already has the ability to hold on to a surrounding environment, so we have the machinery we need.

    Let’s walk through an example to see how it works:

    -
    class Cake {
    +
    class Cake {
       taste() {
         var adjective = "delicious";
         print "The " + this.flavor + " cake is " + adjective + "!";
    @@ -1006,7 +1006,7 @@ 

    12 . 6This

    successfully resolves to that instance.

    Reusing our environment code for implementing this also takes care of interesting cases where methods and functions interact, like:

    -
    class Thing {
    +
    class Thing {
       getCallback() {
         fun localFunction() {
           print this;
    @@ -1025,7 +1025,7 @@ 

    12 . 6This

    closures and environment chains should do all this correctly.

    Let’s code it up. The first step is adding new syntax for this.

    -
          "Set      : Expr object, Token name, Expr value",
    +
          "Set      : Expr object, Token name, Expr value",
     
    tool/GenerateAst.java
    in main()
          "This     : Token keyword",
    @@ -1038,7 +1038,7 @@ 

    12 . 6This

    Parsing is simple since it’s a single token which our lexer already recognizes as a reserved word.

    -
          return new Expr.Literal(previous().literal);
    +
          return new Expr.Literal(previous().literal);
         }
     
    lox/Parser.java
    in primary()
    @@ -1053,9 +1053,9 @@

    12 . 6This

    You can start to see how this works like a variable when we get to the resolver.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitSetExpr()
    -
      @Override
    +
      @Override
       public Void visitThisExpr(Expr.This expr) {
         resolveLocal(expr, expr.keyword);
         return null;
    @@ -1067,7 +1067,7 @@ 

    12 . 6This

    We resolve it exactly like any other local variable using “this” as the name for the “variable”. Of course, that’s not going to work right now, because “this” isn’t declared in any scope. Let’s fix that over in visitClassStmt().

    -
        define(stmt.name);
    +
        define(stmt.name);
     
     
    lox/Resolver.java
    in visitClassStmt()
    @@ -1081,7 +1081,7 @@

    12 . 6This

    Before we step in and start resolving the method bodies, we push a new scope and define “this” in it as if it were a variable. Then, when we’re done, we discard that surrounding scope.

    -
        }
    +
        }
     
     
    lox/Resolver.java
    in visitClassStmt()
    @@ -1100,7 +1100,7 @@

    12 . 6This

    each other. At runtime, we create the environment after we find the method on the instance. We replace the previous line of code that simply returned the method’s LoxFunction with this:

    -
        LoxFunction method = klass.findMethod(name.lexeme);
    +
        LoxFunction method = klass.findMethod(name.lexeme);
     
    lox/LoxInstance.java
    in get()
    replace 1 line
    @@ -1113,9 +1113,9 @@

    12 . 6This

    lox/LoxInstance.java, in get(), replace 1 line

    Note the new call to bind(). That looks like so:

    -
    lox/LoxFunction.java
    +
    lox/LoxFunction.java
    add after LoxFunction()
    -
      LoxFunction bind(LoxInstance instance) {
    +
      LoxFunction bind(LoxInstance instance) {
         Environment environment = new Environment(closure);
         environment.define("this", instance);
         return new LoxFunction(declaration, environment);
    @@ -1132,9 +1132,9 @@ 

    12 . 6This

    “this” is bound to the object.

    The remaining task is interpreting those this expressions. Similar to the resolver, it is the same as interpreting a variable expression.

    -
    lox/Interpreter.java
    +
    lox/Interpreter.java
    add after visitSetExpr()
    -
      @Override
    +
      @Override
       public Object visitThisExpr(Expr.This expr) {
         return lookUpVariable(expr.keyword, expr);
       }
    @@ -1148,10 +1148,10 @@ 

    12 . 6This

    12 . 6 . 1Invalid uses of this

    Wait a minute. What happens if you try to use this outside of a method? What about:

    -
    print this;
    +
    print this;
     

    Or:

    -
    fun notAMethod() {
    +
    fun notAMethod() {
       print this;
     }
     
    @@ -1163,7 +1163,7 @@

    12 .& detects return statements outside of functions. We’ll do something similar for this. In the vein of our existing FunctionType enum, we define a new ClassType one.

    -
      }
    +
      }
     
    lox/Resolver.java
    add after enum FunctionType
    @@ -1185,7 +1185,7 @@ 

    12 .& declaration while traversing the syntax tree. It starts out NONE which means we aren’t in one.

    When we begin to resolve a class declaration, we change that.

    -
      public Void visitClassStmt(Stmt.Class stmt) {
    +
      public Void visitClassStmt(Stmt.Class stmt) {
     
    lox/Resolver.java
    in visitClassStmt()
        ClassType enclosingClass = currentClass;
    @@ -1201,7 +1201,7 @@ 

    12 .& inside another.

    Once the methods have been resolved, we “pop” that stack by restoring the old value.

    -
        endScope();
    +
        endScope();
     
     
    lox/Resolver.java
    in visitClassStmt()
    @@ -1213,7 +1213,7 @@

    12 .&

    When we resolve a this expression, the currentClass field gives us the bit of data we need to report an error if the expression doesn’t occur nestled inside a method body.

    -
      public Void visitThisExpr(Expr.This expr) {
    +
      public Void visitThisExpr(Expr.This expr) {
     
    lox/Resolver.java
    in visitThisExpr()
        if (currentClass == ClassType.NONE) {
    @@ -1268,7 +1268,7 @@ 

    a class. C++, Java, and C# use a method whose name matches the class name. Ruby and Python call it init(). The latter is nice and short, so we’ll do that.

    In LoxClass’s implementation of LoxCallable, we add a few more lines.

    -
                         List<Object> arguments) {
    +
                         List<Object> arguments) {
         LoxInstance instance = new LoxInstance(this);
     
    lox/LoxClass.java
    in call()
    @@ -1285,7 +1285,7 @@

    method. If we find one, we immediately bind and invoke it just like a normal method call. The argument list is forwarded along.

    That argument list means we also need to tweak how a class declares its arity.

    -
      public int arity() {
    +
      public int arity() {
     
    lox/LoxClass.java
    in arity()
    replace 1 line
    @@ -1307,7 +1307,7 @@

    12 . 7 . 1Invoking init() directly

    As usual, exploring this new semantic territory rustles up a few weird creatures. Consider:

    -
    class Foo {
    +
    class Foo {
       init() {
         print this;
       }
    @@ -1332,7 +1332,7 @@ 

    12̴ happiness and productivity. The trick is figuring out which corners to cut that won’t cause your users and future self to curse your shortsightedness.

    -
          return returnValue.value;
    +
          return returnValue.value;
         }
     
    lox/LoxFunction.java
    in call()
    @@ -1345,7 +1345,7 @@

    12̴

    If the function is an initializer, we override the actual return value and forcibly return this. That relies on a new isInitializer field.

    -
      private final Environment closure;
    +
      private final Environment closure;
     
     
    lox/LoxFunction.java
    in class LoxFunction
    @@ -1365,7 +1365,7 @@

    12̴ this to return. To avoid that weird edge case, we’ll directly store whether the LoxFunction represents an initializer method. That means we need to go back and fix the few places where we create LoxFunctions.

    -
      public Void visitFunctionStmt(Stmt.Function stmt) {
    +
      public Void visitFunctionStmt(Stmt.Function stmt) {
     
    lox/Interpreter.java
    in visitFunctionStmt()
    replace 1 line
    @@ -1377,7 +1377,7 @@

    12̴

    For actual function declarations, isInitializer is always false. For methods, we check the name.

    -
        for (Stmt.Function method : stmt.methods) {
    +
        for (Stmt.Function method : stmt.methods) {
     
    lox/Interpreter.java
    in visitClassStmt()
    replace 1 line
    @@ -1389,7 +1389,7 @@

    12̴

    And then in bind() where we create the closure that binds this to a method, we pass along the original method’s value.

    -
        environment.define("this", instance);
    +
        environment.define("this", instance);
     
    lox/LoxFunction.java
    in bind()
    replace 1 line
    @@ -1403,7 +1403,7 @@

    12 .

    We aren’t out of the woods yet. We’ve been assuming that a user-written initializer doesn’t explicitly return a value because most constructors don’t. What should happen if a user tries:

    -
    class Foo {
    +
     

    It’s definitely not going to do what they want, so we may as well make it a static error. Back in the resolver, we add another case to FunctionType.

    -
        FUNCTION,
    +
        FUNCTION,
     
    lox/Resolver.java
    in enum FunctionType
        INITIALIZER,
    @@ -1421,7 +1421,7 @@ 

    12 .

    We use the visited method’s name to determine if we’re resolving an initializer or not.

    -
          FunctionType declaration = FunctionType.METHOD;
    +
          FunctionType declaration = FunctionType.METHOD;
     
    lox/Resolver.java
    in visitClassStmt()
          if (method.name.lexeme.equals("init")) {
    @@ -1434,7 +1434,7 @@ 

    12 .

    When we later traverse into a return statement, we check that field and make it an error to return a value from inside an init() method.

    -
        if (stmt.value != null) {
    +
        if (stmt.value != null) {
     
    lox/Resolver.java
    in visitReturnStmt()
          if (currentFunction == FunctionType.INITIALIZER) {
    @@ -1448,7 +1448,7 @@ 

    12 .

    We’re still not done. We statically disallow returning a value from an initializer, but you can still use an empty early return.

    -
    class Foo {
    +
    class Foo {
       init() {
         return;
       }
    @@ -1457,7 +1457,7 @@ 

    12 .

    That is actually kind of useful sometimes, so we don’t want to disallow it entirely. Instead, it should return this instead of nil. That’s an easy fix over in LoxFunction.

    -
        } catch (Return returnValue) {
    +
        } catch (Return returnValue) {
     
    lox/LoxFunction.java
    in call()
          if (isInitializer) return closure.getAt(0, "this");
    @@ -1479,7 +1479,7 @@ 

    Challenges

    that can be called directly on the class object itself. Add support for them. Use a class keyword preceding the method to indicate a static method that hangs off the class object.

    -
    class Math {
    +
    class Math {
       class square(n) {
         return n * n;
       }
    @@ -1497,7 +1497,7 @@ 

    Challenges

    code. Extend Lox to support getter methods. These are declared without a parameter list. The body of the getter is executed when a property with that name is accessed.

    -
    class Circle {
    +
    class Circle {
       init(radius) {
         this.radius = radius;
       }
    @@ -1560,7 +1560,7 @@ 

    Design Note: Prototypes and Powerpower, which I define as:

    -
    power = breadth × ease ÷ complexity
    +
    power = breadth × ease ÷ complexity
     

    None of these are precise numeric measures. I’m using math as analogy here, not actual quantification.

    diff --git a/site/closures.html b/site/closures.html index c0e2f5162..84760bbf9 100644 --- a/site/closures.html +++ b/site/closures.html @@ -1,5 +1,5 @@ - + Closures · Crafting Interpreters @@ -101,7 +101,7 @@

    Closures

    machine with working functions. What it lacks is closures. Aside from global variables, which are their own breed of animal, a function has no way to reference a variable declared outside of its own body.

    -
    var x = "global";
    +
    var x = "global";
     fun outer() {
       var x = "outer";
       fun inner() {
    @@ -118,7 +118,7 @@ 

    Closures

    stores locals on a stack. We used a stack because I claimed locals have stack semanticsvariables are discarded in the reverse order that they are created. But with closures, that’s only mostly true.

    -
    fun makeClosure() {
    +
    fun makeClosure() {
       var local = "local";
       fun closure() {
         print local;
    @@ -182,7 +182,7 @@ 

    25 . 1 -
    fun makeClosure(value) {
    +
    fun makeClosure(value) {
       fun closure() {
         print value;
       }
    @@ -217,9 +217,9 @@ 

    25 . 1 -
    object.h
    +
    object.h
    add after struct ObjString
    -
    typedef struct {
    +
    typedef struct {
       Obj obj;
       ObjFunction* function;
     } ObjClosure;
    @@ -229,7 +229,7 @@ 

    25 . 1Right now, it simply points to an ObjFunction and adds the necessary object header stuff. Grinding through the usual ceremony for adding a new object type to clox, we declare a C function to create a new closure.

    -
    } ObjClosure;
    +
     
     

    Then we implement it here:

    -
    object.c
    +
    object.c
    add after allocateObject()
    -
    ObjClosure* newClosure(ObjFunction* function) {
    +
    ObjClosure* newClosure(ObjFunction* function) {
       ObjClosure* closure = ALLOCATE_OBJ(ObjClosure, OBJ_CLOSURE);
       closure->function = function;
       return closure;
    @@ -251,7 +251,7 @@ 

    25 . 1It takes a pointer to the ObjFunction it wraps. It also initializes the type field to a new type.

    -
    typedef enum {
    +
     
     

    And when we’re done with a closure, we release its memory.

    -
      switch (object->type) {
    +
      switch (object->type) {
     
    memory.c
    in freeObject()
        case OBJ_CLOSURE: {
    @@ -283,7 +283,7 @@ 

    25 . 1Perhaps I should have defined a macro to make it easier to generate these macros. Maybe that would be a little too meta.

    -
    #define OBJ_TYPE(value)        (AS_OBJ(value)->type)
    +
     
     

    And to cast a value:

    -
    #define IS_STRING(value)       isObjType(value, OBJ_STRING)
    +
     
     

    Closures are first-class objects, so you can print them.

    -
      switch (OBJ_TYPE(value)) {
    +
      switch (OBJ_TYPE(value)) {
     
    object.c
    in printObject()
        case OBJ_CLOSURE:
    @@ -320,7 +320,7 @@ 

    -
      ObjFunction* function = endCompiler();
    +
      ObjFunction* function = endCompiler();
     
    compiler.c
    in function()
    replace 1 line
    @@ -332,7 +332,7 @@

    Before, the final bytecode for a function declaration was a single OP_CONSTANT instruction to load the compiled function from the surrounding function’s constant table and push it onto the stack. Now we have a new instruction.

    -
      OP_CALL,
    +
      OP_CALL,
     
    chunk.h
    in enum OpCode
      OP_CLOSURE,
    @@ -345,7 +345,7 @@ 

    First, let’s be diligent VM hackers and slot in disassembler support for the instruction.

    -
        case OP_CALL:
    +
        case OP_CALL:
           return byteInstruction("OP_CALL", chunk, offset);
     
    debug.c
    in disassembleInstruction()
    @@ -370,7 +370,7 @@

    —function calls, call frames, etc. We’ll start with the instruction, though.

    -
          }
    +
          }
     
    vm.c
    in run()
          case OP_CLOSURE: {
    @@ -387,7 +387,7 @@ 

        switch (OBJ_TYPE(callee)) {
    +
        switch (OBJ_TYPE(callee)) {
     
    vm.c
    in callValue()
    replace 2 lines
    @@ -409,7 +409,7 @@

    vm.c
    +
    vm.c
    function call()
    replace 1 line
    static bool call(ObjClosure* closure, int argCount) {
    @@ -420,7 +420,7 @@ 

    static bool call(ObjClosure* closure, int argCount) {
    +
    static bool call(ObjClosure* closure, int argCount) {
     
    vm.c
    in call()
    replace 3 lines
    @@ -435,7 +435,7 @@

    
     
     

    This necessitates changing the declaration of CallFrame too.

    -
    typedef struct {
    +
    typedef struct {
     
    vm.h
    in struct CallFrame
    replace 1 line
    @@ -458,7 +458,7 @@

        (uint16_t)((frame->ip[-2] << 8) | frame->ip[-1]))
    +
        (uint16_t)((frame->ip[-2] << 8) | frame->ip[-1]))
     
     
    vm.c
    in run()
    @@ -473,7 +473,7 @@

        printf("\n");
    +
     
     

    Likewise when reporting a runtime error:

    -
        CallFrame* frame = &vm.frames[i];
    +
        CallFrame* frame = &vm.frames[i];
     
    vm.c
    in runtimeError()
    replace 1 line
    @@ -495,7 +495,7 @@

      push(OBJ_VAL(function));
    +
      push(OBJ_VAL(function));
     
    vm.c
    in interpret()
    replace 1 line
    @@ -541,7 +541,7 @@

    25 . 2Upvalues<

    This would be a fine approach if clox didn’t have a single-pass compiler. But that restriction we chose in our implementation makes things harder. Take a look at this example:

    -
    fun outer() {
    +
    fun outer() {
       var x = 1;    // (1)
       x = 2;        // (2)
       fun inner() { // (3)
    @@ -567,7 +567,7 @@ 

    25 . 2Upvalues< and we create a closure for it, the VM creates the array of upvalues and wires them up to “capture” the surrounding local variables that the closure needs.

    For example, if we throw this program at clox,

    -
    {
    +
    {
       var a = 3;
       fun f() {
         print a;
    @@ -594,7 +594,7 @@ 

    25 .̴ consider the local scopes of enclosing functionsthey get skipped right over. The first change, then, is inserting a resolution step for those outer local scopes.

    -
      if (arg != -1) {
    +
      if (arg != -1) {
         getOp = OP_GET_LOCAL;
         setOp = OP_SET_LOCAL;
     
    compiler.c
    @@ -611,7 +611,7 @@

    25 .̴ that variable. (We’ll get into what that means later.) Otherwise, it returns -1 to indicate the variable wasn’t found. If it was found, we use these two new instructions for reading or writing to the variable through its upvalue:

    -
      OP_SET_GLOBAL,
    +
      OP_SET_GLOBAL,
     
    chunk.h
    in enum OpCode
      OP_GET_UPVALUE,
    @@ -623,9 +623,9 @@ 

    25 .̴

    We’re implementing this sort of top-down, so I’ll show you how these work at runtime soon. The part to focus on now is how the compiler actually resolves the identifier.

    -
    compiler.c
    +
    compiler.c
    add after resolveLocal()
    -
    static int resolveUpvalue(Compiler* compiler, Token* name) {
    +
    static int resolveUpvalue(Compiler* compiler, Token* name) {
       if (compiler->enclosing == NULL) return -1;
     
       int local = resolveLocal(compiler->enclosing, name);
    @@ -653,7 +653,7 @@ 

    25 .̴

    Otherwise, we try to resolve the identifier as a local variable in the enclosing compiler. In other words, we look for it right outside the current function. For example:

    -
    fun outer() {
    +
    fun outer() {
       var x = 1;
       fun inner() {
         print x; // (1)
    @@ -666,9 +666,9 @@ 

    25 .̴ examplethen we’ve successfully resolved the variable. We create an upvalue so that the inner function can access the variable through that. The upvalue is created here:

    -
    compiler.c
    +
    compiler.c
    add after resolveLocal()
    -
    static int addUpvalue(Compiler* compiler, uint8_t index,
    +
    static int addUpvalue(Compiler* compiler, uint8_t index,
                           bool isLocal) {
       int upvalueCount = compiler->function->upvalueCount;
       compiler->upvalues[upvalueCount].isLocal = isLocal;
    @@ -704,7 +704,7 @@ 

    25 .̴ separate upvalue for each identifier expression. To fix that, before we add a new upvalue, we first check to see if the function already has an upvalue that closes over that variable.

    -
      int upvalueCount = compiler->function->upvalueCount;
    +
      int upvalueCount = compiler->function->upvalueCount;
     
    compiler.c
    in addUpvalue()
    @@ -725,7 +725,7 @@ 

    25 .̴ through and add the new upvalue.

    These two functions access and modify a bunch of new state, so let’s define that. First, we add the upvalue count to ObjFunction.

    -
      int arity;
    +
      int arity;
     
    object.h
    in struct ObjFunction
      int upvalueCount;
    @@ -735,7 +735,7 @@ 

    25 .̴

    We’re conscientious C programmers, so we zero-initialize that when an ObjFunction is first allocated.

    -
      function->arity = 0;
    +
      function->arity = 0;
     
    object.c
    in newFunction()
      function->upvalueCount = 0;
    @@ -744,7 +744,7 @@ 

    25 .̴
    object.c, in newFunction()

    In the compiler, we add a field for the upvalue array.

    -
      int localCount;
    +
      int localCount;
     
    compiler.c
    in struct Compiler
      Upvalue upvalues[UINT8_COUNT];
    @@ -757,7 +757,7 @@ 

    25 .̴ operand, so there’s a restriction on how many upvalues a function can havehow many unique variables it can close over. Given that, we can afford a static array that large. We also need to make sure the compiler doesn’t overflow that limit.

    -
        if (upvalue->index == index && upvalue->isLocal == isLocal) {
    +
        if (upvalue->index == index && upvalue->isLocal == isLocal) {
           return i;
         }
       }
    @@ -774,9 +774,9 @@ 

    25 .̴
    compiler.c, in addUpvalue()

    Finally, the Upvalue struct type itself.

    -
    compiler.c
    +
    compiler.c
    add after struct Local
    -
    typedef struct {
    +
    typedef struct {
       uint8_t index;
       bool isLocal;
     } Upvalue;
    @@ -789,7 +789,7 @@ 

    25 .

    In the example I showed before, the closure is accessing a variable declared in the immediately enclosing function. Lox also supports accessing local variables declared in any enclosing scope, as in:

    -
    fun outer() {
    +
    fun outer() {
       var x = 1;
       fun middle() {
         fun inner() {
    @@ -809,7 +809,7 @@ 

    25 . valid but likely to trip up an implementation written by someone with a less perverse imagination than you.

    -
    fun outer() {
    +
     

    When you run this, it should print:

    -
    return from outer
    +
    return from outer
     create inner closure
     value
     
    @@ -867,7 +867,7 @@

    25 . guaranteed to still be around at the point that the inner function declaration executes.

    In order to implement this, resolveUpvalue() becomes recursive.

    -
      if (local != -1) {
    +
      if (local != -1) {
         return addUpvalue(compiler, (uint8_t)local, true);
       }
     
    @@ -927,7 +927,7 @@ 

    25 . or an upvalue in the case of transitive closures. We finally have enough data to emit bytecode which creates a closure at runtime that captures all of the correct variables.

    -
      emitBytes(OP_CLOSURE, makeConstant(OBJ_VAL(function)));
    +
      emitBytes(OP_CLOSURE, makeConstant(OBJ_VAL(function)));
     
    compiler.c
    in function()
    @@ -948,7 +948,7 @@ 

    25 . capture.

    This odd encoding means we need some bespoke support in the disassembly code for OP_CLOSURE.

    -
          printf("\n");
    +
          printf("\n");
     
    debug.c
    in disassembleInstruction()
    @@ -967,7 +967,7 @@ 

    25 .
    debug.c, in disassembleInstruction()

    For example, take this script:

    -
    fun outer() {
    +
     

    If we disassemble the instruction that creates the closure for inner(), it prints this:

    -
    0004    9 OP_CLOSURE          2 <fn inner>
    +
    0004    9 OP_CLOSURE          2 <fn inner>
     0006      |                     upvalue 0
     0008      |                     local 1
     0010      |                     upvalue 1
     0012      |                     local 2
     

    We have two other, simpler instructions to add disassembler support for.

    -
        case OP_SET_GLOBAL:
    +
        case OP_SET_GLOBAL:
           return constantInstruction("OP_SET_GLOBAL", chunk, offset);
     
    debug.c
    in disassembleInstruction()
    @@ -1002,7 +1002,7 @@

    25 .

    These both have a single-byte operand, so there’s nothing exciting going on. We do need to add an include so the debug module can get to AS_FUNCTION().

    -
    #include "debug.h"
    +
    #include "debug.h"
     
    debug.c
    #include "object.h"
     
    #include "value.h"
    @@ -1017,9 +1017,9 @@ 

    25 . 3Each OP_CLOSURE instruction is now followed by the series of bytes that specify the upvalues the ObjClosure should own. Before we process those operands, we need a runtime representation for upvalues.

    -
    object.h
    +
    object.h
    add after struct ObjString
    -
    typedef struct ObjUpvalue {
    +
    typedef struct ObjUpvalue {
       Obj obj;
       Value* location;
     } ObjUpvalue;
    @@ -1037,7 +1037,7 @@ 

    25 . 3variable, not a value. This is important because it means that when we assign to the variable the upvalue captures, we’re assigning to the actual variable, not a copy. For example:

    -
    fun outer() {
    +
    fun outer() {
       var x = "before";
       fun inner() {
         x = "assigned";
    @@ -1051,7 +1051,7 @@ 

    25 . 3

    Because upvalues are objects, we’ve got all the usual object machinery, starting with a constructor-like function:

    -
    ObjString* copyString(const char* chars, int length);
    +
    ObjString* copyString(const char* chars, int length);
     
    object.h
    add after copyString()
    ObjUpvalue* newUpvalue(Value* slot);
    @@ -1061,9 +1061,9 @@ 

    25 . 3It takes the address of the slot where the closed-over variable lives. Here is the implementation:

    -
    object.c
    +
    object.c
    add after copyString()
    -
    ObjUpvalue* newUpvalue(Value* slot) {
    +
    ObjUpvalue* newUpvalue(Value* slot) {
       ObjUpvalue* upvalue = ALLOCATE_OBJ(ObjUpvalue, OBJ_UPVALUE);
       upvalue->location = slot;
       return upvalue;
    @@ -1073,7 +1073,7 @@ 

    25 . 3We simply initialize the object and store the pointer. That requires a new object type.

    -
      OBJ_STRING,
    +
     
     

    And on the back side, a destructor-like function:

    -
          FREE(ObjString, object);
    +
          FREE(ObjString, object);
           break;
         }
     
    memory.c
    @@ -1098,7 +1098,7 @@

    25 . 3

    And, finally, to print:

    -
        case OBJ_STRING:
    +
        case OBJ_STRING:
           printf("%s", AS_CSTRING(value));
           break;
     
    object.c
    @@ -1118,7 +1118,7 @@

    25 . 325 . 3 . 1Upvalues in closures

    When I first introduced upvalues, I said each closure has an array of them. We’ve finally worked our way back to implementing that.

    -
      ObjFunction* function;
    +
      ObjFunction* function;
     
    object.h
    in struct ObjClosure
      ObjUpvalue** upvalues;
    @@ -1140,7 +1140,7 @@ 

    25 .&

    When we create an ObjClosure, we allocate an upvalue array of the proper size, which we determined at compile time and stored in the ObjFunction.

    -
    ObjClosure* newClosure(ObjFunction* function) {
    +
    ObjClosure* newClosure(ObjFunction* function) {
     
    object.c
    in newClosure()
      ObjUpvalue** upvalues = ALLOCATE(ObjUpvalue*,
    @@ -1159,7 +1159,7 @@ 

    25 .& memory manager never sees uninitialized memory.

    Then we store the array in the new closure, as well as copy the count over from the ObjFunction.

    -
      closure->function = function;
    +
      closure->function = function;
     
    object.c
    in newClosure()
      closure->upvalues = upvalues;
    @@ -1169,7 +1169,7 @@ 

    25 .&
    object.c, in newClosure()

    When we free an ObjClosure, we also free the upvalue array.

    -
        case OBJ_CLOSURE: {
    +
        case OBJ_CLOSURE: {
     
    memory.c
    in freeObject()
          ObjClosure* closure = (ObjClosure*)object;
    @@ -1184,7 +1184,7 @@ 

    25 .&

    We fill the upvalue array over in the interpreter when it creates a closure. This is where we walk through all of the operands after OP_CLOSURE to see what kind of upvalue each slot captures.

    -
            push(OBJ_VAL(closure));
    +
            push(OBJ_VAL(closure));
     
    vm.c
    in run()
            for (int i = 0; i < closure->upvalueCount; i++) {
    @@ -1218,9 +1218,9 @@ 

    25 .& window. That window begins at frame->slots, which points to slot zero. Adding index offsets that to the local slot we want to capture. We pass that pointer here:

    -
    vm.c
    +
    vm.c
    add after callValue()
    -
    static ObjUpvalue* captureUpvalue(Value* local) {
    +
    static ObjUpvalue* captureUpvalue(Value* local) {
       ObjUpvalue* createdUpvalue = newUpvalue(local);
       return createdUpvalue;
     }
    @@ -1237,7 +1237,7 @@ 

    25 .& an array full of upvalues pointing to variables.

    With that in hand, we can implement the instructions that work with those upvalues.

    -
          }
    +
          }
     
    vm.c
    in run()
          case OP_GET_UPVALUE: {
    @@ -1252,7 +1252,7 @@ 

    25 .&

    The operand is the index into the current function’s upvalue array. So we simply look up the corresponding upvalue and dereference its location pointer to read the value in that slot. Setting a variable is similar.

    -
          }
    +
          }
     
    vm.c
    in run()
          case OP_SET_UPVALUE: {
    @@ -1278,7 +1278,7 @@ 

    25 .&

    This is a milestone. As long as all of the variables remain on the stack, we have working closures. Try this:

    -
    fun outer() {
    +
    fun outer() {
       var x = "outside";
       fun inner() {
         print x;
    @@ -1292,7 +1292,7 @@ 

    25 . 4Of course, a key feature of closures is that they hold on to the variable as long as needed, even after the function that declares the variable has returned. Here’s another example that should work:

    -
    fun outer() {
    +
    fun outer() {
       var x = "outside";
       fun inner() {
         print x;
    @@ -1318,7 +1318,7 @@ 

    25 .& -
    var globalSet;
    +
    var globalSet;
     var globalGet;
     
     fun main() {
    @@ -1412,7 +1412,7 @@ 

    25 . 4& capture, but not in the other direction. So we first need to add some extra tracking inside the existing Local struct so that we can tell if a given local is captured by a closure.

    -
      int depth;
    +
      int depth;
     
    compiler.c
    in struct Local
      bool isCaptured;
    @@ -1422,7 +1422,7 @@ 

    25 . 4&

    This field is true if the local is captured by any later nested function declaration. Initially, all locals are not captured.

    -
      local->depth = -1;
    +
      local->depth = -1;
     
    compiler.c
    in addLocal()
      local->isCaptured = false;
    @@ -1436,7 +1436,7 @@ 

    25 . 4&

    Later in the book, it will become possible for a user to capture this variable. Just building some anticipation here.

    -
      local->depth = 0;
    +
      local->depth = 0;
     
    compiler.c
    in initCompiler()
      local->isCaptured = false;
    @@ -1446,7 +1446,7 @@ 

    25 . 4&

    When resolving an identifier, if we end up creating an upvalue for a local variable, we mark it as captured.

    -
      if (local != -1) {
    +
      if (local != -1) {
     
    compiler.c
    in resolveUpvalue()
        compiler->enclosing->locals[local].isCaptured = true;
    @@ -1457,7 +1457,7 @@ 

    25 . 4&

    Now, at the end of a block scope when the compiler emits code to free the stack slots for the locals, we can tell which ones need to get hoisted onto the heap. We’ll use a new instruction for that.

    -
      while (current->localCount > 0 &&
    +
      while (current->localCount > 0 &&
              current->locals[current->localCount - 1].depth >
                 current->scopeDepth) {
     
    compiler.c
    @@ -1476,7 +1476,7 @@

    25 . 4&

    The instruction requires no operand. We know that the variable will always be right on top of the stack at the point that this instruction executes. We declare the instruction.

    -
      OP_CLOSURE,
    +
      OP_CLOSURE,
     
    chunk.h
    in enum OpCode
      OP_CLOSE_UPVALUE,
    @@ -1485,7 +1485,7 @@ 

    25 . 4&
    chunk.h, in enum OpCode

    And add trivial disassembler support for it:

    -
        }
    +
        }
     
    debug.c
    in disassembleInstruction()
        case OP_CLOSE_UPVALUE:
    @@ -1544,7 +1544,7 @@ 

    25̴ That suggests using a linked list instead of a dynamic array. Since we defined the ObjUpvalue struct ourselves, the easiest implementation is an intrusive list that puts the next pointer right inside the ObjUpvalue struct itself.

    -
      Value* location;
    +
      Value* location;
     
    object.h
    in struct ObjUpvalue
      struct ObjUpvalue* next;
    @@ -1554,7 +1554,7 @@ 

    25̴

    When we allocate an upvalue, it is not attached to any list yet so the link is NULL.

    -
      upvalue->location = slot;
    +
      upvalue->location = slot;
     
    object.c
    in newUpvalue()
      upvalue->next = NULL;
    @@ -1563,7 +1563,7 @@ 

    25̴
    object.c, in newUpvalue()

    The VM owns the list, so the head pointer goes right inside the main VM struct.

    -
      Table strings;
    +
      Table strings;
     
    vm.h
    in struct VM
      ObjUpvalue* openUpvalues;
    @@ -1572,7 +1572,7 @@ 

    25̴
    vm.h, in struct VM

    The list starts out empty.

    -
      vm.frameCount = 0;
    +
      vm.frameCount = 0;
     
    vm.c
    in resetStack()
      vm.openUpvalues = NULL;
    @@ -1583,7 +1583,7 @@ 

    25̴

    Starting with the first upvalue pointed to by the VM, each open upvalue points to the next open upvalue that references a local variable farther down the stack. This script, for example,

    -
    {
    +
    {
       var a = 1;
       fun f() {
         print a;
    @@ -1601,7 +1601,7 @@ 

    25̴

    should produce a series of linked upvalues like so:

    Three upvalues in a linked list.

    Whenever we close over a local variable, before creating a new upvalue, we look for an existing one in the list.

    -
    static ObjUpvalue* captureUpvalue(Value* local) {
    +
    static ObjUpvalue* captureUpvalue(Value* local) {
     
    vm.c
    in captureUpvalue()
      ObjUpvalue* prevUpvalue = NULL;
    @@ -1649,7 +1649,7 @@ 

    25̴

    In the first case, we’re done and we’ve returned. Otherwise, we create a new upvalue for our local slot and insert it into the list at the right location.

    -
      ObjUpvalue* createdUpvalue = newUpvalue(local);
    +
      ObjUpvalue* createdUpvalue = newUpvalue(local);
     
    vm.c
    in captureUpvalue()
      createdUpvalue->next = upvalue;
    @@ -1691,7 +1691,7 @@ 

    The compiler helpfully emits an OP_CLOSE_UPVALUE instruction to tell the VM exactly when a local variable should be hoisted onto the heap. Executing that instruction is the interpreter’s responsibility.

    -
          }
    +
          }
     
    vm.c
    in run()
          case OP_CLOSE_UPVALUE:
    @@ -1708,9 +1708,9 @@ 

    pop().

    The fun stuff happens here:

    -
    vm.c
    +
    vm.c
    add after captureUpvalue()
    -
    static void closeUpvalues(Value* last) {
    +
    static void closeUpvalues(Value* last) {
       while (vm.openUpvalues != NULL &&
              vm.openUpvalues->location >= last) {
         ObjUpvalue* upvalue = vm.openUpvalues;
    @@ -1746,7 +1746,7 @@ 

    We don’t need to change how OP_GET_UPVALUE and OP_SET_UPVALUE are interpreted at all. That keeps them simple, which in turn keeps them fast. We do need to add the new field to ObjUpvalue, though.

    -
      Value* location;
    +
      Value* location;
     
    object.h
    in struct ObjUpvalue
      Value closed;
    @@ -1756,7 +1756,7 @@ 

    And we should zero it out when we create an ObjUpvalue so there’s no uninitialized memory floating around.

    -
      ObjUpvalue* upvalue = ALLOCATE_OBJ(ObjUpvalue, OBJ_UPVALUE);
    +
      ObjUpvalue* upvalue = ALLOCATE_OBJ(ObjUpvalue, OBJ_UPVALUE);
     
    object.c
    in newUpvalue()
      upvalue->closed = NIL_VAL;
    @@ -1779,7 +1779,7 @@ 

    This is the reason closeUpvalues() accepts a pointer to a stack slot. When a function returns, we call that same helper and pass in the first stack slot owned by the function.

    -
            Value result = pop();
    +
            Value result = pop();
     
    vm.c
    in run()
            closeUpvalues(frame->slots);
    @@ -1864,7 +1864,7 @@ 

    Design Note: Closing Over the Loop V share a reference to the same underlying storage location. This fact is visible when new values are assigned to the variable. Obviously, if two closures capture different variables, there is no sharing.

    -
    var globalOne;
    +
     

    This prints “one” then “two”. In this example, it’s pretty clear that the two a variables are different. But it’s not always so obvious. Consider:

    -
    var globalOne;
    +
    var globalOne;
     var globalTwo;
     
     fun main() {
    @@ -1920,7 +1920,7 @@ 

    Design Note: Closing Over the Loop V each iteration get its own distinct a variable?

    The script here is strange and contrived, but this does show up in real code in languages that aren’t as minimal as clox. Here’s a JavaScript example:

    -
    var closures = [];
    +
    var closures = [];
     for (var i = 1; i <= 2; i++) {
       closures.push(function () { console.log(i); });
     }
    @@ -1941,7 +1941,7 @@ 

    Design Note: Closing Over the Loop V

    If you’re familiar with JavaScript, you probably know that variables declared using var are implicitly hoisted to the surrounding function or top-level scope. It’s as if you really wrote this:

    -
    var closures = [];
    +
     

    At that point, it’s clearer that there is only a single i. Now consider if you change the program to use the newer let keyword:

    -
    var closures = [];
    +
    var closures = [];
     for (let i = 1; i <= 2; i++) {
       closures.push(function () { console.log(i); });
     }
    @@ -1965,7 +1965,7 @@ 

    Design Note: Closing Over the Loop V it. The increment clause is i++. That looks very much like it is assigning to and mutating an existing variable, not creating a new one.

    Let’s try some other languages. Here’s Python:

    -
    closures = []
    +
    closures = []
     for i in range(1, 3):
       closures.append(lambda: print(i))
     
    @@ -1979,7 +1979,7 @@ 

    Design Note: Closing Over the Loop V value, so this prints “2” twice.

    What about Ruby? Ruby has two typical ways to iterate numerically. Here’s the classic imperative style:

    -
    closures = []
    +
     

    This, like Python, prints “2” twice. But the more idiomatic Ruby style is using a higher-order each() method on range objects:

    -
    closures = []
    +
    closures = []
     (1..2).each do |i|
       closures << lambda { puts i }
     end
    diff --git a/site/compiling-expressions.html b/site/compiling-expressions.html
    index 34803710a..1671b985f 100644
    --- a/site/compiling-expressions.html
    +++ b/site/compiling-expressions.html
    @@ -1,5 +1,5 @@
     
    -
    +
     
     
     Compiling Expressions · Crafting Interpreters
    @@ -133,7 +133,7 @@ 

    Compiling Expressions

    through. You have to eat your vegetables before you get dessert. First, let’s ditch that temporary scaffolding we wrote for testing the scanner and replace it with something more useful.

    -
    InterpretResult interpret(const char* source) {
    +
    InterpretResult interpret(const char* source) {
     
    vm.c
    in interpret()
    replace 2 lines
    @@ -164,7 +164,7 @@

    Compiling Expressions

    Otherwise, we send the completed chunk over to the VM to be executed. When the VM finishes, we free the chunk and we’re done. As you can see, the signature to compile() is different now.

    -
    #define clox_compiler_h
    +
    #define clox_compiler_h
     
     
    compiler.h
    replace 1 line
    @@ -180,7 +180,7 @@

    Compiling Expressions

    We pass in the chunk where the compiler will write the code, and then compile() returns whether or not compilation succeeded. We make the same change to the signature in the implementation.

    -
    #include "scanner.h"
    +
    #include "scanner.h"
     
     
    compiler.c
    function compile()
    @@ -193,7 +193,7 @@

    Compiling Expressions

    That call to initScanner() is the only line that survives this chapter. Rip out the temporary code we wrote to test the scanner and replace it with these three lines:

    -
      initScanner(source);
    +
      initScanner(source);
     
    compiler.c
    in compile()
    replace 13 lines
    @@ -267,7 +267,7 @@

    17

    17 . 2Parsing Tokens

    First up, the front half of the compiler. This function’s name should sound familiar.

    -
    #include "scanner.h"
    +
    #include "scanner.h"
     
    compiler.c
     
    @@ -294,7 +294,7 @@ 

    17 . 2We keep looping, reading tokens and reporting the errors, until we hit a non-error one or reach the end. That way, the rest of the parser sees only valid tokens. The current and previous token are stored in this struct:

    -
    #include "scanner.h"
    +
    #include "scanner.h"
     
    compiler.c
     
    @@ -316,9 +316,9 @@ 

    17 . 217 . 2 . 1Handling syntax errors

    If the scanner hands us an error token, we need to actually tell the user. That happens using this:

    -
    compiler.c
    +
    compiler.c
    add after variable parser
    -
    static void errorAtCurrent(const char* message) {
    +
    static void errorAtCurrent(const char* message) {
       errorAt(&parser.current, message);
     }
     
    @@ -328,18 +328,18 @@

    17̴ the error occurred and forward it to errorAt(). More often, we’ll report an error at the location of the token we just consumed, so we give the shorter name to this other function:

    -
    compiler.c
    +
    compiler.c
    add after variable parser
    -
    static void error(const char* message) {
    +
    static void error(const char* message) {
       errorAt(&parser.previous, message);
     }
     
    compiler.c, add after variable parser

    The actual work happens here:

    -
    compiler.c
    +
    compiler.c
    add after variable parser
    -
    static void errorAt(Token* token, const char* message) {
    +
    static void errorAt(Token* token, const char* message) {
       fprintf(stderr, "[line %d] Error", token->line);
     
       if (token->type == TOKEN_EOF) {
    @@ -360,7 +360,7 @@ 

    17̴ human-readable. Then we print the error message itself. After that, we set this hadError flag. That records whether any errors occurred during compilation. This field also lives in the parser struct.

    -
      Token previous;
    +
      Token previous;
     
    compiler.c
    in struct Parser
      bool hadError;
    @@ -370,7 +370,7 @@ 

    17̴

    Earlier I said that compile() should return false if an error occurred. Now we can make it do that.

    -
      consume(TOKEN_EOF, "Expect end of expression.");
    +
      consume(TOKEN_EOF, "Expect end of expression.");
     
    compiler.c
    in compile()
      return !parser.hadError;
    @@ -392,7 +392,7 @@ 

    17̴ too easy to leak memory, forget to maintain invariants, or otherwise have a Very Bad Day.

    -
      bool hadError;
    +
      bool hadError;
     
    compiler.c
    in struct Parser
      bool panicMode;
    @@ -401,7 +401,7 @@ 

    17̴
    compiler.c, in struct Parser

    When an error occurs, we set it.

    -
    static void errorAt(Token* token, const char* message) {
    +
    static void errorAt(Token* token, const char* message) {
     
    compiler.c
    in errorAt()
      parser.panicMode = true;
    @@ -413,7 +413,7 @@ 

    17̴ occurred. The bytecode will never get executed, so it’s harmless to keep on trucking. The trick is that while the panic mode flag is set, we simply suppress any other errors that get detected.

    -
    static void errorAt(Token* token, const char* message) {
    +
    static void errorAt(Token* token, const char* message) {
     
    compiler.c
    in errorAt()
      if (parser.panicMode) return;
    @@ -426,7 +426,7 @@ 

    17̴ reaches a synchronization point. For Lox, we chose statement boundaries, so when we later add those to our compiler, we’ll clear the flag there.

    These new fields need to be initialized.

    -
      initScanner(source);
    +
      initScanner(source);
     
    compiler.c
    in compile()
    @@ -439,7 +439,7 @@ 

    17̴
    compiler.c, in compile()

    And to display the errors, we need a standard header.

    -
    #include <stdio.h>
    +
    #include <stdio.h>
     
    compiler.c
    #include <stdlib.h>
     
    @@ -449,9 +449,9 @@ 

    17̴
    compiler.c

    There’s one last parsing function, another old friend from jlox.

    -
    compiler.c
    +
    compiler.c
    add after advance()
    -
    static void consume(TokenType type, const char* message) {
    +
    static void consume(TokenType type, const char* message) {
       if (parser.current.type == type) {
         advance();
         return;
    @@ -470,9 +470,9 @@ 

    17 . 

    After we parse and understand a piece of the user’s program, the next step is to translate that to a series of bytecode instructions. It starts with the easiest possible step: appending a single byte to the chunk.

    -
    compiler.c
    +
    compiler.c
    add after consume()
    -
    static void emitByte(uint8_t byte) {
    +
    static void emitByte(uint8_t byte) {
       writeChunk(currentChunk(), byte, parser.previous.line);
     }
     
    @@ -484,7 +484,7 @@

    17 .  associated with that line.

    The chunk that we’re writing gets passed into compile(), but it needs to make its way to emitByte(). To do that, we rely on this intermediary function:

    -
    Parser parser;
    +
    Parser parser;
     
    compiler.c
    add after variable parser
    Chunk* compilingChunk;
    @@ -502,7 +502,7 @@ 

    17 .  notion of “current chunk” gets more complicated. To avoid having to go back and change a lot of code, I encapsulate that logic in the currentChunk() function.

    We initialize this new module variable before we write any bytecode:

    -
    bool compile(const char* source, Chunk* chunk) {
    +
    bool compile(const char* source, Chunk* chunk) {
       initScanner(source);
     
    compiler.c
    in compile()
    @@ -514,7 +514,7 @@

    17 . 
    compiler.c, in compile()

    Then, at the very end, when we’re done compiling the chunk, we wrap things up.

    -
      consume(TOKEN_EOF, "Expect end of expression.");
    +
      consume(TOKEN_EOF, "Expect end of expression.");
     
    compiler.c
    in compile()
      endCompiler();
    @@ -523,9 +523,9 @@ 

    17 . 
    compiler.c, in compile()

    That calls this:

    -
    compiler.c
    +
    compiler.c
    add after emitByte()
    -
    static void endCompiler() {
    +
    static void endCompiler() {
       emitReturn();
     }
     
    @@ -535,18 +535,18 @@

    17 .  parse, compile, and execute a single expression, then print the result. To print that value, we are temporarily using the OP_RETURN instruction. So we have the compiler add one of those to the end of the chunk.

    -
    compiler.c
    +
    compiler.c
    add after emitByte()
    -
    static void emitReturn() {
    +
    static void emitReturn() {
       emitByte(OP_RETURN);
     }
     
    compiler.c, add after emitByte()

    While we’re here in the back end we may as well make our lives easier.

    -
    compiler.c
    +
    compiler.c
    add after emitByte()
    -
    static void emitBytes(uint8_t byte1, uint8_t byte2) {
    +
    static void emitBytes(uint8_t byte1, uint8_t byte2) {
       emitByte(byte1);
       emitByte(byte2);
     }
    @@ -559,9 +559,9 @@ 

    We’ve assembled our parsing and code generation utility functions. The missing piece is the code in the middle that connects those together.

    Parsing functions on the left, bytecode emitting functions on the right. What goes in the middle?

    The only step in compile() that we have left to implement is this function:

    -
    compiler.c
    +
    compiler.c
    add after endCompiler()
    -
    static void expression() {
    +
    static void expression() {
       // What goes here?
     }
     
    @@ -589,9 +589,9 @@

    17 .̴ an expression of that token type.

    To compile number literals, we store a pointer to the following function at the TOKEN_NUMBER index in the array.

    -
    compiler.c
    +
    compiler.c
    add after endCompiler()
    -
    static void number() {
    +
    static void number() {
       double value = strtod(parser.previous.start, NULL);
       emitConstant(value);
     }
    @@ -602,9 +602,9 @@ 

    17 .̴ stored in previous. We take that lexeme and use the C standard library to convert it to a double value. Then we generate the code to load that value using this function:

    -
    compiler.c
    +
    compiler.c
    add after emitReturn()
    -
    static void emitConstant(Value value) {
    +
    static void emitConstant(Value value) {
       emitBytes(OP_CONSTANT, makeConstant(value));
     }
     
    @@ -613,9 +613,9 @@

    17 .̴

    First, we add the value to the constant table, then we emit an OP_CONSTANT instruction that pushes it onto the stack at runtime. To insert an entry in the constant table, we rely on:

    -
    compiler.c
    +
    compiler.c
    add after emitReturn()
    -
    static uint8_t makeConstant(Value value) {
    +
    static uint8_t makeConstant(Value value) {
       int constant = addConstant(currentChunk(), value);
       if (constant > UINT8_MAX) {
         error("Too many constants in one chunk.");
    @@ -653,9 +653,9 @@ 

    17&

    It turns out our function pointer array handles those too. The parsing function for an expression type can consume any additional tokens that it wants to, just like in a regular recursive descent parser. Here’s how parentheses work:

    -
    compiler.c
    +
    compiler.c
    add after endCompiler()
    -
    static void grouping() {
    +
    static void grouping() {
       expression();
       consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
     }
    @@ -677,9 +677,9 @@ 

    17& expression inside the parentheses.

    17 . 4 . 3Unary negation

    Unary minus is also a prefix expression, so it works with our model too.

    -
    compiler.c
    +
    compiler.c
    add after number()
    -
    static void unary() {
    +
    static void unary() {
       TokenType operatorType = parser.previous.type;
     
       // Compile the operand.
    @@ -720,7 +720,7 @@ 

    17 . 4̴ doesn’t matter, except that we use that token for the line number to associate with that instruction.

    This means if you have a multi-line negation expression, like:

    -
    print -
    +
    print -
       true;
     

    Then the runtime error will be reported on the wrong line. Here, it would show @@ -732,7 +732,7 @@

    17 . 4̴ calls will parse any expression for the operand, regardless of precedence. Once we add binary operators and other syntax, that will do the wrong thing. Consider:

    -
    -a.b + c;
    +
    -a.b + c;
     

    Here, the operand to - should be just the a.b expression, not the entire a.b + c. But if unary() calls expression(), the latter will happily chew @@ -748,9 +748,9 @@

    17 . 4̴ Each only parses exactly one type of expression. They don’t cascade to include higher-precedence expression types too. We need a different solution, and it looks like this:

    -
    compiler.c
    +
    compiler.c
    add after unary()
    -
    static void parsePrecedence(Precedence precedence) {
    +
    static void parsePrecedence(Precedence precedence) {
       // What goes here?
     }
     
    @@ -762,7 +762,7 @@

    17 . 4̴ probably guess that it will use that table of parsing function pointers I’ve been talking about. For now, don’t worry too much about how it works. In order to take the “precedence” as a parameter, we define it numerically.

    -
    } Parser;
    +
    } Parser;
     
    compiler.c
    add after struct Parser
    @@ -790,7 +790,7 @@ 

    17 . 4̴ C implicitly gives successively larger numbers for enums, this means that PREC_CALL is numerically larger than PREC_UNARY. For example, say the compiler is sitting on a chunk of code like:

    -
    -a.b + c
    +
    -a.b + c
     

    If we call parsePrecedence(PREC_ASSIGNMENT), then it will parse the entire expression because + has higher precedence than assignment. If instead we @@ -799,7 +799,7 @@

    17 . 4̴ than unary operators.

    With this function in hand, it’s a snap to fill in the missing body for expression().

    -
    static void expression() {
    +
    static void expression() {
     
    compiler.c
    in expression()
    replace 1 line
    @@ -811,7 +811,7 @@

    17 . 4̴

    We simply parse the lowest precedence level, which subsumes all of the higher-precedence expressions too. Now, to compile the operand for a unary expression, we call this new function and limit it to the appropriate level:

    -
      // Compile the operand.
    +
      // Compile the operand.
     
    compiler.c
    in unary()
    replace 1 line
    @@ -837,7 +837,7 @@

    1 binary operator until after we’ve parsed its left operand and then stumbled onto the operator token in the middle.

    Here’s an example:

    -
    1 + 2
    +
    1 + 2
     

    Let’s walk through trying to compile it with what we know so far:

      @@ -867,9 +867,9 @@

      1 types. The second column associates infix parser functions with token types.

      The function we will use as the infix parser for TOKEN_PLUS, TOKEN_MINUS, TOKEN_STAR, and TOKEN_SLASH is this:

      -
      compiler.c
      +
      compiler.c
      add after endCompiler()
      -
      static void binary() {
      +
      static void binary() {
         TokenType operatorType = parser.previous.type;
         ParseRule* rule = getRule(operatorType);
         parsePrecedence((Precedence)(rule->precedence + 1));
      @@ -903,7 +903,7 @@ 

      1

      The code that probably caught your eye here is that getRule() line. When we parse the right-hand operand, we again need to worry about precedence. Take an expression like:

      -
      2 * 3 + 4
      +
      2 * 3 + 4
       

      When we parse the right operand of the * expression, we need to just capture 3, and not 3 + 4, because + is lower precedence than *. We could define @@ -916,18 +916,18 @@

      1 -
      } Precedence;
      +
      } Precedence;
       
      compiler.c
      add after enum Precedence
      @@ -996,9 +996,9 @@ 

      17 . 6The table that drives our whole parser is an array of ParseRules. We’ve been talking about it forever, and finally you get to see it.

      -
      compiler.c
      +
      compiler.c
      add after unary()
      -
      ParseRule rules[] = {
      +
      ParseRule rules[] = {
         [TOKEN_LEFT_PAREN]    = {grouping, NULL,   PREC_NONE},
         [TOKEN_RIGHT_PAREN]   = {NULL,     NULL,   PREC_NONE},
         [TOKEN_LEFT_BRACE]    = {NULL,     NULL,   PREC_NONE}, 
      @@ -1065,9 +1065,9 @@ 

      17 . 6Now that we have the table, we are finally ready to write the code that uses it. This is where our Pratt parser comes to life. The easiest function to define is getRule().

      -
      compiler.c
      +
      compiler.c
      add after parsePrecedence()
      -
      static ParseRule* getRule(TokenType type) {
      +
      static ParseRule* getRule(TokenType type) {
         return &rules[type];
       }
       
      @@ -1087,7 +1087,7 @@

      17 . 6This is what happens when you write your VM in a language that was designed to be compiled on a PDP-11.

      -
        emitReturn();
      +
        emitReturn();
       }
       
      compiler.c
      add after endCompiler()
      @@ -1108,7 +1108,7 @@

      17

      Now we’re getting to the fun stuff. The maestro that orchestrates all of the parsing functions we’ve defined is parsePrecedence(). Let’s start with parsing prefix expressions.

      -
      static void parsePrecedence(Precedence precedence) {
      +
      static void parsePrecedence(Precedence precedence) {
       
      compiler.c
      in parsePrecedence()
      replace 1 line
      @@ -1132,7 +1132,7 @@

      17 tokens it needs, and returns back here. Infix expressions are where it gets interesting since precedence comes into play. The implementation is remarkably simple.

      -
        prefixRule();
      +
        prefixRule();
       
      compiler.c
      in parsePrecedence()
      @@ -1191,7 +1191,7 @@ 

      17 . 7

      Since this isn’t for end users, we hide it behind a flag.

      -
      #include <stdint.h>
      +
      #include <stdint.h>
       
       
      common.h
      #define DEBUG_PRINT_CODE
      @@ -1201,7 +1201,7 @@ 

      17 . 7When that flag is defined, we use our existing “debug” module to print out the chunk’s bytecode.

      -
        emitReturn();
      +
        emitReturn();
       
      compiler.c
      in endCompiler()
      #ifdef DEBUG_PRINT_CODE
      @@ -1218,7 +1218,7 @@ 

      17 . 7

      Finally, to access disassembleChunk(), we need to include its header.

      -
      #include "scanner.h"
      +
      #include "scanner.h"
       
      compiler.c
       
      @@ -1245,7 +1245,7 @@ 

      Challenges

      To really understand the parser, you need to see how execution threads through the interesting parsing functions—parsePrecedence() and the parser functions stored in the table. Take this (strange) expression:

      -
      (-1 + 2) * 3 - -4
      +
      (-1 + 2) * 3 - -4
       

      Write a trace of how those functions are called. Show the order they are called, which calls which, and the arguments passed to them.

      diff --git a/site/contents.html b/site/contents.html index 2d748dfea..10652fea2 100644 --- a/site/contents.html +++ b/site/contents.html @@ -1,5 +1,5 @@ - + Table of Contents · Crafting Interpreters diff --git a/site/control-flow.html b/site/control-flow.html index 68d784f15..c77dddfdb 100644 --- a/site/control-flow.html +++ b/site/control-flow.html @@ -1,5 +1,5 @@ - + Control Flow · Crafting Interpreters @@ -201,7 +201,7 @@

      9 .

      For simplicity’s sake, Lox doesn’t have a conditional operator, so let’s get our if statement on. Our statement grammar gets a new production.

      -
      statementexprStmt
      +
      statementexprStmt
                      | ifStmt
                      | printStmt
                      | block ;
      @@ -219,7 +219,7 @@ 

      9 . if the condition is truthy. Optionally, it may also have an else keyword and a statement to execute if the condition is falsey. The syntax tree node has fields for each of those three pieces.

      -
            "Expression : Expr expression",
      +
            "Expression : Expr expression",
       
      tool/GenerateAst.java
      in main()
            "If         : Expr condition, Stmt thenBranch," +
      @@ -233,7 +233,7 @@ 

      9 .

      Like other statements, the parser recognizes an if statement by the leading if keyword.

      -
        private Stmt statement() {
      +
        private Stmt statement() {
       
      lox/Parser.java
      in statement()
          if (match(IF)) return ifStatement();
      @@ -242,9 +242,9 @@ 

      9 .
      lox/Parser.java, in statement()

      When it finds one, it calls this new method to parse the rest:

      -
      lox/Parser.java
      +
      lox/Parser.java
      add after statement()
      -
        private Stmt ifStatement() {
      +
        private Stmt ifStatement() {
           consume(LEFT_PAREN, "Expect '(' after 'if'.");
           Expr condition = expression();
           consume(RIGHT_PAREN, "Expect ')' after if condition."); 
      @@ -277,7 +277,7 @@ 

      9 . elseBranch field in the syntax tree is null.

      That seemingly innocuous optional else has, in fact, opened up an ambiguity in our grammar. Consider:

      -
      if (first) if (second) whenTrue(); else whenFalse();
      +
      if (first) if (second) whenTrue(); else whenFalse();
       

      Here’s the riddle: Which if statement does that else clause belong to? This isn’t just a theoretical question about how we notate our grammar. It actually @@ -312,9 +312,9 @@

      9 . for an else before returning, the innermost call to a nested series will claim the else clause for itself before returning to the outer if statements.

      Syntax in hand, we are ready to interpret.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after visitExpressionStmt()
      -
        @Override
      +
        @Override
         public Void visitIfStmt(Stmt.If stmt) {
           if (isTruthy(evaluate(stmt.condition))) {
             execute(stmt.thenBranch);
      @@ -342,7 +342,7 @@ 

      9 . 3

      These aren’t like other binary operators because they short-circuit. If, after evaluating the left operand, we know what the result of the logical expression must be, we don’t evaluate the right operand. For example:

      -
      false and sideEffect();
      +
      false and sideEffect();
       

      For an and expression to evaluate to something truthy, both operands must be truthy. We can see as soon as we evaluate the left false operand that that @@ -357,7 +357,7 @@

      9 . 3

      I’ve always wondered why they don’t have the same precedence, like the various comparison or equality operators do.

      -
      expressionassignment ;
      +
      expressionassignment ;
       assignmentIDENTIFIER "=" assignment
                      | logic_or ;
       logic_orlogic_and ( "or" logic_and )* ;
      @@ -377,7 +377,7 @@ 

      9 . 3 code path to handle the short circuiting. I think it’s cleaner to define a new class for these operators so that they get their own visit method.

      -
            "Literal  : Object value",
      +
            "Literal  : Object value",
       
      tool/GenerateAst.java
      in main()
            "Logical  : Expr left, Token operator, Expr right",
      @@ -390,7 +390,7 @@ 

      9 . 3

      To weave the new expressions into the parser, we first change the parsing code for assignment to call or().

      -
        private Expr assignment() {
      +
        private Expr assignment() {
       
      lox/Parser.java
      in assignment()
      replace 1 line
      @@ -402,9 +402,9 @@

      9 . 3
      lox/Parser.java, in assignment(), replace 1 line

      The code to parse a series of or expressions mirrors other binary operators.

      -
      lox/Parser.java
      +
      lox/Parser.java
      add after assignment()
      -
        private Expr or() {
      +
        private Expr or() {
           Expr expr = and();
       
           while (match(OR)) {
      @@ -419,9 +419,9 @@ 

      9 . 3
      lox/Parser.java, add after assignment()

      Its operands are the next higher level of precedence, the new and expression.

      -
      lox/Parser.java
      +
      lox/Parser.java
      add after or()
      -
        private Expr and() {
      +
        private Expr and() {
           Expr expr = equality();
       
           while (match(AND)) {
      @@ -437,9 +437,9 @@ 

      9 . 3

      That calls equality() for its operands, and with that, the expression parser is all tied back together again. We’re ready to interpret.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after visitLiteralExpr()
      -
        @Override
      +
        @Override
         public Object visitLogicalExpr(Expr.Logical expr) {
           Object left = evaluate(expr.left);
       
      @@ -465,7 +465,7 @@ 

      9 . 3 operator merely guarantees it will return a value with appropriate truthiness.

      Fortunately, we have values with proper truthiness right at hand—the results of the operands themselves. So we use those. For example:

      -
      print "hi" or 2; // "hi".
      +
      print "hi" or 2; // "hi".
       print nil or "yes"; // "yes".
       

      On the first line, "hi" is truthy, so the or short-circuits and returns @@ -477,7 +477,7 @@

      9 . 3

      9 . 4While Loops

      Lox features two looping control flow statements, while and for. The while loop is the simpler one, so we’ll start there. Its grammar is the same as in C.

      -
      statementexprStmt
      +
      statementexprStmt
                      | ifStmt
                      | printStmt
                      | whileStmt
      @@ -489,7 +489,7 @@ 

      9 . 4Whil while. It takes a while keyword, followed by a parenthesized condition expression, then a statement for the body. That new grammar rule gets a syntax tree node.

      -
            "Print      : Expr expression",
      +
            "Print      : Expr expression",
       
            "Var        : Token name, Expr initializer",
       
      tool/GenerateAst.java
      in main()
      @@ -508,7 +508,7 @@

      9 . 4Whil

      Over in the parser, we follow the same process we used for if statements. First, we add another case in statement() to detect and match the leading keyword.

      -
          if (match(PRINT)) return printStatement();
      +
          if (match(PRINT)) return printStatement();
       
      lox/Parser.java
      in statement()
          if (match(WHILE)) return whileStatement();
      @@ -517,9 +517,9 @@ 

      9 . 4Whil
      lox/Parser.java, in statement()

      That delegates the real work to this method:

      -
      lox/Parser.java
      +
      lox/Parser.java
      add after varDeclaration()
      -
        private Stmt whileStatement() {
      +
        private Stmt whileStatement() {
           consume(LEFT_PAREN, "Expect '(' after 'while'.");
           Expr condition = expression();
           consume(RIGHT_PAREN, "Expect ')' after condition.");
      @@ -532,9 +532,9 @@ 

      9 . 4Whil

      The grammar is dead simple and this is a straight translation of it to Java. Speaking of translating straight to Java, here’s how we execute the new syntax:

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after visitVarStmt()
      -
        @Override
      +
        @Override
         public Void visitWhileStmt(Stmt.While stmt) {
           while (isTruthy(evaluate(stmt.condition))) {
             execute(stmt.body);
      @@ -551,10 +551,10 @@ 

      9 . 4Whil

      9 . 5For Loops

      We’re down to the last control flow construct, Ye Olde C-style for loop. I probably don’t need to remind you, but it looks like this:

      -
      for (var i = 0; i < 10; i = i + 1) print i;
      +
      for (var i = 0; i < 10; i = i + 1) print i;
       

      In grammarese, that’s:

      -
      statementexprStmt
      +
      statementexprStmt
                      | forStmt
                      | ifStmt
                      | printStmt
      @@ -615,7 +615,7 @@ 

      9 . 5 .  how some of the nice expression forms supported by languages like ALGOL were a sweetener sprinkled over the more fundamental—but presumably less palatable—lambda calculus underneath.

      Slightly more than a spoonful of sugar. -
      {
      +
      {
         var i = 0;
         while (i < 10) {
           print i;
      @@ -641,7 +641,7 @@ 

      9 . 5 .  doesn’t save us much work, but it does give me an excuse to introduce you to the technique. So, unlike the previous statements, we won’t add a new syntax tree node. Instead, we go straight to parsing. First, add an import we’ll need soon.

      -
      import java.util.ArrayList;
      +
      import java.util.ArrayList;
       
      lox/Parser.java
      import java.util.Arrays;
       
      import java.util.List;
      @@ -649,7 +649,7 @@ 

      9 . 5 . 
      lox/Parser.java

      Like every statement, we start parsing a for loop by matching its keyword.

      -
        private Stmt statement() {
      +
        private Stmt statement() {
       
      lox/Parser.java
      in statement()
          if (match(FOR)) return forStatement();
      @@ -660,9 +660,9 @@ 

      9 . 5 . 

      Here is where it gets interesting. The desugaring is going to happen here, so we’ll build this method a piece at a time, starting with the opening parenthesis before the clauses.

      -
      lox/Parser.java
      +
      lox/Parser.java
      add after statement()
      -
        private Stmt forStatement() {
      +
        private Stmt forStatement() {
           consume(LEFT_PAREN, "Expect '(' after 'for'.");
       
           // More here...
      @@ -671,7 +671,7 @@ 

      9 . 5 . 
      lox/Parser.java, add after statement()

      The first clause following that is the initializer.

      -
          consume(LEFT_PAREN, "Expect '(' after 'for'.");
      +
          consume(LEFT_PAREN, "Expect '(' after 'for'.");
       
       
      lox/Parser.java
      in forStatement()
      @@ -700,7 +700,7 @@

      9 . 5 .  true, I guess.

      Next up is the condition.

      -
            initializer = expressionStatement();
      +
            initializer = expressionStatement();
           }
       
      lox/Parser.java
      in forStatement()
      @@ -717,7 +717,7 @@

      9 . 5 . 

      Again, we look for a semicolon to see if the clause has been omitted. The last clause is the increment.

      -
          consume(SEMICOLON, "Expect ';' after loop condition.");
      +
          consume(SEMICOLON, "Expect ';' after loop condition.");
       
      lox/Parser.java
      in forStatement()
      @@ -736,7 +736,7 @@ 

      9 . 5 .  -
          consume(RIGHT_PAREN, "Expect ')' after for clauses.");
      +
          consume(RIGHT_PAREN, "Expect ')' after for clauses.");
       
      lox/Parser.java
      in forStatement()
          Stmt body = statement();
      @@ -753,7 +753,7 @@ 

      9 . 5 .  showed you earlier.

      The code is a little simpler if we work backward, so we start with the increment clause.

      -
          Stmt body = statement();
      +
          Stmt body = statement();
       
       
      lox/Parser.java
      in forStatement()
      @@ -771,7 +771,7 @@

      9 . 5 . 

      The increment, if there is one, executes after the body in each iteration of the loop. We do that by replacing the body with a little block that contains the original body followed by an expression statement that evaluates the increment.

      -
          }
      +
          }
       
       
      lox/Parser.java
      in forStatement()
      @@ -785,7 +785,7 @@

      9 . 5 . 

      Next, we take the condition and the body and build the loop using a primitive while loop. If the condition is omitted, we jam in true to make an infinite loop.

      -
          body = new Stmt.While(condition, body);
      +
          body = new Stmt.While(condition, body);
       
       
      lox/Parser.java
      in forStatement()
      @@ -806,7 +806,7 @@

      9 . 5 . 

      Finally, Lox is powerful enough to entertain us, at least for a few minutes. Here’s a tiny program to print the first 21 elements in the Fibonacci sequence:

      -
      var a = 0;
      +
      var a = 0;
       var temp;
       
       for (var b = 1; a < 10000; b = temp + b) {
      diff --git a/site/dedication.html b/site/dedication.html
      index 161636cf2..8b22690b6 100644
      --- a/site/dedication.html
      +++ b/site/dedication.html
      @@ -1,5 +1,5 @@
       
      -
      +
       
       
       Dedication · Crafting Interpreters
      diff --git a/site/evaluating-expressions.html b/site/evaluating-expressions.html
      index e178d91b8..35a4c8e85 100644
      --- a/site/evaluating-expressions.html
      +++ b/site/evaluating-expressions.html
      @@ -1,5 +1,5 @@
       
      -
      +
       
       
       Evaluating Expressions · Crafting Interpreters
      @@ -201,9 +201,9 @@ 

      7  That’s almost exactly what a real interpreter does, except instead of concatenating strings, it computes values.

      We start with a new class.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      create new file
      -
      package com.craftinginterpreters.lox;
      +
      package com.craftinginterpreters.lox;
       
       class Interpreter implements Expr.Visitor<Object> {
       }
      @@ -230,9 +230,9 @@ 

      7 .R

      So, much like we converted a literal token into a literal syntax tree node in the parser, now we convert the literal tree node into a runtime value. That turns out to be trivial.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      in class Interpreter
      -
        @Override
      +
        @Override
         public Object visitLiteralExpr(Expr.Literal expr) {
           return expr.value;
         }
      @@ -245,9 +245,9 @@ 

      7 .R

      7 . 2 . 2Evaluating parentheses

      The next simplest node to evaluate is groupingthe node you get as a result of using explicit parentheses in an expression.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      in class Interpreter
      -
        @Override
      +
        @Override
         public Object visitGroupingExpr(Expr.Grouping expr) {
           return evaluate(expr.expression);
         }
      @@ -265,9 +265,9 @@ 

      7  We do create a node for parentheses in Lox because we’ll need it later to correctly handle the left-hand sides of assignment expressions.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      in class Interpreter
      -
        private Object evaluate(Expr expr) {
      +
        private Object evaluate(Expr expr) {
           return expr.accept(this);
         }
       
      @@ -277,9 +277,9 @@

      Like grouping, unary expressions have a single subexpression that we must evaluate first. The difference is that the unary expression itself does a little work afterwards.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after visitLiteralExpr()
      -
        @Override
      +
        @Override
         public Object visitUnaryExpr(Expr.Unary expr) {
           Object right = evaluate(expr.right);
       
      @@ -310,7 +310,7 @@ 

      post-order traversal—each node evaluates its children before doing its own work.

      The other unary operator is logical not.

      -
          switch (expr.operator.type) {
      +
          switch (expr.operator.type) {
       
      lox/Interpreter.java
      in visitUnaryExpr()
            case BANG:
      @@ -345,9 +345,9 @@ 

      7&#

      Lox follows Ruby’s simple rule: false and nil are falsey, and everything else is truthy. We implement that like so:

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after visitUnaryExpr()
      -
        private boolean isTruthy(Object object) {
      +
        private boolean isTruthy(Object object) {
           if (object == null) return false;
           if (object instanceof Boolean) return (boolean)object;
           return true;
      @@ -358,9 +358,9 @@ 

      7&#

      7 . 2 . 5Evaluating binary operators

      On to the last expression tree class, binary operators. There’s a handful of them, and we’ll start with the arithmetic ones.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after evaluate()
      -
        @Override
      +
        @Override
         public Object visitBinaryExpr(Expr.Binary expr) {
           Object left = evaluate(expr.left);
           Object right = evaluate(expr.right); 
      @@ -391,7 +391,7 @@ 

      I think you can figure out what’s going on here. The main difference from the unary negation operator is that we have two operands to evaluate.

      I left out one arithmetic operator because it’s a little special.

      -
          switch (expr.operator.type) {
      +
          switch (expr.operator.type) {
             case MINUS:
               return (double)left - (double)right;
       
      lox/Interpreter.java
      @@ -424,7 +424,7 @@

      Next up are the comparison operators.

      -
          switch (expr.operator.type) {
      +
          switch (expr.operator.type) {
       
      lox/Interpreter.java
      in visitBinaryExpr()
            case GREATER:
      @@ -443,9 +443,9 @@ 

      The last pair of operators are equality.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      in visitBinaryExpr()
      -
            case BANG_EQUAL: return !isEqual(left, right);
      +
            case BANG_EQUAL: return !isEqual(left, right);
             case EQUAL_EQUAL: return isEqual(left, right);
       
      lox/Interpreter.java, in visitBinaryExpr()
      @@ -458,9 +458,9 @@

      Spoiler alert: it’s not.

      Like truthiness, the equality logic is hoisted out into a separate method.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after isTruthy()
      -
        private boolean isEqual(Object a, Object b) {
      +
        private boolean isEqual(Object a, Object b) {
           if (a == null && b == null) return true;
           if (a == null) return false;
       
      @@ -479,7 +479,7 @@ 

      7 . 4 . 2Running the interpreter

      Now that we have an interpreter, the Lox class can start using it.

      -
      public class Lox {
      +
      public class Lox {
       
      lox/Lox.java
      in class Lox
        private static final Interpreter interpreter = new Interpreter();
      @@ -822,7 +822,7 @@ 

      7R persist throughout the REPL session.

      Finally, we remove the line of temporary code from the last chapter for printing the syntax tree and replace it with this:

      -
          // Stop if there was a syntax error.
      +
          // Stop if there was a syntax error.
           if (hadError) return;
       
       
      lox/Lox.java
      @@ -885,7 +885,7 @@

      Design Note: Static and Dynamic Typi cast is checked at runtime and throws an exception on failure.

      A more subtle example is covariant arrays in Java and C#. The static subtyping rules for arrays allow operations that are not sound. Consider:

      -
      Object[] stuff = new Integer[1];
      +
      Object[] stuff = new Integer[1];
       stuff[0] = "not an int!";
       

      This code compiles without any errors. The first line upcasts the Integer array diff --git a/site/functions.html b/site/functions.html index 84ea9d625..463e8968c 100644 --- a/site/functions.html +++ b/site/functions.html @@ -1,5 +1,5 @@ - + Functions · Crafting Interpreters @@ -112,7 +112,7 @@

      Functions

      10 . 1Function Calls

      You’re certainly familiar with C-style function call syntax, but the grammar is more subtle than you may realize. Calls are typically to named functions like:

      -
      average(1, 2);
      +
      average(1, 2);
       

      But the name of the function being called isn’t actually part of the call syntax. The thing being called—the callee—can be any expression that evaluates to a function. (Well, it does have to be a @@ -122,7 +122,7 @@

      10 . 1The name is part of the call syntax in Pascal. You can call only named functions or functions stored directly in variables.

      -
      getCallback()();
      +
      getCallback()();
       

      There are two call expressions here. The first pair of parentheses has getCallback as its callee. But the second call has the entire getCallback() @@ -133,7 +133,7 @@

      10 . 1unary rule bubble up to a new call rule.

      -
      unary          → ( "!" | "-" ) unary | call ;
      +
      unary          → ( "!" | "-" ) unary | call ;
       callprimary ( "(" arguments? ")" )* ;
       

      This rule matches a primary expression followed by zero or more function calls. @@ -152,7 +152,7 @@

      10 . 1other well-known functional language), is baked directly into the language syntax so it’s not as weird looking as it would be here.

      -
      argumentsexpression ( "," expression )* ;
      +
      argumentsexpression ( "," expression )* ;
       

      This rule requires at least one argument expression, followed by zero or more other expressions, each preceded by a comma. To handle zero-argument calls, the @@ -163,7 +163,7 @@

      10 . 1

      Over in our syntax tree generator, we add a new node.

      -
            "Binary   : Expr left, Token operator, Expr right",
      +
            "Binary   : Expr left, Token operator, Expr right",
       
      tool/GenerateAst.java
      in main()
            "Call     : Expr callee, Token paren, List<Expr> arguments",
      @@ -179,7 +179,7 @@ 

      10 . 1

      Crack open the parser. Where unary() used to jump straight to primary(), change it to call, well, call().

      -
            return new Expr.Unary(operator, right);
      +
            return new Expr.Unary(operator, right);
           }
       
       

      Its definition is:

      -
      lox/Parser.java
      +
      lox/Parser.java
      add after unary()
      -
        private Expr call() {
      +
        private Expr call() {
           Expr expr = primary();
       
           while (true) { 
      @@ -222,9 +222,9 @@ 

      10 . 1

      The code to parse the argument list is in this helper:

      -
      lox/Parser.java
      +
      lox/Parser.java
      add after unary()
      -
        private Expr finishCall(Expr callee) {
      +
        private Expr finishCall(Expr callee) {
           List<Expr> arguments = new ArrayList<>();
           if (!check(RIGHT_PAREN)) {
             do {
      @@ -265,7 +265,7 @@ 

      10 number of arguments will simplify our bytecode interpreter in Part III. We want our two interpreters to be compatible with each other, even in weird corner cases like this, so we’ll add the same limit to jlox.

      -
            do {
      +
            do {
       
      lox/Parser.java
      in finishCall()
              if (arguments.size() >= 255) {
      @@ -285,7 +285,7 @@ 

      We don’t have any functions we can call, so it seems weird to start implementing calls first, but we’ll worry about that when we get there. First, our interpreter needs a new import.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      import java.util.ArrayList;
       
      import java.util.List;
       
      @@ -293,9 +293,9 @@

      As always, interpretation starts with a new visit method for our new call expression node.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after visitBinaryExpr()
      -
        @Override
      +
        @Override
         public Object visitCallExpr(Expr.Call expr) {
           Object callee = evaluate(expr.callee);
       
      @@ -333,9 +333,9 @@ 

      There isn’t too much to this new interface.

      -
      lox/LoxCallable.java
      +
      lox/LoxCallable.java
      create new file
      -
      package com.craftinginterpreters.lox;
      +
      package com.craftinginterpreters.lox;
       
       import java.util.List;
       
      @@ -353,13 +353,13 @@ 

      10 . 1& little more robust. It currently ignores a couple of failure modes that we can’t pretend won’t occur. First, what happens if the callee isn’t actually something you can call? What if you try to do this:

      -
      "totally not a function"();
      +
      "totally not a function"();
       

      Strings aren’t callable in Lox. The runtime representation of a Lox string is a Java string, so when we cast that to LoxCallable, the JVM will throw a ClassCastException. We don’t want our interpreter to vomit out some nasty Java stack trace and die. Instead, we need to check the type ourselves first.

      -
          }
      +
          }
       
       
      lox/Interpreter.java
      in visitCallExpr()
      @@ -379,13 +379,13 @@

      10 . 1̴ for the number of arguments a function or operation expects. Unary operators have arity one, binary operators two, etc. With functions, the arity is determined by the number of parameters it declares.

      -
      fun add(a, b, c) {
      +
      fun add(a, b, c) {
         print a + b + c;
       }
       

      This function defines three parameters, a, b, and c, so its arity is three and it expects three arguments. So what if you try to call it like this:

      -
      add(1, 2, 3, 4); // Too many.
      +
      add(1, 2, 3, 4); // Too many.
       add(1, 2);       // Too few.
       

      Different languages take different approaches to this problem. Of course, most @@ -400,7 +400,7 @@

      10 . 1̴ the sooner the implementation draws my attention to it, the better. So for Lox, we’ll take Python’s approach. Before invoking the callable, we check to see if the argument list’s length matches the callable’s arity.

      -
          LoxCallable function = (LoxCallable)callee;
      +
          LoxCallable function = (LoxCallable)callee;
       
      lox/Interpreter.java
      in visitCallExpr()
          if (arguments.size() != function.arity()) {
      @@ -414,7 +414,7 @@ 

      10 . 1̴
      lox/Interpreter.java, in visitCallExpr()

      That requires a new method on the LoxCallable interface to ask it its arity.

      -
      interface LoxCallable {
      +
      interface LoxCallable {
       
      lox/LoxCallable.java
      in interface LoxCallable
        int arity();
      @@ -493,7 +493,7 @@ 

      10 . 2 .& successive invocations tells you how much time elapsed between the two calls. This function is defined in the global scope, so let’s ensure the interpreter has access to that.

      -
      class Interpreter implements Expr.Visitor<Object>,
      +
      class Interpreter implements Expr.Visitor<Object>,
                                    Stmt.Visitor<Void> {
       
      lox/Interpreter.java
      in class Interpreter
      @@ -511,7 +511,7 @@

      10 . 2 .& fixed reference to the outermost global environment.

      When we instantiate an Interpreter, we stuff the native function in that global scope.

      -
        private Environment environment = globals;
      +
        private Environment environment = globals;
       
       
      lox/Interpreter.java
      in class Interpreter
      @@ -566,19 +566,19 @@

      10  syntactic sugar for two distinct steps: (1) creating a new function object, and (2) binding a new variable to it. If Lox had syntax for anonymous functions, we wouldn’t need function declaration statements. You could just do:

      -
      var add = fun (a, b) {
      +
      var add = fun (a, b) {
         print a + b;
       };
       

      However, since named functions are the common case, I went ahead and gave Lox nice syntax for them.

      -
      declarationfunDecl
      +
      declarationfunDecl
                      | varDecl
                      | statement ;
       

      The updated declaration rule references this new rule:

      -
      funDecl"fun" function ;
      +
      funDecl"fun" function ;
       functionIDENTIFIER "(" parameters? ")" block ;
       

      The main funDecl rule uses a separate helper rule function. A function @@ -592,12 +592,12 @@

      10 

      The function itself is a name followed by the parenthesized parameter list and the body. The body is always a braced block, using the same grammar rule that block statements use. The parameter list uses this rule:

      -
      parametersIDENTIFIER ( "," IDENTIFIER )* ;
      +
      parametersIDENTIFIER ( "," IDENTIFIER )* ;
       

      It’s like the earlier arguments rule, except that each parameter is an identifier, not an expression. That’s a lot of new syntax for the parser to chew through, but the resulting AST node isn’t too bad.

      -
            "Expression : Expr expression",
      +
            "Expression : Expr expression",
       
      tool/GenerateAst.java
      in main()
            "Function   : Token name, List<Token> params," +
      @@ -613,7 +613,7 @@ 

      10  body. We store the body as the list of statements contained inside the curly braces.

      Over in the parser, we weave in the new declaration.

      -
          try {
      +
          try {
       
      lox/Parser.java
      in declaration()
            if (match(FUN)) return function("function");
      @@ -625,9 +625,9 @@ 

      10  encounter fun, we call function. That corresponds to the function grammar rule since we already matched and consumed the fun keyword. We’ll build the method up a piece at a time, starting with this:

      -
      lox/Parser.java
      +
      lox/Parser.java
      add after expressionStatement()
      -
        private Stmt.Function function(String kind) {
      +
        private Stmt.Function function(String kind) {
           Token name = consume(IDENTIFIER, "Expect " + kind + " name.");
         }
       
      @@ -639,7 +639,7 @@

      10  inside classes. When we do that, we’ll pass in “method” for kind so that the error messages are specific to the kind of declaration being parsed.

      Next, we parse the parameter list and the pair of parentheses wrapped around it.

      -
          Token name = consume(IDENTIFIER, "Expect " + kind + " name.");
      +
          Token name = consume(IDENTIFIER, "Expect " + kind + " name.");
       
      lox/Parser.java
      in function()
          consume(LEFT_PAREN, "Expect '(' after " + kind + " name.");
      @@ -667,7 +667,7 @@ 

      10  that you don’t exceed the maximum number of parameters a function is allowed to have.

      Finally, we parse the body and wrap it all up in a function node.

      -
          consume(RIGHT_PAREN, "Expect ')' after parameters.");
      +
          consume(RIGHT_PAREN, "Expect ')' after parameters.");
       
      lox/Parser.java
      in function()
      @@ -694,9 +694,9 @@ 

      10 . 4< we can call it. We don’t want the runtime phase of the interpreter to bleed into the front end’s syntax classes so we don’t want Stmt.Function itself to implement that. Instead, we wrap it in a new class.

      -
      lox/LoxFunction.java
      +
      lox/LoxFunction.java
      create new file
      -
      package com.craftinginterpreters.lox;
      +
      package com.craftinginterpreters.lox;
       
       import java.util.List;
       
      @@ -710,9 +710,9 @@ 

      10 . 4<
      lox/LoxFunction.java, create new file

      We implement the call() of LoxCallable like so:

      -
      lox/LoxFunction.java
      +
      lox/LoxFunction.java
      add after LoxFunction()
      -
        @Override
      +
        @Override
         public Object call(Interpreter interpreter,
                            List<Object> arguments) {
           Environment environment = new Environment(interpreter.globals);
      @@ -743,7 +743,7 @@ 

      10 . 4< calls to the same function in play at the same time, each needs its own environment, even though they are all calls to the same function.

      For example, here’s a convoluted way to count to three:

      -
      fun count(n) {
      +
      fun count(n) {
         if (n > 1) count(n - 1);
         print n;
       }
      @@ -761,7 +761,7 @@ 

      10 . 4< lists in lockstep. For each pair, it creates a new variable with the parameter’s name and binds it to the argument’s value.

      So, for a program like this:

      -
      fun add(a, b, c) {
      +
      fun add(a, b, c) {
         print a + b + c;
       }
       
      @@ -789,9 +789,9 @@ 

      10 . 4< lists have the same length. This is safe because visitCallExpr() checks the arity before calling call(). It relies on the function reporting its arity to do that.

      -
      lox/LoxFunction.java
      +
      lox/LoxFunction.java
      add after LoxFunction()
      -
        @Override
      +
        @Override
         public int arity() {
           return declaration.params.size();
         }
      @@ -800,9 +800,9 @@ 

      10 . 4<

      That’s most of our object representation. While we’re in here, we may as well implement toString().

      -
      lox/LoxFunction.java
      +
      lox/LoxFunction.java
      add after LoxFunction()
      -
        @Override
      +
        @Override
         public String toString() {
           return "<fn " + declaration.name.lexeme + ">";
         }
      @@ -810,7 +810,7 @@ 

      10 . 4<
      lox/LoxFunction.java, add after LoxFunction()

      This gives nicer output if a user decides to print a function value.

      -
      fun add(a, b) {
      +
      fun add(a, b) {
         print a + b;
       }
       
      @@ -819,9 +819,9 @@ 

      10 . 4<

      10 . 4 . 1Interpreting function declarations

      We’ll come back and refine LoxFunction soon, but that’s enough to get started. Now we can visit a function declaration.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after visitExpressionStmt()
      -
        @Override
      +
        @Override
         public Void visitFunctionStmt(Stmt.Function stmt) {
           LoxFunction function = new LoxFunction(stmt);
           environment.define(stmt.name.lexeme, function);
      @@ -840,7 +840,7 @@ 

      fun sayHi(first, last) {
      +
      fun sayHi(first, last) {
         print "Hi, " + first + " " + last + "!";
       }
       
      @@ -859,7 +859,7 @@ 

      10 .  -
      statementexprStmt
      +
      statementexprStmt
                      | forStmt
                      | ifStmt
                      | printStmt
      @@ -878,7 +878,7 @@ 

      10 .  are no true void functions. The compiler has no way of preventing you from taking the result value of a call to a function that doesn’t contain a return statement.

      -
      fun procedure() {
      +
      fun procedure() {
         print "don't return anything";
       }
       
      @@ -889,10 +889,10 @@ 

      10 .  return statements at all. We use nil for this, which is why LoxFunction’s implementation of call() returns null at the end. In that same vein, if you omit the value in a return statement, we simply treat it as equivalent to:

      -
      return nil;
      +
      return nil;
       

      Over in our AST generator, we add a new node.

      -
            "Print      : Expr expression",
      +
            "Print      : Expr expression",
       
      tool/GenerateAst.java
      in main()
            "Return     : Token keyword, Expr value",
      @@ -906,7 +906,7 @@ 

      10 . 

      It keeps the return keyword token so we can use its location for error reporting, and the value being returned, if any. We parse it like other statements, first by recognizing the initial keyword.

      -
          if (match(PRINT)) return printStatement();
      +
          if (match(PRINT)) return printStatement();
       
      lox/Parser.java
      in statement()
          if (match(RETURN)) return returnStatement();
      @@ -915,9 +915,9 @@ 

      10 . 
      lox/Parser.java, in statement()

      That branches out to:

      -
      lox/Parser.java
      +
      lox/Parser.java
      add after printStatement()
      -
        private Stmt returnStatement() {
      +
        private Stmt returnStatement() {
           Token keyword = previous();
           Expr value = null;
           if (!check(SEMICOLON)) {
      @@ -943,7 +943,7 @@ 

      10 .& kind of jacked up control flow construct.

      For example, say we’re running this program and we’re about to execute the return statement:

      -
      fun count(n) {
      +
       

      The Java call stack currently looks roughly like this:

      -
      Interpreter.visitReturnStmt()
      +
      Interpreter.visitReturnStmt()
       Interpreter.visitIfStmt()
       Interpreter.executeBlock()
       Interpreter.visitBlockStmt()
      @@ -969,9 +969,9 @@ 

      10 .& visit methods of all of the containing statements back to the code that began executing the body.

      The visit method for our new AST node looks like this:

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after visitPrintStmt()
      -
        @Override
      +
        @Override
         public Void visitReturnStmt(Stmt.Return stmt) {
           Object value = null;
           if (stmt.value != null) value = evaluate(stmt.value);
      @@ -983,9 +983,9 @@ 

      10 .&

      If we have a return value, we evaluate it, otherwise, we use nil. Then we take that value and wrap it in a custom exception class and throw it.

      -
      lox/Return.java
      +
      lox/Return.java
      create new file
      -
      package com.craftinginterpreters.lox;
      +
      package com.craftinginterpreters.lox;
       
       class Return extends RuntimeException {
         final Object value;
      @@ -1012,7 +1012,7 @@ 

      10 .&

      We want this to unwind all the way to where the function call began, the call() method in LoxFunction.

      -
                arguments.get(i));
      +
                arguments.get(i));
           }
       
       
      lox/LoxFunction.java
      @@ -1035,7 +1035,7 @@

      10 .&

      Let’s try it out. We finally have enough power to support this classic examplea recursive function to calculate Fibonacci numbers:

      -
      fun fib(n) {
      +
      fun fib(n) {
         if (n <= 1) return n;
         return fib(n - 2) + fib(n - 1);
       }
      @@ -1071,7 +1071,7 @@ 

      local functions that are defined inside another function, or nested inside a block.

      Consider this classic example:

      -
      fun makeCounter() {
      +
      fun makeCounter() {
         var i = 0;
         fun count() {
           i = i + 1;
      @@ -1117,7 +1117,7 @@ 

      -
        private final Stmt.Function declaration;
      +
       
       

      We initialize that in the constructor.

      -
      lox/LoxFunction.java
      +

      When we create a LoxFunction, we capture the current environment.

      -
        public Void visitFunctionStmt(Stmt.Function stmt) {
      +
        public Void visitFunctionStmt(Stmt.Function stmt) {
       
      lox/Interpreter.java
      in visitFunctionStmt()
      replace 1 line
      @@ -1150,7 +1150,7 @@

      called, which is what we want. It represents the lexical scope surrounding the function declaration. Finally, when we call the function, we use that environment as the call’s parent instead of going straight to globals.

      -
                           List<Object> arguments) {
      +
                           List<Object> arguments) {
       
      lox/LoxFunction.java
      in call()
      replace 1 line
      @@ -1191,7 +1191,7 @@

      Challenges

      functions or lambdasan expression syntax that creates a function without binding it to a name. Add anonymous function syntax to Lox so that this works:

      -
      fun thrice(fn) {
      +
      fun thrice(fn) {
         for (var i = 1; i <= 3; i = i + 1) {
           fn(i);
         }
      @@ -1206,12 +1206,12 @@ 

      Challenges

      How do you handle the tricky case of an anonymous function expression occurring in an expression statement:

      -
      fun () {};
      +
      fun () {};
       
    1. Is this program valid?

      -
      fun scope(a) {
      +
      fun scope(a) {
         var a = "local";
       }
       
      diff --git a/site/garbage-collection.html b/site/garbage-collection.html index 40a18942e..bf249edb4 100644 --- a/site/garbage-collection.html +++ b/site/garbage-collection.html @@ -1,5 +1,5 @@ - + Garbage Collection · Crafting Interpreters @@ -155,7 +155,7 @@

      26 . 1R

      That sounds too conservative. Couldn’t any bit of memory potentially be read? Actually, no, at least not in a memory-safe language like Lox. Here’s an example:

      -
      var a = "first value";
      +
      var a = "first value";
       a = "updated";
       // GC here.
       print a;
      @@ -167,7 +167,7 @@ 

      26 . 1R there is some way for a user program to reference it. Otherwise, like the string “first value” here, it is unreachable.

      Many values can be directly accessed by the VM. Take a look at:

      -
      var global = "string";
      +
      var global = "string";
       {
         var local = "another";
         print global + local;
      @@ -191,7 +191,7 @@ 

      26 . 1R -
      fun makeClosure() {
      +
      fun makeClosure() {
         var a = "data";
       
         fun f() { print a; }
      @@ -294,7 +294,7 @@ 

      26 .̴ -
      void* reallocate(void* pointer, size_t oldSize, size_t newSize);
      +
      void* reallocate(void* pointer, size_t oldSize, size_t newSize);
       
      memory.h
      add after reallocate()
      void collectGarbage();
      @@ -303,9 +303,9 @@ 

      26 .̴
      memory.h, add after reallocate()

      We’ll work our way up to a full implementation starting with this empty shell:

      -
      memory.c
      +
      memory.c
      add after freeObject()
      -
      void collectGarbage() {
      +
      void collectGarbage() {
       }
       
      memory.c, add after freeObject()
      @@ -314,7 +314,7 @@

      26 .̴ turns out that’s a subtle question that we’ll spend some time on later in the chapter. For now we’ll sidestep the issue and build ourselves a handy diagnostic tool in the process.

      -
      #define DEBUG_TRACE_EXECUTION
      +
      #define DEBUG_TRACE_EXECUTION
       
      common.h
       
      @@ -330,7 +330,7 @@ 

      26 .̴ horrendous for performance. But it’s great for flushing out memory management bugs that occur only when a GC is triggered at just the right moment. If every moment triggers a GC, you’re likely to find those bugs.

      -
      void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
      +
      void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
       
      memory.c
      in reallocate()
        if (newSize > oldSize) {
      @@ -367,7 +367,7 @@ 

      26 . 2  add one, how do we tell if it’s doing anything useful? Can we tell only if we write programs that plow through acres of memory? How do we debug that?

      An easy way to shine a light into the GC’s inner workings is with some logging.

      -
      #define DEBUG_STRESS_GC
      +
      #define DEBUG_STRESS_GC
       
      common.h
      #define DEBUG_LOG_GC
       
      @@ -379,7 +379,7 @@ 

      26 . 2 

      When this is enabled, clox prints information to the console when it does something with dynamic memory.

      We need a couple of includes.

      -
      #include "vm.h"
      +
      #include "vm.h"
       
      memory.c
       
      @@ -395,7 +395,7 @@ 

      26 . 2 

      We don’t have a collector yet, but we can start putting in some of the logging now. We’ll want to know when a collection run starts.

      -
      void collectGarbage() {
      +
      void collectGarbage() {
       
      memory.c
      in collectGarbage()
      #ifdef DEBUG_LOG_GC
      @@ -407,7 +407,7 @@ 

      26 . 2 

      Eventually we will log some other operations during the collection, so we’ll also want to know when the show’s over.

      -
        printf("-- gc begin\n");
      +
        printf("-- gc begin\n");
       #endif
       
      memory.c
      in collectGarbage()
      @@ -422,7 +422,7 @@

      26 . 2 

      We don’t have any code for the collector yet, but we do have functions for allocating and freeing, so we can instrument those now.

      -
        vm.objects = object;
      +
        vm.objects = object;
       
      object.c
      in allocateObject()
      @@ -436,7 +436,7 @@ 

      26 . 2 
      object.c, in allocateObject()

      And at the end of an object’s lifespan:

      -
      static void freeObject(Obj* object) {
      +
      static void freeObject(Obj* object) {
       
      memory.c
      in freeObject()
      #ifdef DEBUG_LOG_GC
      @@ -454,7 +454,7 @@ 

      26 .  reference from one object to another forms a connection, and these constellations are the graph that the mark phase traverses. Marking begins at the roots.

      -
      #ifdef DEBUG_LOG_GC
      +
      #ifdef DEBUG_LOG_GC
         printf("-- gc begin\n");
       #endif
       
      memory.c
      @@ -470,9 +470,9 @@

      26 . 

      Most roots are local variables or temporaries sitting right in the VM’s stack, so we start by walking that.

      -
      memory.c
      +
      memory.c
      add after freeObject()
      -
      static void markRoots() {
      +
      static void markRoots() {
         for (Value* slot = vm.stack; slot < vm.stackTop; slot++) {
           markValue(*slot);
         }
      @@ -481,7 +481,7 @@ 

      26 . 
      memory.c, add after freeObject()

      To mark a Lox value, we use this new function:

      -
      void* reallocate(void* pointer, size_t oldSize, size_t newSize);
      +
      void* reallocate(void* pointer, size_t oldSize, size_t newSize);
       
      memory.h
      add after reallocate()
      void markValue(Value value);
      @@ -490,9 +490,9 @@ 

      26 . 
      memory.h, add after reallocate()

      Its implementation is here:

      -
      memory.c
      +
      memory.c
      add after reallocate()
      -
      void markValue(Value value) {
      +
      void markValue(Value value) {
         if (IS_OBJ(value)) markObject(AS_OBJ(value));
       }
       
      @@ -502,7 +502,7 @@

      26 .  Value and require no heap allocation. The garbage collector doesn’t need to worry about them at all, so the first thing we do is ensure that the value is an actual heap object. If so, the real work happens in this function:

      -
      void* reallocate(void* pointer, size_t oldSize, size_t newSize);
      +
      void* reallocate(void* pointer, size_t oldSize, size_t newSize);
       
      memory.h
      add after reallocate()
      void markObject(Obj* object);
      @@ -511,9 +511,9 @@ 

      26 . 
      memory.h, add after reallocate()

      Which is defined here:

      -
      memory.c
      +
      memory.c
      add after reallocate()
      -
      void markObject(Obj* object) {
      +
      void markObject(Obj* object) {
         if (object == NULL) return;
         object->isMarked = true;
       }
      @@ -526,7 +526,7 @@ 

      26 .  object being pointed to is optional.

      Assuming we do have a valid object, we mark it by setting a flag. That new field lives in the Obj header struct all objects share.

      -
        ObjType type;
      +
        ObjType type;
       
      object.h
      in struct Obj
        bool isMarked;
      @@ -536,7 +536,7 @@ 

      26 . 

      Every new object begins life unmarked because we haven’t yet determined if it is reachable or not.

      -
        object->type = type;
      +
        object->type = type;
       
      object.c
      in allocateObject()
        object->isMarked = false;
      @@ -547,7 +547,7 @@ 

      26 . 
      object.c, in allocateObject()

      Before we go any farther, let’s add some logging to markObject().

      -
      void markObject(Obj* object) {
      +
      void markObject(Obj* object) {
         if (object == NULL) return;
       
      memory.c
      in markObject()
      @@ -564,7 +564,7 @@

      26 . 

      This way we can see what the mark phase is doing. Marking the stack takes care of local variables and temporaries. The other main source of roots are the global variables.

      -
          markValue(*slot);
      +
          markValue(*slot);
         }
       
      memory.c
      in markRoots()
      @@ -577,7 +577,7 @@

      26 . 

      Those live in a hash table owned by the VM, so we’ll declare another helper function for marking all of the objects in a table.

      -
      ObjString* tableFindString(Table* table, const char* chars,
      +
      ObjString* tableFindString(Table* table, const char* chars,
                                  int length, uint32_t hash);
       
      table.h
      add after tableFindString()
      @@ -589,9 +589,9 @@

      26 . 
      table.h, add after tableFindString()

      We implement that in the “table” module here:

      -
      table.c
      +
      table.c
      add after tableFindString()
      -
      void markTable(Table* table) {
      +
      void markTable(Table* table) {
         for (int i = 0; i < table->capacity; i++) {
           Entry* entry = &table->entries[i];
           markObject((Obj*)entry->key);
      @@ -613,7 +613,7 @@ 

      26 .̴ separate stack of CallFrames. Each CallFrame contains a pointer to the closure being called. The VM uses those pointers to access constants and upvalues, so those closures need to be kept around too.

      -
        }
      +
        }
       
      memory.c
      in markRoots()
      @@ -629,7 +629,7 @@ 

      26 .̴

      Speaking of upvalues, the open upvalue list is another set of values that the VM can directly reach.

      -
        for (int i = 0; i < vm.frameCount; i++) {
      +
        for (int i = 0; i < vm.frameCount; i++) {
           markObject((Obj*)vm.frames[i].closure);
         }
       
      memory.c
      @@ -654,7 +654,7 @@

      26 .̴ the compiler directly accesses need to be treated as roots too.

      To keep the compiler module cleanly separated from the rest of the VM, we’ll do that in a separate function.

      -
        markTable(&vm.globals);
      +
        markTable(&vm.globals);
       
      memory.c
      in markRoots()
        markCompilerRoots();
      @@ -663,7 +663,7 @@ 

      26 .̴
      memory.c, in markRoots()

      It’s declared here:

      -
      ObjFunction* compile(const char* source);
      +
      ObjFunction* compile(const char* source);
       
      compiler.h
      add after compile()
      void markCompilerRoots();
      @@ -674,7 +674,7 @@ 

      26 .̴
      compiler.h, add after compile()

      Which means the “memory” module needs an include.

      -
      #include <stdlib.h>
      +
      #include <stdlib.h>
       
       
      memory.c
      #include "compiler.h"
      @@ -683,9 +683,9 @@ 

      26 .̴
      memory.c

      And the definition is over in the “compiler” module.

      -
      compiler.c
      +
      compiler.c
      add after compile()
      -
      void markCompilerRoots() {
      +
      void markCompilerRoots() {
         Compiler* compiler = current;
         while (compiler != NULL) {
           markObject((Obj*)compiler->function);
      @@ -700,7 +700,7 @@ 

      26 .̴ declarations can nest, the compiler has a linked list of those and we walk the whole list.

      Since the “compiler” module is calling markObject(), it also needs an include.

      -
      #include "compiler.h"
      +
      #include "compiler.h"
       
      compiler.c
      #include "memory.h"
       
      #include "scanner.h"
      @@ -821,7 +821,7 @@ 

      Instead, we’ll create a separate worklist to keep track of all of the gray objects. When an object turns gray, in addition to setting the mark field we’ll also add it to the worklist.

      -
        object->isMarked = true;
      +
        object->isMarked = true;
       
      memory.c
      in markObject()
      @@ -846,7 +846,7 @@ 

      We’ll manage its memory ourselves, explicitly. The VM owns the gray stack.

      -
        Obj* objects;
      +
       
       

      It starts out empty.

      -
        vm.objects = NULL;
      +
       
       

      And we need to free it when the VM shuts down.

      -
          object = next;
      +
          object = next;
         }
       
      memory.c
      in freeObjects()
      @@ -895,7 +895,7 @@

      -
          vm.grayStack = (Obj**)realloc(vm.grayStack,
      +
          vm.grayStack = (Obj**)realloc(vm.grayStack,
                                         sizeof(Obj*) * vm.grayCapacity);
       
      memory.c
      in markObject()
      @@ -910,7 +910,7 @@

      26

      OK, now when we’re done marking the roots, we have both set a bunch of fields and filled our work list with objects to chew through. It’s time for the next phase.

      -
        markRoots();
      +
        markRoots();
       
      memory.c
      in collectGarbage()
        traceReferences();
      @@ -921,9 +921,9 @@ 

      26
      memory.c, in collectGarbage()

      Here’s the implementation:

      -
      memory.c
      +
      memory.c
      add after markRoots()
      -
      static void traceReferences() {
      +
      static void traceReferences() {
         while (vm.grayCount > 0) {
           Obj* object = vm.grayStack[--vm.grayCount];
           blackenObject(object);
      @@ -939,9 +939,9 @@ 

      26 between turning white objects gray and gray objects black, gradually advancing the entire wavefront forward.

      Here’s where we traverse a single object’s references:

      -
      memory.c
      +
      memory.c
      add after markValue()
      -
      static void blackenObject(Obj* object) {
      +
      static void blackenObject(Obj* object) {
         switch (object->type) {
           case OBJ_NATIVE:
           case OBJ_STRING:
      @@ -969,7 +969,7 @@ 

      26 time, friend.

      Now let’s start adding in the other object types. The simplest is upvalues.

      -
      static void blackenObject(Obj* object) {
      +
      static void blackenObject(Obj* object) {
         switch (object->type) {
       
      memory.c
      in blackenObject()
      @@ -984,7 +984,7 @@

      26 Since the value is no longer on the stack, we need to make sure we trace the reference to it from the upvalue.

      Next are functions.

      -
        switch (object->type) {
      +
        switch (object->type) {
       
      memory.c
      in blackenObject()
          case OBJ_FUNCTION: {
      @@ -1000,9 +1000,9 @@ 

      26

      Each function has a reference to an ObjString containing the function’s name. More importantly, the function has a constant table packed full of references to other objects. We trace all of those using this helper:

      -
      memory.c
      +
      memory.c
      add after markValue()
      -
      static void markArray(ValueArray* array) {
      +
      static void markArray(ValueArray* array) {
         for (int i = 0; i < array->count; i++) {
           markValue(array->values[i]);
         }
      @@ -1012,7 +1012,7 @@ 

      26

      The last object type we have nowwe’ll add more in later chaptersis closures.

      -
        switch (object->type) {
      +
        switch (object->type) {
       
      memory.c
      in blackenObject()
          case OBJ_CLOSURE: {
      @@ -1031,7 +1031,7 @@ 

      26 of pointers to the upvalues it captures. We trace all of those.

      That’s the basic mechanism for processing a gray object, but there are two loose ends to tie up. First, some logging.

      -
      static void blackenObject(Obj* object) {
      +
      static void blackenObject(Obj* object) {
       
      memory.c
      in blackenObject()
      #ifdef DEBUG_LOG_GC
      @@ -1051,7 +1051,7 @@ 

      26 an infinite loop as it continually re-adds the same series of objects to the gray stack.

      The fix is easy.

      -
        if (object == NULL) return;
      +
        if (object == NULL) return;
       
      memory.c
      in markObject()
        if (object->isMarked) return;
      @@ -1070,7 +1070,7 @@ 

      26 either black or white. The black objects are reachable, and we want to hang on to them. Anything still white never got touched by the trace and is thus garbage. All that’s left is to reclaim them.

      -
        traceReferences();
      +
        traceReferences();
       
      memory.c
      in collectGarbage()
        sweep();
      @@ -1081,9 +1081,9 @@ 

      26
      memory.c, in collectGarbage()

      All of the logic lives in one function.

      -
      memory.c
      +
      memory.c
      add after traceReferences()
      -
      static void sweep() {
      +
      static void sweep() {
         Obj* previous = NULL;
         Obj* object = vm.objects;
         while (object != NULL) {
      @@ -1117,7 +1117,7 @@ 

      26 node so we can unlink its next pointer, and we have to handle the edge case where we are freeing the first node. But, otherwise, it’s pretty simpledelete every node in a linked list that doesn’t have a bit set in it.

      There’s one little addition:

      -
          if (object->isMarked) {
      +
          if (object->isMarked) {
       
      memory.c
      in sweep()
            object->isMarked = false;
      @@ -1166,7 +1166,7 @@ 

      —and their mark bitsare no longer around to check. So the right time is exactly between the marking and sweeping phases.

      -
        traceReferences();
      +
        traceReferences();
       
      memory.c
      in collectGarbage()
        tableRemoveWhite(&vm.strings);
      @@ -1176,7 +1176,7 @@ 

      ObjString* tableFindString(Table* table, const char* chars,
      +
       
       

      The implementation is here:

      -
      table.c
      +
      table.c
      add after tableFindString()
      -
      void tableRemoveWhite(Table* table) {
      +
      void tableRemoveWhite(Table* table) {
         for (int i = 0; i < table->capacity; i++) {
           Entry* entry = &table->entries[i];
           if (entry->key != NULL && !entry->key->obj.isMarked) {
      @@ -1361,7 +1361,7 @@ 

      26 . pile of live objects. As the amount of live memory goes down, we collect more frequently so that we don’t lose too much latency by waiting too long.

      The implementation requires two new bookkeeping fields in the VM.

      -
        ObjUpvalue* openUpvalues;
      +
        ObjUpvalue* openUpvalues;
       
      vm.h
      in struct VM
      @@ -1375,7 +1375,7 @@ 

      26 .

      The first is a running total of the number of bytes of managed memory the VM has allocated. The second is the threshold that triggers the next collection. We initialize them when the VM starts up.

      -
        vm.objects = NULL;
      +
        vm.objects = NULL;
       
      vm.c
      in initVM()
        vm.bytesAllocated = 0;
      @@ -1398,7 +1398,7 @@ 

      26 . real-world programs it is actually intended for. It’s like tuning a rally caryou need to take it out on the course.

      Every time we allocate or free some memory, we adjust the counter by that delta.

      -
      void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
      +
      void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
       
      memory.c
      in reallocate()
        vm.bytesAllocated += newSize - oldSize;
      @@ -1407,7 +1407,7 @@ 

      26 .
      memory.c, in reallocate()

      When the total crosses the limit, we run the collector.

      -
          collectGarbage();
      +
          collectGarbage();
       #endif
       
      memory.c
      in reallocate()
      @@ -1425,7 +1425,7 @@

      26 . objects by calling reallocate(), which lowers the value of bytesAllocated, so after the collection completes, we know how many live bytes remain. We adjust the threshold of the next GC based on that.

      -
        sweep();
      +
        sweep();
       
      memory.c
      in collectGarbage()
      @@ -1441,7 +1441,7 @@ 

      26 . the program uses grows, the threshold moves farther out to limit the total time spent re-traversing the larger live set. Like other numbers in this chapter, the scaling factor is basically arbitrary.

      -
      #endif
      +
      #endif
       
      memory.c
       
      @@ -1455,7 +1455,7 @@ 

      26 .

      You’d want to tune this in your implementation once you had some real programs to benchmark it on. Right now, we can at least log some of the statistics that we have. We capture the heap size before the collection.

      -
        printf("-- gc begin\n");
      +
        printf("-- gc begin\n");
       
      memory.c
      in collectGarbage()
        size_t before = vm.bytesAllocated;
      @@ -1464,7 +1464,7 @@ 

      26 .
      memory.c, in collectGarbage()

      And then print the results at the end.

      -
        printf("-- gc end\n");
      +
        printf("-- gc end\n");
       
      memory.c
      in collectGarbage()
        printf("   collected %zu bytes (from %zu to %zu) next at %zu\n",
      @@ -1536,7 +1536,7 @@ 

      The fix, as you’ve seen in other places, is to push the constant onto the stack temporarily.

      -
      int addConstant(Chunk* chunk, Value value) {
      +
       
       

      Once the constant table contains the object, we pop it off the stack.

      -
        writeValueArray(&chunk->constants, value);
      +
        writeValueArray(&chunk->constants, value);
       
      chunk.c
      in addConstant()
        pop();
      @@ -1556,7 +1556,7 @@ 

      When the GC is marking roots, it walks the chain of compilers and marks each of their functions, so the new constant is reachable now. We do need an include to call into the VM from the “chunk” module.

      -
      #include "memory.h"
      +
      #include "memory.h"
       
      chunk.c
      #include "vm.h"
       
      @@ -1571,7 +1571,7 @@ 

      26 .  is going. Since the string is brand new, it isn’t reachable anywhere. And resizing the string pool can trigger a collection. Again, we go ahead and stash the string on the stack first.

      -
        string->chars = chars;
      +
        string->chars = chars;
         string->hash = hash;
       
      object.c
      in allocateString()
      @@ -1583,7 +1583,7 @@

      26 . 
      object.c, in allocateString()

      And then pop it back off once it’s safely nestled in the table.

      -
        tableSet(&vm.strings, string, NIL_VAL);
      +
        tableSet(&vm.strings, string, NIL_VAL);
       
      object.c
      in allocateString()
        pop();
      @@ -1606,7 +1606,7 @@ 

      26  heap, which can in turn trigger a GC. Since we’ve already popped the operand strings by that point, they can potentially be missed by the mark phase and get swept away. Instead of popping them off the stack eagerly, we peek them.

      -
      static void concatenate() {
      +
      static void concatenate() {
       
      vm.c
      in concatenate()
      replace 2 lines
      @@ -1621,7 +1621,7 @@

      26 

      That way, they are still hanging out on the stack when we create the result string. Once that’s done, we can safely pop them off and replace them with the result.

      -
        ObjString* result = takeString(chars, length);
      +
        ObjString* result = takeString(chars, length);
       
      vm.c
      in concatenate()
        pop();
      diff --git a/site/global-variables.html b/site/global-variables.html
      index f03c8b541..a02ea1013 100644
      --- a/site/global-variables.html
      +++ b/site/global-variables.html
      @@ -1,5 +1,5 @@
       
      -
      +
       
       
       Global Variables · Crafting Interpreters
      @@ -127,7 +127,7 @@ 

      Global Variables

      a global variable before it’s defined. As long as the code doesn’t execute before the definition happens, everything is fine. In practice, that means you can refer to later variables inside the body of functions.

      -
      fun showVariable() {
      +
      fun showVariable() {
         print global;
       }
       
      @@ -149,13 +149,13 @@ 

      21 . 1State bind a new name to a value. The other kinds of statementscontrol flow, print, etc.are just called “statements”. We disallow declarations directly inside control flow statements, like this:

      -
      if (monday) var croissant = "yes"; // Error.
      +
      if (monday) var croissant = "yes"; // Error.
       

      Allowing it would raise confusing questions around the scope of the variable. So, like other languages, we prohibit it syntactically by having a separate grammar rule for the subset of statements that are allowed inside a control flow body.

      -
      statementexprStmt
      +
       

      Then we use a separate rule for the top level of a script and inside a block.

      -
      declarationclassDecl
      +
      declarationclassDecl
                      | funDecl
                      | varDecl
                      | statement ;
      @@ -181,7 +181,7 @@ 

      21 . 1State

      In this chapter, we’ll cover only a couple of statements and one declaration.

      -
      statementexprStmt
      +
      statementexprStmt
                      | printStmt ;
       
       declarationvarDecl
      @@ -190,7 +190,7 @@ 

      21 . 1State

      Up to now, our VM considered a “program” to be a single expression since that’s all we could parse and compile. In a full Lox implementation, a program is a sequence of declarations. We’re ready to support that now.

      -
        advance();
      +
        advance();
       
      compiler.c
      in compile()
      replace 2 lines
      @@ -206,9 +206,9 @@

      21 . 1State

      We keep compiling declarations until we hit the end of the source file. We compile a single declaration using this:

      -
      compiler.c
      +
      compiler.c
      add after expression()
      -
      static void declaration() {
      +
      static void declaration() {
         statement();
       }
       
      @@ -216,9 +216,9 @@

      21 . 1State

      We’ll get to variable declarations later in the chapter, so for now, we simply forward to statement().

      -
      compiler.c
      +
      compiler.c
      add after declaration()
      -
      static void statement() {
      +
      static void statement() {
         if (match(TOKEN_PRINT)) {
           printStatement();
         }
      @@ -229,7 +229,7 @@ 

      21 . 1State

      Blocks can contain declarations, and control flow statements can contain other statements. That means these two functions will eventually be recursive. We may as well write out the forward declarations now.

      -
      static void expression();
      +
      static void expression();
       
      compiler.c
      add after expression()
      static void statement();
      @@ -242,9 +242,9 @@ 

      21 . 1&

      We have two statement types to support in this chapter. Let’s start with print statements, which begin, naturally enough, with a print token. We detect that using this helper function:

      -
      compiler.c
      +
      compiler.c
      add after consume()
      -
      static bool match(TokenType type) {
      +
      static bool match(TokenType type) {
         if (!check(type)) return false;
         advance();
         return true;
      @@ -259,9 +259,9 @@ 

      21 . 1& -
      compiler.c
      +
      compiler.c
      add after consume()
      -
      static bool check(TokenType type) {
      +
      static bool check(TokenType type) {
         return parser.current.type == type;
       }
       
      @@ -279,9 +279,9 @@

      21 . 1&

      If we did match the print token, then we compile the rest of the statement here:

      -
      compiler.c
      +
      compiler.c
      add after expression()
      -
      static void printStatement() {
      +
      static void printStatement() {
         expression();
         consume(TOKEN_SEMICOLON, "Expect ';' after value.");
         emitByte(OP_PRINT);
      @@ -292,7 +292,7 @@ 

      21 . 1&

      A print statement evaluates an expression and prints the result, so we first parse and compile that expression. The grammar expects a semicolon after that, so we consume it. Finally, we emit a new instruction to print the result.

      -
        OP_NEGATE,
      +
        OP_NEGATE,
       
      chunk.h
      in enum OpCode
        OP_PRINT,
      @@ -301,7 +301,7 @@ 

      21 . 1&
      chunk.h, in enum OpCode

      At runtime, we execute this instruction like so:

      -
              break;
      +
              break;
       
      vm.c
      in run()
            case OP_PRINT: {
      @@ -335,7 +335,7 @@ 

      21 . 1& series of statements. If each statement grew or shrank the stack, it might eventually overflow or underflow.

      While we’re in the interpreter loop, we should delete a bit of code.

      -
            case OP_RETURN: {
      +
            case OP_RETURN: {
       
      vm.c
      in run()
      replace 2 lines
      @@ -353,7 +353,7 @@

      21 . 1& add functions. Right now, it exits the entire interpreter loop.

      As usual, a new instruction needs support in the disassembler.

      -
            return simpleInstruction("OP_NEGATE", offset);
      +
            return simpleInstruction("OP_NEGATE", offset);
       
      debug.c
      in disassembleInstruction()
          case OP_PRINT:
      @@ -363,7 +363,7 @@ 

      21 . 1&
      debug.c, in disassembleInstruction()

      That’s our print statement. If you want, give it a whirl:

      -
      print 1 + 2;
      +
      print 1 + 2;
       print 3 * 4;
       

      Exciting! OK, maybe not thrilling, but we can build scripts that contain as many @@ -371,7 +371,7 @@

      21 . 1&

      21 . 1 . 2Expression statements

      Wait until you see the next statement. If we don’t see a print keyword, then we must be looking at an expression statement.

      -
          printStatement();
      +
          printStatement();
       
      compiler.c
      in statement()
        } else {
      @@ -381,9 +381,9 @@ 

      21 
      compiler.c, in statement()

      It’s parsed like so:

      -
      compiler.c
      +
      compiler.c
      add after expression()
      -
      static void expressionStatement() {
      +
      static void expressionStatement() {
         expression();
         consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
         emitByte(OP_POP);
      @@ -395,13 +395,13 @@ 

      21  They’re how you write an expression in a context where a statement is expected. Usually, it’s so that you can call a function or evaluate an assignment for its side effect, like this:

      -
      brunch = "quiche";
      +
      brunch = "quiche";
       eat(brunch);
       

      Semantically, an expression statement evaluates the expression and discards the result. The compiler directly encodes that behavior. It compiles the expression, and then emits an OP_POP instruction.

      -
        OP_FALSE,
      +
        OP_FALSE,
       
      chunk.h
      in enum OpCode
        OP_POP,
      @@ -411,7 +411,7 @@ 

      21 

      As the name implies, that instruction pops the top value off the stack and forgets it.

      -
            case OP_FALSE: push(BOOL_VAL(false)); break;
      +
            case OP_FALSE: push(BOOL_VAL(false)); break;
       
      vm.c
      in run()
            case OP_POP: pop(); break;
      @@ -420,7 +420,7 @@ 

      21 
      vm.c, in run()

      We can disassemble it too.

      -
            return simpleInstruction("OP_FALSE", offset);
      +
            return simpleInstruction("OP_FALSE", offset);
       
      debug.c
      in disassembleInstruction()
          case OP_POP:
      @@ -444,7 +444,7 @@ 

      21  reports. The compiler exits panic mode when it reaches a synchronization point. For Lox, we chose statement boundaries as that point. Now that we have statements, we can implement synchronization.

      -
        statement();
      +
        statement();
       
      compiler.c
      in declaration()
      @@ -456,9 +456,9 @@ 

      21 

      If we hit a compile error while parsing the previous statement, we enter panic mode. When that happens, after the statement we start synchronizing.

      -
      compiler.c
      +
      compiler.c
      add after printStatement()
      -
      static void synchronize() {
      +
      static void synchronize() {
         parser.panicMode = false;
       
         while (parser.current.type != TOKEN_EOF) {
      @@ -505,7 +505,7 @@ 

      21 

      We can’t do either of the last two until we have some variables, so we start with declarations.

      -
      static void declaration() {
      +
      static void declaration() {
       
      compiler.c
      in declaration()
      replace 1 line
      @@ -522,9 +522,9 @@

      21 

      The placeholder parsing function we sketched out for the declaration grammar rule has an actual production now. If we match a var token, we jump here:

      -
      compiler.c
      +
      compiler.c
      add after expression()
      -
      static void varDeclaration() {
      +
      static void varDeclaration() {
         uint8_t global = parseVariable("Expect variable name.");
       
         if (match(TOKEN_EQUAL)) {
      @@ -548,17 +548,17 @@ 

      21  expect the statement to be terminated with a semicolon.

      There are two new functions here for working with variables and identifiers. Here is the first:

      -
      static void parsePrecedence(Precedence precedence);
      +
      static void parsePrecedence(Precedence precedence);
       
       
      compiler.c
      add after parsePrecedence()
      @@ -571,7 +571,7 @@

      21 

      It requires the next token to be an identifier, which it consumes and sends here:

      -
      static void parsePrecedence(Precedence precedence);
      +
      static void parsePrecedence(Precedence precedence);
       
       
      compiler.c
      add after parsePrecedence()
      @@ -592,9 +592,9 @@

      21  the table.

      This function returns that index all the way to varDeclaration() which later hands it over to here:

      -
      compiler.c
      +
      compiler.c
      add after parseVariable()
      -
      static void defineVariable(uint8_t global) {
      +
      static void defineVariable(uint8_t global) {
         emitBytes(OP_DEFINE_GLOBAL, global);
       }
       
      @@ -613,7 +613,7 @@

      21  and assignment expressions access them.

      Over in the runtime, we begin with this new instruction:

      -
        OP_POP,
      +
        OP_POP,
       
      chunk.h
      in enum OpCode
        OP_DEFINE_GLOBAL,
      @@ -622,7 +622,7 @@ 

      21 
      chunk.h, in enum OpCode

      Thanks to our handy-dandy hash table, the implementation isn’t too hard.

      -
            case OP_POP: pop(); break;
      +
            case OP_POP: pop(); break;
       
      vm.c
      in run()
            case OP_DEFINE_GLOBAL: {
      @@ -649,7 +649,7 @@ 

      21  useful in a REPL session, so the VM supports that by simply overwriting the value if the key happens to already be in the hash table.

      There’s another little helper macro:

      -
      #define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
      +
      #define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
       
      vm.c
      in run()
      #define READ_STRING() AS_STRING(READ_CONSTANT())
      @@ -664,7 +664,7 @@ 

      21  constant.

      Because we care about lexical hygiene, we also undefine this macro at the end of the interpret function.

      -
      #undef READ_CONSTANT
      +
      #undef READ_CONSTANT
       
      vm.c
      in run()
      #undef READ_STRING
      @@ -675,7 +675,7 @@ 

      21 

      I keep saying “the hash table”, but we don’t actually have one yet. We need a place to store these globals. Since we want them to persist as long as clox is running, we store them right in the VM.

      -
        Value* stackTop;
      +
        Value* stackTop;
       
      vm.h
      in struct VM
        Table globals;
      @@ -685,7 +685,7 @@ 

      21 

      As we did with the string table, we need to initialize the hash table to a valid state when the VM boots up.

      -
        vm.objects = NULL;
      +
        vm.objects = NULL;
       
      vm.c
      in initVM()
      @@ -700,7 +700,7 @@ 

      21 

      The process will free everything on exit, but it feels undignified to require the operating system to clean up our mess.

      -
      void freeVM() {
      +
      void freeVM() {
       
      vm.c
      in freeVM()
        freeTable(&vm.globals);
      @@ -709,7 +709,7 @@ 

      21 
      vm.c, in freeVM()

      As usual, we want to be able to disassemble the new instruction too.

      -
            return simpleInstruction("OP_POP", offset);
      +
            return simpleInstruction("OP_POP", offset);
       
      debug.c
      in disassembleInstruction()
          case OP_DEFINE_GLOBAL:
      @@ -724,7 +724,7 @@ 

      21 

      21 . 3Reading Variables

      As in every programming language ever, we access a variable’s value using its name. We hook up identifier tokens to the expression parser here:

      -
        [TOKEN_LESS_EQUAL]    = {NULL,     binary, PREC_COMPARISON},
      +
        [TOKEN_LESS_EQUAL]    = {NULL,     binary, PREC_COMPARISON},
       
      compiler.c
      replace 1 line
        [TOKEN_IDENTIFIER]    = {variable, NULL,   PREC_NONE},
      @@ -733,9 +733,9 @@ 

      21 . 
      compiler.c, replace 1 line

      That calls this new parser function:

      -
      compiler.c
      +
      compiler.c
      add after string()
      -
      static void variable() {
      +
      static void variable() {
         namedVariable(parser.previous);
       }
       
      @@ -743,9 +743,9 @@

      21 . 

      Like with declarations, there are a couple of tiny helper functions that seem pointless now but will become more useful in later chapters. I promise.

      -
      compiler.c
      +
      compiler.c
      add after string()
      -
      static void namedVariable(Token name) {
      +
      static void namedVariable(Token name) {
         uint8_t arg = identifierConstant(&name);
         emitBytes(OP_GET_GLOBAL, arg);
       }
      @@ -756,7 +756,7 @@ 

      21 .  given identifier token and add its lexeme to the chunk’s constant table as a string. All that remains is to emit an instruction that loads the global variable with that name. Here’s the instruction:

      -
        OP_POP,
      +
        OP_POP,
       
      chunk.h
      in enum OpCode
        OP_GET_GLOBAL,
      @@ -765,7 +765,7 @@ 

      21 . 
      chunk.h, in enum OpCode

      Over in the interpreter, the implementation mirrors OP_DEFINE_GLOBAL.

      -
            case OP_POP: pop(); break;
      +
            case OP_POP: pop(); break;
       
      vm.c
      in run()
            case OP_GET_GLOBAL: {
      @@ -789,7 +789,7 @@ 

      21 .  never been defined. That’s a runtime error in Lox, so we report it and exit the interpreter loop if that happens. Otherwise, we take the value and push it onto the stack.

      -
            return simpleInstruction("OP_POP", offset);
      +
            return simpleInstruction("OP_POP", offset);
       
      debug.c
      in disassembleInstruction()
          case OP_GET_GLOBAL:
      @@ -800,7 +800,7 @@ 

      21 . 

      A little bit of disassembling, and we’re done. Our interpreter is now able to run code like this:

      -
      var beverage = "cafe au lait";
      +
      var beverage = "cafe au lait";
       var breakfast = "beignets with " + beverage;
       print breakfast;
       
      @@ -816,7 +816,7 @@

      21 . 4Assig

      Our bytecode VM uses a single-pass compiler. It parses and generates bytecode on the fly without any intermediate AST. As soon as it recognizes a piece of syntax, it emits code for it. Assignment doesn’t naturally fit that. Consider:

      -
      menu.brunch(sunday).beverage = "mimosa";
      +
      menu.brunch(sunday).beverage = "mimosa";
       

      In this code, the parser doesn’t realize menu.brunch(sunday).beverage is the target of an assignment and not a normal expression until it reaches =, many @@ -839,7 +839,7 @@

      21 . 4Assig one, we compile it as an assignment or setter instead of a variable access or getter.

      We don’t have setters to worry about yet, so all we need to handle are variables.

      -
        uint8_t arg = identifierConstant(&name);
      +
        uint8_t arg = identifierConstant(&name);
       
      compiler.c
      in namedVariable()
      replace 1 line
      @@ -859,7 +859,7 @@

      21 . 4Assig after the identifier. If we find one, instead of emitting code for a variable access, we compile the assigned value and then emit an assignment instruction.

      That’s the last instruction we need to add in this chapter.

      -
        OP_DEFINE_GLOBAL,
      +
        OP_DEFINE_GLOBAL,
       
      chunk.h
      in enum OpCode
        OP_SET_GLOBAL,
      @@ -868,7 +868,7 @@ 

      21 . 4Assig
      chunk.h, in enum OpCode

      As you’d expect, its runtime behavior is similar to defining a new variable.

      -
            }
      +
            }
       
      vm.c
      in run()
            case OP_SET_GLOBAL: {
      @@ -898,7 +898,7 @@ 

      21 . 4Assig stack. Remember, assignment is an expression, so it needs to leave that value there in case the assignment is nested inside some larger expression.

      Add a dash of disassembly:

      -
            return constantInstruction("OP_DEFINE_GLOBAL", chunk,
      +
            return constantInstruction("OP_DEFINE_GLOBAL", chunk,
                                        offset);
       
      debug.c
      in disassembleInstruction()
      @@ -909,7 +909,7 @@

      21 . 4Assig
      debug.c, in disassembleInstruction()

      So we’re done, right? Well . . . not quite. We’ve made a mistake! Take a gander at:

      -
      a * b = c + d;
      +
      a * b = c + d;
       

      According to Lox’s grammar, = has the lowest precedence, so this should be parsed roughly like:

      The expected parse, like '(a * b) = (c + d)'. @@ -941,7 +941,7 @@

      21 . 4Assig precedence is, logically enough, parsePrecedence(). The variable() function doesn’t need to know the actual level. It just cares that the precedence is low enough to allow assignment, so we pass that fact in as a Boolean.

      -
          error("Expect expression.");
      +
          error("Expect expression.");
           return;
         }
       
      @@ -959,17 +959,17 @@ 

      21 . 4Assig

      Since assignment is the lowest-precedence expression, the only time we allow an assignment is when parsing an assignment expression or top-level expression like in an expression statement. That flag makes its way to the parser function here:

      -
      compiler.c
      +
      compiler.c
      function variable()
      replace 3 lines
      -
      static void variable(bool canAssign) {
      +
      static void variable(bool canAssign) {
         namedVariable(parser.previous, canAssign);
       }
       
      compiler.c, function variable(), replace 3 lines

      Which passes it through a new parameter:

      -
      compiler.c
      +
      compiler.c
      function namedVariable()
      replace 1 line
      static void namedVariable(Token name, bool canAssign) {
      @@ -978,7 +978,7 @@ 

      21 . 4Assig
      compiler.c, function namedVariable(), replace 1 line

      And then finally uses it here:

      -
        uint8_t arg = identifierConstant(&name);
      +
        uint8_t arg = identifierConstant(&name);
       
       
      compiler.c
      in namedVariable()
      @@ -1001,7 +1001,7 @@

      21 . 4Assig

      Then parsePrecedence() silently returns back to the caller. That also isn’t right. If the = doesn’t get consumed as part of the expression, nothing else is going to consume it. It’s an error and we should report it.

      -
          infixRule();
      +
          infixRule();
         }
       
      compiler.c
      in parsePrecedence()
      @@ -1027,7 +1027,7 @@

      21 . 4Assig

      So we’re going to finish off this chapter with some grunt work. First, let’s go ahead and pass the flag to the infix parse functions.

      -
          ParseFn infixRule = getRule(parser.previous.type)->infix;
      +
          ParseFn infixRule = getRule(parser.previous.type)->infix;
       
      compiler.c
      in parsePrecedence()
      replace 1 line
      @@ -1038,7 +1038,7 @@

      21 . 4Assig

      We’ll need that for setters eventually. Then we’ll fix the typedef for the function type.

      -
      } Precedence;
      +
      } Precedence;
       
       
      compiler.c
      add after enum Precedence
      @@ -1052,7 +1052,7 @@

      21 . 4Assig

      And some completely tedious code to accept this parameter in all of our existing parse functions. Here:

      -
      compiler.c
      +
      compiler.c
      function binary()
      replace 1 line
      static void binary(bool canAssign) {
      @@ -1061,7 +1061,7 @@ 

      21 . 4Assig
      compiler.c, function binary(), replace 1 line

      And here:

      -
      compiler.c
      +
      compiler.c
      function literal()
      replace 1 line
      static void literal(bool canAssign) {
      @@ -1070,7 +1070,7 @@ 

      21 . 4Assig
      compiler.c, function literal(), replace 1 line

      And here:

      -
      compiler.c
      +
      compiler.c
      function grouping()
      replace 1 line
      static void grouping(bool canAssign) {
      @@ -1079,7 +1079,7 @@ 

      21 . 4Assig
      compiler.c, function grouping(), replace 1 line

      And here:

      -
      compiler.c
      +
      compiler.c
      function number()
      replace 1 line
      static void number(bool canAssign) {
      @@ -1088,7 +1088,7 @@ 

      21 . 4Assig
      compiler.c, function number(), replace 1 line

      And here too:

      -
      compiler.c
      +
      compiler.c
      function string()
      replace 1 line
      static void string(bool canAssign) {
      @@ -1097,7 +1097,7 @@ 

      21 . 4Assig
      compiler.c, function string(), replace 1 line

      And, finally:

      -
      compiler.c
      +
      compiler.c
      function unary()
      replace 1 line
      static void unary(bool canAssign) {
      @@ -1107,7 +1107,7 @@ 

      21 . 4Assig

      Phew! We’re back to a C program we can compile. Fire it up and now you can run this:

      -
      var breakfast = "beignets";
      +
      var breakfast = "beignets";
       var beverage = "cafe au lait";
       breakfast = "beignets with " + beverage;
       
      @@ -1141,7 +1141,7 @@ 

      Challenges

      compile error when the function is first defined.

      But when a user runs a Lox script, the compiler has access to the full text of the entire program before any code is run. Consider this program:

      -
      fun useVar() {
      +
      fun useVar() {
         print oops;
       }
       
      diff --git a/site/hash-tables.html b/site/hash-tables.html
      index 0e434bd73..901db8e35 100644
      --- a/site/hash-tables.html
      +++ b/site/hash-tables.html
      @@ -1,5 +1,5 @@
       
      -
      +
       
       
       Hash Tables · Crafting Interpreters
      @@ -395,9 +395,9 @@ 

      20 

      The great thing about hash tables compared to other classic techniques like balanced search trees is that the actual data structure is so simple. Ours goes into a new module.

      -
      table.h
      +
      table.h
      create new file
      -
      #ifndef clox_table_h
      +
      #ifndef clox_table_h
       #define clox_table_h
       
       #include "common.h"
      @@ -418,7 +418,7 @@ 

      20  key/value pairs currently stored in it (count). The ratio of count to capacity is exactly the load factor of the hash table.

      Each entry is one of these:

      -
      #include "value.h"
      +
      #include "value.h"
       
      table.h
       
      @@ -442,7 +442,7 @@ 

      20  keys.

      To create a new, empty hash table, we declare a constructor-like function.

      -
      } Table;
      +
      } Table;
       
       
      table.h
      add after struct Table
      @@ -454,9 +454,9 @@

      20 

      We need a new implementation file to define that. While we’re at it, let’s get all of the pesky includes out of the way.

      -
      table.c
      +
      table.c
      create new file
      -
      #include <stdlib.h>
      +
      #include <stdlib.h>
       #include <string.h>
       
       #include "memory.h"
      @@ -475,7 +475,7 @@ 

      20 

      As in our dynamic value array type, a hash table initially starts with zero capacity and a NULL array. We don’t allocate anything until needed. Assuming we do eventually allocate something, we need to be able to free it too.

      -
      void initTable(Table* table);
      +
      void initTable(Table* table);
       
      table.h
      add after initTable()
      void freeTable(Table* table);
      @@ -486,9 +486,9 @@ 

      20 
      table.h, add after initTable()

      And its glorious implementation:

      -
      table.c
      +
      table.c
      add after initTable()
      -
      void freeTable(Table* table) {
      +
      void freeTable(Table* table) {
         FREE_ARRAY(Entry, table->entries, table->capacity);
         initTable(table);
       }
      @@ -510,7 +510,7 @@ 

      20 . 4 walk the string every time we looked for a key in the table. So we’ll do the obvious thing: cache it.

      Over in the “object” module in ObjString, we add:

      -
        char* chars;
      +
        char* chars;
       
      object.h
      in struct ObjString
        uint32_t hash;
      @@ -525,7 +525,7 @@ 

      20 . 4 good time to also do the O(n) calculation of the string’s hash.

      Whenever we call the internal function to allocate a string, we pass in its hash code.

      -
      object.c
      +
      object.c
      function allocateString()
      replace 1 line
      static ObjString* allocateString(char* chars, int length,
      @@ -535,7 +535,7 @@ 

      20 . 4
      object.c, function allocateString(), replace 1 line

      That function simply stores the hash in the struct.

      -
        string->chars = chars;
      +
        string->chars = chars;
       
      object.c
      in allocateString()
        string->hash = hash;
      @@ -547,7 +547,7 @@ 

      20 . 4

      The fun happens over at the callers. allocateString() is called from two places: the function that copies a string and the one that takes ownership of an existing dynamically allocated string. We’ll start with the first.

      -
      ObjString* copyString(const char* chars, int length) {
      +
      ObjString* copyString(const char* chars, int length) {
       
      object.c
      in copyString()
        uint32_t hash = hashString(chars, length);
      @@ -556,7 +556,7 @@ 

      20 . 4
      object.c, in copyString()

      No magic here. We calculate the hash code and then pass it along.

      -
        memcpy(heapChars, chars, length);
      +
        memcpy(heapChars, chars, length);
         heapChars[length] = '\0';
       
      object.c
      in copyString()
      @@ -567,7 +567,7 @@

      20 . 4
      object.c, in copyString(), replace 1 line

      The other string function is similar.

      -
      ObjString* takeString(char* chars, int length) {
      +
      ObjString* takeString(char* chars, int length) {
       
      object.c
      in takeString()
      replace 1 line
      @@ -578,9 +578,9 @@

      20 . 4
      object.c, in takeString(), replace 1 line

      The interesting code is over here:

      -
      object.c
      +
      object.c
      add after allocateString()
      -
      static uint32_t hashString(const char* key, int length) {
      +
      static uint32_t hashString(const char* key, int length) {
         uint32_t hash = 2166136261u;
         for (int i = 0; i < length; i++) {
           hash ^= (uint8_t)key[i];
      @@ -606,7 +606,7 @@ 

      20 . 4

      20 . 4 . 2Inserting entries

      Now that string objects know their hash code, we can start putting them into hash tables.

      -
      void freeTable(Table* table);
      +
      void freeTable(Table* table);
       
      table.h
      add after freeTable()
      bool tableSet(Table* table, ObjString* key, Value value);
      @@ -619,9 +619,9 @@ 

      20 . 

      This function adds the given key/value pair to the given hash table. If an entry for that key is already present, the new value overwrites the old value. The function returns true if a new entry was added. Here’s the implementation:

      -
      table.c
      +
      table.c
      add after freeTable()
      -
      bool tableSet(Table* table, ObjString* key, Value value) {
      +
      bool tableSet(Table* table, ObjString* key, Value value) {
         Entry* entry = findEntry(table->entries, table->capacity, key);
         bool isNewKey = entry->key == NULL;
         if (isNewKey) table->count++;
      @@ -644,7 +644,7 @@ 

      20 . 

      We’re missing a little something here, though. We haven’t actually allocated the Entry array yet. Oops! Before we can insert anything, we need to make sure we have an array, and that it’s big enough.

      -
      bool tableSet(Table* table, ObjString* key, Value value) {
      +
      bool tableSet(Table* table, ObjString* key, Value value) {
       
      table.c
      in tableSet()
        if (table->count + 1 > table->capacity * TABLE_MAX_LOAD) {
      @@ -662,7 +662,7 @@ 

      20 .  a multiple to ensure that we get amortized constant performance over a series of inserts.

      The interesting difference here is that TABLE_MAX_LOAD constant.

      -
      #include "value.h"
      +
      #include "value.h"
       
       
      table.c
      #define TABLE_MAX_LOAD 0.75
      @@ -683,9 +683,9 @@ 

      20 . 

      We’ll get to the implementation of adjustCapacity() soon. First, let’s look at that findEntry() function you’ve been wondering about.

      -
      table.c
      +
      table.c
      add after freeTable()
      -
      static Entry* findEntry(Entry* entries, int capacity,
      +
      static Entry* findEntry(Entry* entries, int capacity,
                               ObjString* key) {
         uint32_t index = key->hash % capacity;
         for (;;) {
      @@ -752,9 +752,9 @@ 

      20

      Before we can put entries in the hash table, we do need a place to actually store them. We need to allocate an array of buckets. That happens in this function:

      -
      table.c
      +
      table.c
      add after findEntry()
      -
      static void adjustCapacity(Table* table, int capacity) {
      +
      static void adjustCapacity(Table* table, int capacity) {
         Entry* entries = ALLOCATE(Entry, capacity);
         for (int i = 0; i < capacity; i++) {
           entries[i].key = NULL;
      @@ -780,7 +780,7 @@ 

      20

      Those new buckets may have new collisions that we need to deal with. So the simplest way to get every entry where it belongs is to rebuild the table from scratch by re-inserting every entry into the new empty array.

      -
          entries[i].value = NIL_VAL;
      +
          entries[i].value = NIL_VAL;
         }
       
      table.c
      in adjustCapacity()
      @@ -807,7 +807,7 @@

      20 Table struct. That way, we can pass the new array and capacity before we’ve stored those in the struct.)

      After that’s done, we can release the memory for the old array.

      -
          dest->value = entry->value;
      +
          dest->value = entry->value;
         }
       
       
      table.c
      @@ -822,7 +822,7 @@

      20 maintain the desired load capacity.

      While we’re at it, let’s also define a helper function for copying all of the entries of one hash table into another.

      -
      bool tableSet(Table* table, ObjString* key, Value value);
      +
      bool tableSet(Table* table, ObjString* key, Value value);
       
      table.h
      add after tableSet()
      void tableAddAll(Table* from, Table* to);
      @@ -835,9 +835,9 @@ 

      20

      We won’t need this until much later when we support method inheritance, but we may as well implement it now while we’ve got all the hash table stuff fresh in our minds.

      -
      table.c
      +
      table.c
      add after tableSet()
      -
      void tableAddAll(Table* from, Table* to) {
      +
      void tableAddAll(Table* from, Table* to) {
         for (int i = 0; i < from->capacity; i++) {
           Entry* entry = &from->entries[i];
           if (entry->key != NULL) {
      @@ -855,7 +855,7 @@ 

      20 . 

      Now that our hash table contains some stuff, let’s start pulling things back out. Given a key, we can look up the corresponding value, if there is one, with this function:

      -
      void freeTable(Table* table);
      +
      void freeTable(Table* table);
       
      table.h
      add after freeTable()
      bool tableGet(Table* table, ObjString* key, Value* value);
      @@ -867,9 +867,9 @@ 

      20 .  true, otherwise it returns false. If the entry exists, the value output parameter points to the resulting value.

      Since findEntry() already does the hard work, the implementation isn’t bad.

      -
      table.c
      +
      table.c
      add after findEntry()
      -
      bool tableGet(Table* table, ObjString* key, Value* value) {
      +
      bool tableGet(Table* table, ObjString* key, Value* value) {
         if (table->count == 0) return false;
       
         Entry* entry = findEntry(table->entries, table->capacity, key);
      @@ -904,7 +904,7 @@ 

      20 . 4& list.

      At least the declaration is simple.

      -
      bool tableSet(Table* table, ObjString* key, Value value);
      +
      bool tableSet(Table* table, ObjString* key, Value value);
       
      table.h
      add after tableSet()
      bool tableDelete(Table* table, ObjString* key);
      @@ -936,9 +936,9 @@ 

      20 . 4& so that deleting an entry doesn’t break any implicit collision chains and we can still find entries after it.

      Instead of deleting 'biscuit', it's replaced with a tombstone.

      The code looks like this:

      -
      table.c
      +
      table.c
      add after tableSet()
      -
      bool tableDelete(Table* table, ObjString* key) {
      +
      bool tableDelete(Table* table, ObjString* key) {
         if (table->count == 0) return false;
       
         // Find the entry.
      @@ -966,7 +966,7 @@ 

      20 . 4& of the characteristics of an empty one.

      When we are following a probe sequence during a lookup, and we hit a tombstone, we note it and keep going.

      -
        for (;;) {
      +
        for (;;) {
           Entry* entry = &entries[index];
       
      table.c
      in findEntry()
      @@ -990,7 +990,7 @@

      20 . 4&
      table.c, in findEntry(), replace 3 lines

      The first time we pass a tombstone, we store it in this local variable:

      -
        uint32_t index = key->hash % capacity;
      +
        uint32_t index = key->hash % capacity;
       
      table.c
      in findEntry()
        Entry* tombstone = NULL;
      @@ -1028,7 +1028,7 @@ 

      20 . code. The count is no longer the number of entries in the hash table, it’s the number of entries plus tombstones. That implies that we increment the count during insertion only if the new entry goes into an entirely empty bucket.

      -
        bool isNewKey = entry->key == NULL;
      +
        bool isNewKey = entry->key == NULL;
       
      table.c
      in tableSet()
      replace 1 line
      @@ -1046,7 +1046,7 @@

      20 . over. They don’t add any value since we’re rebuilding the probe sequences anyway, and would just slow down lookups. That means we need to recalculate the count since it may change during a resize. So we clear it out:

      -
        }
      +
        }
       
       
      table.c
      in adjustCapacity()
      @@ -1056,7 +1056,7 @@

      20 .
      table.c, in adjustCapacity()

      Then each time we find a non-tombstone entry, we increment it.

      -
          dest->value = entry->value;
      +
          dest->value = entry->value;
       
      table.c
      in adjustCapacity()
          table->count++;
      @@ -1140,7 +1140,7 @@ 

      20 . 5< once we intern all the strings. In order to reliably deduplicate all strings, the VM needs to be able to find every string that’s created. We do that by giving it a hash table to store them all.

      -
        Value* stackTop;
      +
        Value* stackTop;
       
      vm.h
      in struct VM
        Table strings;
      @@ -1149,7 +1149,7 @@ 

      20 . 5<
      vm.h, in struct VM

      As usual, we need an include.

      -
      #include "chunk.h"
      +
      #include "chunk.h"
       
      vm.h
      #include "table.h"
       
      #include "value.h"
      @@ -1157,7 +1157,7 @@ 

      20 . 5<
      vm.h

      When we spin up a new VM, the string table is empty.

      -
        vm.objects = NULL;
      +
        vm.objects = NULL;
       
      vm.c
      in initVM()
        initTable(&vm.strings);
      @@ -1166,7 +1166,7 @@ 

      20 . 5<
      vm.c, in initVM()

      And when we shut down the VM, we clean up any resources used by the table.

      -
      void freeVM() {
      +
      void freeVM() {
       
      vm.c
      in freeVM()
        freeTable(&vm.strings);
      @@ -1177,7 +1177,7 @@ 

      20 . 5<

      Some languages have a separate type or an explicit step to intern a string. For clox, we’ll automatically intern every one. That means whenever we create a new unique string, we add it to the table.

      -
        string->hash = hash;
      +
        string->hash = hash;
       
      object.c
      in allocateString()
        tableSet(&vm.strings, string, NIL_VAL);
      @@ -1191,7 +1191,7 @@ 

      20 . 5<

      This gets a string into the table assuming that it’s unique, but we need to actually check for duplication before we get here. We do that in the two higher-level functions that call allocateString(). Here’s one:

      -
        uint32_t hash = hashString(chars, length);
      +
        uint32_t hash = hashString(chars, length);
       
      object.c
      in copyString()
        ObjString* interned = tableFindString(&vm.strings, chars, length,
      @@ -1207,7 +1207,7 @@ 

      20 . 5< string. Otherwise, we fall through, allocate a new string, and store it in the string table.

      Taking ownership of a string is a little different.

      -
        uint32_t hash = hashString(chars, length);
      +
        uint32_t hash = hashString(chars, length);
       
      object.c
      in takeString()
        ObjString* interned = tableFindString(&vm.strings, chars, length,
      @@ -1226,7 +1226,7 @@ 

      20 . 5< is being passed to this function and we no longer need the duplicate string, it’s up to us to free it.

      Before we get to the new function we need to write, there’s one more include.

      -
      #include "object.h"
      +
      #include "object.h"
       
      object.c
      #include "table.h"
       
      #include "value.h"
      @@ -1236,7 +1236,7 @@ 

      20 . 5<

      To look for a string in the table, we can’t use the normal tableGet() function because that calls findEntry(), which has the exact problem with duplicate strings that we’re trying to fix right now. Instead, we use this new function:

      -
      void tableAddAll(Table* from, Table* to);
      +
      void tableAddAll(Table* from, Table* to);
       
      table.h
      add after tableAddAll()
      ObjString* tableFindString(Table* table, const char* chars,
      @@ -1248,9 +1248,9 @@ 

      20 . 5<
      table.h, add after tableAddAll()

      The implementation looks like so:

      -
      table.c
      +
      table.c
      add after tableAddAll()
      -
      ObjString* tableFindString(Table* table, const char* chars,
      +
      ObjString* tableFindString(Table* table, const char* chars,
                                  int length, uint32_t hash) {
         if (table->count == 0) return NULL;
       
      @@ -1288,7 +1288,7 @@ 

      20 . 5<

      In fact, now that we’ve interned all the strings, we can take advantage of it in the bytecode interpreter. When a user does == on two objects that happen to be strings, we don’t need to test the characters any more.

      -
          case VAL_NUMBER: return AS_NUMBER(a) == AS_NUMBER(b);
      +
          case VAL_NUMBER: return AS_NUMBER(a) == AS_NUMBER(b);
       
      value.c
      in valuesEqual()
      replace 7 lines
      diff --git a/site/index.html b/site/index.html index baad2c804..8fc9e6315 100644 --- a/site/index.html +++ b/site/index.html @@ -1,5 +1,5 @@ - + Crafting Interpreters diff --git a/site/inheritance.html b/site/inheritance.html index 8b789cbdf..c569b402f 100644 --- a/site/inheritance.html +++ b/site/inheritance.html @@ -1,5 +1,5 @@ - + Inheritance · Crafting Interpreters @@ -151,7 +151,7 @@

      This late in the game, I’d rather not add a new reserved word or token to the lexer. We don’t have extends or even :, so we’ll follow Ruby and use a less-than sign (<).

      -
      class Doughnut {
      +
       

      To work this into the grammar, we add a new optional clause in our existing classDecl rule.

      -
      classDecl"class" IDENTIFIER ( "<" IDENTIFIER )?
      +
      classDecl"class" IDENTIFIER ( "<" IDENTIFIER )?
                        "{" function* "}" ;
       

      After the class name, you can have a < followed by the superclass’s name. The @@ -170,7 +170,7 @@

      no superclass, not even an implicit one.

      We want to capture this new syntax in the class declaration’s AST node.

      -
            "Block      : List<Stmt> statements",
      +
            "Block      : List<Stmt> statements",
       
      tool/GenerateAst.java
      in main()
      replace 1 line
      @@ -186,7 +186,7 @@

      The new parser code follows the grammar directly.

      -
          Token name = consume(IDENTIFIER, "Expect class name.");
      +
       
       

      Once we’ve (possibly) parsed a superclass declaration, we store it in the AST.

      -
          consume(RIGHT_BRACE, "Expect '}' after class body.");
      +
          consume(RIGHT_BRACE, "Expect '}' after class body.");
       
       
      lox/Parser.java
      in classDeclaration()
      @@ -215,7 +215,7 @@

      If we didn’t parse a superclass clause, the superclass expression will be null. We’ll have to make sure the later passes check for that. The first of those is the resolver.

      -
          define(stmt.name);
      +
          define(stmt.name);
       
      lox/Resolver.java
      in visitClassStmt()
      @@ -237,13 +237,13 @@ 

      Because even well-intentioned programmers sometimes write weird code, there’s a silly edge case we need to worry about while we’re in here. Take a look at this:

      -
      class Oops < Oops {}
      +
      class Oops < Oops {}
       

      There’s no way this will do anything useful, and if we let the runtime try to run this, it will break the expectation the interpreter has about there not being cycles in the inheritance chain. The safest thing is to detect this case statically and report it as an error.

      -
          define(stmt.name);
      +
       
       

      Assuming the code resolves without error, the AST travels to the interpreter.

      -
        public Void visitClassStmt(Stmt.Class stmt) {
      +
        public Void visitClassStmt(Stmt.Class stmt) {
       
      lox/Interpreter.java
      in visitClassStmt()
          Object superclass = null;
      @@ -278,7 +278,7 @@ 

      -
      var NotAClass = "I am totally not a class";
      +
      var NotAClass = "I am totally not a class";
       
       class Subclass < NotAClass {} // ?!
       
      @@ -286,7 +286,7 @@

      —its AST nodeinto its runtime representation, a LoxClass object. We need to plumb the superclass through to that too. We pass the superclass to the constructor.

      -
            methods.put(method.name.lexeme, function);
      +
            methods.put(method.name.lexeme, function);
           }
       
       

      The constructor stores it in a field.

      -
      lox/LoxClass.java
      +

      Which we declare here:

      -
        final String name;
      +
        final String name;
       
      lox/LoxClass.java
      in class LoxClass
        final LoxClass superclass;
      @@ -340,7 +340,7 @@ 

      13 .̴

      This lines up with one of the goals of inheritanceto give users a way to reuse code across classes. Implementing this in our interpreter is astonishingly easy.

      -
            return methods.get(name);
      +
            return methods.get(name);
           }
       
       
      lox/LoxClass.java
      @@ -356,7 +356,7 @@

      13 .̴

      That’s literally all there is to it. When we are looking up a method on an instance, if we don’t find it on the instance’s class, we recurse up through the superclass chain and look there. Give it a try:

      -
      class Doughnut {
      +
      class Doughnut {
         cook() {
           print "Fry until golden brown.";
         }
      @@ -382,7 +382,7 @@ 

      super for this, and we’ll use that same syntax in Lox. Here is an example:

      -
      class Doughnut {
      +
       

      If you run this, it should print:

      -
      Fry until golden brown.
      +
      Fry until golden brown.
       Pipe full of custard and coat with chocolate.
       

      We have a new expression form. The super keyword, followed by a dot and an @@ -409,11 +409,11 @@

      13 . 3 . 1super, the subsequent . and property name are inseparable parts of the super expression. You can’t have a bare super token all by itself.

      -
      print super; // Syntax error.
      +
      print super; // Syntax error.
       

      So the new clause we add to the primary rule in our grammar includes the property access as well.

      -
      primary"true" | "false" | "nil" | "this"
      +
      primary"true" | "false" | "nil" | "this"
                      | NUMBER | STRING | IDENTIFIER | "(" expression ")"
                      | "super" "." IDENTIFIER ;
       
      @@ -421,13 +421,13 @@

      13 . 3 . 1not part of the expression. Instead, a super call is a super access followed by a function call. Like other method calls, you can get a handle to a superclass method and invoke it separately.

      -
      var method = super.cook;
      +
      var method = super.cook;
       method();
       

      So the super expression itself contains only the token for the super keyword and the name of the method being looked up. The corresponding syntax tree node is thus:

      -
            "Set      : Expr object, Token name, Expr value",
      +
            "Set      : Expr object, Token name, Expr value",
       
      tool/GenerateAst.java
      in main()
            "Super    : Token keyword, Token method",
      @@ -440,7 +440,7 @@ 

      13 . 3 . 1

      Following the grammar, the new parsing code goes inside our existing primary() method.

      -
            return new Expr.Literal(previous().literal);
      +
            return new Expr.Literal(previous().literal);
           }
       
      lox/Parser.java
      in primary()
      @@ -467,7 +467,7 @@

      13 . 3 .  this, the object the surrounding method was called on. That coincidentally produces the right behavior in a lot of cases, but that’s not actually correct. Gaze upon:

      -
      class A {
      +
      class A {
         method() {
           print "A method";
         }
      @@ -544,7 +544,7 @@ 

      13 . 3 . 

      That’s a lot of machinery, but we’ll get through it a step at a time. Before we can get to creating the environment at runtime, we need to handle the corresponding scope chain in the resolver.

      -
            resolve(stmt.superclass);
      +
            resolve(stmt.superclass);
           }
       
      lox/Resolver.java
      in visitClassStmt()
      @@ -563,7 +563,7 @@

      13 . 3 . 

      If the class declaration has a superclass, then we create a new scope surrounding all of its methods. In that scope, we define the name “super”. Once we’re done resolving the class’s methods, we discard that scope.

      -
          endScope();
      +
          endScope();
       
       
      lox/Resolver.java
      in visitClassStmt()
      @@ -578,9 +578,9 @@

      13 . 3 .  a superclass since there’d be no superclass to store in it anyway.

      With “super” defined in a scope chain, we are able to resolve the super expression itself.

      -
      lox/Resolver.java
      +
      lox/Resolver.java
      add after visitSetExpr()
      -
        @Override
      +
        @Override
         public Void visitSuperExpr(Expr.Super expr) {
           resolveLocal(expr, expr.keyword);
           return null;
      @@ -593,7 +593,7 @@ 

      13 . 3 .  to walk to find the environment where the superclass is stored.

      This code is mirrored in the interpreter. When we evaluate a subclass definition, we create a new environment.

      -
              throw new RuntimeError(stmt.superclass.name,
      +
              throw new RuntimeError(stmt.superclass.name,
                   "Superclass must be a class.");
             }
           }
      @@ -618,7 +618,7 @@ 

      13 . 3 .  Then we create the LoxFunctions for each method. Those will capture the current environmentthe one where we just bound “super”as their closure, holding on to the superclass like we need. Once that’s done, we pop the environment.

      -
          LoxClass klass = new LoxClass(stmt.name.lexeme,
      +
          LoxClass klass = new LoxClass(stmt.name.lexeme,
               (LoxClass)superclass, methods);
       
      lox/Interpreter.java
      in visitClassStmt()
      @@ -635,9 +635,9 @@

      13 . 3 . 

      We’re ready to interpret super expressions themselves. There are a few moving parts, so we’ll build this method up in pieces.

      -
      lox/Interpreter.java
      +
      lox/Interpreter.java
      add after visitSetExpr()
      -
        @Override
      +
        @Override
         public Object visitSuperExpr(Expr.Super expr) {
           int distance = locals.get(expr);
           LoxClass superclass = (LoxClass)environment.getAt(
      @@ -658,7 +658,7 @@ 

      13 . 3 .  for the resolver to hang the number of hops to this on. Fortunately, we do control the layout of the environment chains. The environment where “this” is bound is always right inside the environment where we store “super”.

      -
          LoxClass superclass = (LoxClass)environment.getAt(
      +
          LoxClass superclass = (LoxClass)environment.getAt(
               distance, "super");
       
      lox/Interpreter.java
      in visitSuperExpr()
      @@ -678,7 +678,7 @@

      13 . 3 .  can’t hide the hacks by leaving them as an “exercise for the reader”.

      Now we’re ready to look up and bind the method, starting at the superclass.

      -
          LoxInstance object = (LoxInstance)environment.getAt(
      +
          LoxInstance object = (LoxInstance)environment.getAt(
               distance - 1, "this");
       
      lox/Interpreter.java
      in visitSuperExpr()
      @@ -695,7 +695,7 @@

      13 . 3 .  the class of the current object.

      That’s basically it. Except, of course, that we might fail to find the method. So we check for that too.

      -
      +
       
           LoxFunction method = superclass.findMethod(expr.method.lexeme);
       
      lox/Interpreter.java
      @@ -719,7 +719,7 @@

      13 

      As with previous language features, our implementation does the right thing when the user writes correct code, but we haven’t bulletproofed the intepreter against bad code. In particular, consider:

      -

    2. For example:

      -
      class Doughnut {
      +
       

      This should print:

      -
      Fry until golden brown.
      +
      Fry until golden brown.
       Pipe full of custard and coat with chocolate.
       Place in a nice box.
       
      diff --git a/site/introduction.html b/site/introduction.html index 6a7d75c86..d491bee95 100644 --- a/site/introduction.html +++ b/site/introduction.html @@ -1,5 +1,5 @@ - + Introduction · Crafting Interpreters @@ -269,7 +269,7 @@

      1 . 2 . 2

      A snippet with all the bells and whistles looks like this:

      -
      +
             default:
       
      lox/Scanner.java
      in scanToken()
      diff --git a/site/jumping-back-and-forth.html b/site/jumping-back-and-forth.html index e5ba61186..be1d3718a 100644 --- a/site/jumping-back-and-forth.html +++ b/site/jumping-back-and-forth.html @@ -1,5 +1,5 @@ - + Jumping Back and Forth · Crafting Interpreters @@ -119,7 +119,7 @@

      Jumping Back and Forth

      variable however we want to. In order to implement control flow, all that’s necessary is to change the ip in more interesting ways. The simplest control flow construct is an if statement with no else clause:

      -
      if (condition) print("condition was truthy");
      +
      if (condition) print("condition was truthy");
       

      The VM evaluates the bytecode for the condition expression. If the result is truthy, then it continues along and executes the print statement in the body. @@ -152,7 +152,7 @@

      23 . 1This many chapters in, you know the drill. Any new feature starts in the front end and works its way through the pipeline. An if statement is, well, a statement, so that’s where we hook it into the parser.

      -
        if (match(TOKEN_PRINT)) {
      +
       
       

      When we see an if keyword, we hand off compilation to this function:

      -
      compiler.c
      +
      compiler.c
      add after expressionStatement()
      -
      static void ifStatement() {
      +
      static void ifStatement() {
         consume(TOKEN_LEFT_PAREN, "Expect '(' after 'if'.");
         expression();
         consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition."); 
      @@ -182,7 +182,7 @@ 

      23 . 1Have you ever noticed that the ( after the if keyword doesn’t actually do anything useful? The language would be just as unambiguous and easy to parse without it, like:

      -
      if condition) print("looks weird");
      +
      if condition) print("looks weird");
       

      The closing ) is useful because it separates the condition expression from the body. Some languages use a then keyword instead. But the opening ( doesn’t @@ -212,9 +212,9 @@

      23 . 1A patch containing a number being sewn onto a sheet of bytecode.

      We encode this trick into two helper functions.

      -
      compiler.c
      +
      compiler.c
      add after emitBytes()
      -
      static int emitJump(uint8_t instruction) {
      +
      static int emitJump(uint8_t instruction) {
         emitByte(instruction);
         emitByte(0xff);
         emitByte(0xff);
      @@ -234,9 +234,9 @@ 

      23 . 1

      The function returns the offset of the emitted instruction in the chunk. After compiling the then branch, we take that offset and pass it to this:

      -
      compiler.c
      +
      compiler.c
      add after emitConstant()
      -
      static void patchJump(int offset) {
      +
      static void patchJump(int offset) {
         // -2 to adjust for the bytecode for the jump offset itself.
         int jump = currentChunk()->count - offset - 2;
       
      @@ -257,7 +257,7 @@ 

      23 . 1

      That’s all we need at compile time. Let’s define the new instruction.

      -
        OP_PRINT,
      +
       
       

      Over in the VM, we get it working like so:

      -
              break;
      +
              break;
             }
       
      vm.c
      in run()
      @@ -281,7 +281,7 @@

      23 . 1This is the first instruction we’ve added that takes a 16-bit operand. To read that from the chunk, we use a new macro.

      -
      #define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
      +
      #define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
       
      vm.c
      in run()
      #define READ_SHORT() \
      @@ -292,7 +292,7 @@ 

      23 . 1It yanks the next two bytes from the chunk and builds a 16-bit unsigned integer out of them. As usual, we clean up our macro when we’re done with it.

      -
      #undef READ_BYTE
      +
      #undef READ_BYTE
       
      vm.c
      in run()
      #undef READ_SHORT
      @@ -315,7 +315,7 @@ 

      23 . 1falsey() that takes a Lox Value and returns 1 if it’s falsey or 0 otherwise. Then we could implement the jump instruction like:

      -
      case OP_JUMP_IF_FALSE: {
      +
      case OP_JUMP_IF_FALSE: {
         uint16_t offset = READ_SHORT();
         vm.ip += falsey() * offset;
         break;
      @@ -334,7 +334,7 @@ 

      23 . 1 .&

      An if statement without support for else clauses is like Morticia Addams without Gomez. So, after we compile the then branch, we look for an else keyword. If we find one, we compile the else branch.

      -
        patchJump(thenJump);
      +
        patchJump(thenJump);
       
      compiler.c
      in ifStatement()
      @@ -352,7 +352,7 @@ 

      23 . 1 .& condition is true, after we run the then branch, we need to jump over the else branch. That way, in either case, we only execute a single branch, like this:

      Flowchart of the compiled bytecode for an if with an else clause.

      To implement that, we need another jump from the end of the then branch.

      -
        statement();
      +
        statement();
       
       
      compiler.c
      in ifStatement()
      @@ -363,7 +363,7 @@

      23 . 1 .&
      compiler.c, in ifStatement()

      We patch that offset after the end of the else body.

      -
        if (match(TOKEN_ELSE)) statement();
      +
        if (match(TOKEN_ELSE)) statement();
       
      compiler.c
      in ifStatement()
        patchJump(elseJump);
      @@ -374,7 +374,7 @@ 

      23 . 1 .&

      After executing the then branch, this jumps to the next statement after the else branch. Unlike the other jump, this jump is unconditional. We always take it, so we need another instruction that expresses that.

      -
        OP_PRINT,
      +
        OP_PRINT,
       
      chunk.h
      in enum OpCode
        OP_JUMP,
      @@ -383,7 +383,7 @@ 

      23 . 1 .&
      chunk.h, in enum OpCode

      We interpret it like so:

      -
              break;
      +
              break;
             }
       
      vm.c
      in run()
      @@ -409,7 +409,7 @@

      23 . 1 .& care that every execution path through the generated code pops the condition.

      When the condition is truthy, we pop it right before the code inside the then branch.

      -
        int thenJump = emitJump(OP_JUMP_IF_FALSE);
      +
        int thenJump = emitJump(OP_JUMP_IF_FALSE);
       
      compiler.c
      in ifStatement()
        emitByte(OP_POP);
      @@ -418,7 +418,7 @@ 

      23 . 1 .&
      compiler.c, in ifStatement()

      Otherwise, we pop it at the beginning of the else branch.

      -
        patchJump(thenJump);
      +
        patchJump(thenJump);
       
      compiler.c
      in ifStatement()
        emitByte(OP_POP);
      @@ -435,7 +435,7 @@ 

      23 . 1 .&

      If you trace through, you can see that it always executes a single branch and ensures the condition is popped first. All that remains is a little disassembler support.

      -
            return simpleInstruction("OP_PRINT", offset);
      +
            return simpleInstruction("OP_PRINT", offset);
       
      debug.c
      in disassembleInstruction()
          case OP_JUMP:
      @@ -448,9 +448,9 @@ 

      23 . 1 .&

      These two instructions have a new format with a 16-bit operand, so we add a new utility function to disassemble them.

      -
      debug.c
      +
      debug.c
      add after byteInstruction()
      -
      static int jumpInstruction(const char* name, int sign,
      +
      static int jumpInstruction(const char* name, int sign,
                                  Chunk* chunk, int offset) {
         uint16_t jump = (uint16_t)(chunk->code[offset + 1] << 8);
         jump |= chunk->code[offset + 2];
      @@ -477,7 +477,7 @@ 

      23 .  The easiest way to explain them is to just show you the compiler code and the control flow it produces in the resulting bytecode. Starting with and, we hook it into the expression parsing table here:

      -
        [TOKEN_NUMBER]        = {number,   NULL,   PREC_NONE},
      +
        [TOKEN_NUMBER]        = {number,   NULL,   PREC_NONE},
       
      compiler.c
      replace 1 line
        [TOKEN_AND]           = {NULL,     and_,   PREC_AND},
      @@ -486,9 +486,9 @@ 

      23 . 
      compiler.c, replace 1 line

      That hands off to a new parser function.

      -
      compiler.c
      +
      compiler.c
      add after defineVariable()
      -
      static void and_(bool canAssign) {
      +
      static void and_(bool canAssign) {
         int endJump = emitJump(OP_JUMP_IF_FALSE);
       
         emitByte(OP_POP);
      @@ -519,7 +519,7 @@ 

      23 . 

      23 . 2 . 1Logical or operator

      The or operator is a little more complex. First we add it to the parse table.

      -
        [TOKEN_NIL]           = {literal,  NULL,   PREC_NONE},
      +
        [TOKEN_NIL]           = {literal,  NULL,   PREC_NONE},
       
      compiler.c
      replace 1 line
        [TOKEN_OR]            = {NULL,     or_,    PREC_OR},
      @@ -528,9 +528,9 @@ 

      23 .
      compiler.c, replace 1 line

      When that parser consumes an infix or token, it calls this:

      -
      compiler.c
      +
      compiler.c
      add after number()
      -
      static void or_(bool canAssign) {
      +
      static void or_(bool canAssign) {
         int elseJump = emitJump(OP_JUMP_IF_FALSE);
         int endJump = emitJump(OP_JUMP);
       
      @@ -565,7 +565,7 @@ 

      23 . 3<

      That takes us to the looping statements, which jump backward so that code can be executed more than once. Lox only has two loop constructs, while and for. A while loop is (much) simpler, so we start the party there.

      -
          ifStatement();
      +
          ifStatement();
       
      compiler.c
      in statement()
        } else if (match(TOKEN_WHILE)) {
      @@ -575,9 +575,9 @@ 

      23 . 3<
      compiler.c, in statement()

      When we reach a while token, we call:

      -
      compiler.c
      +
      compiler.c
      add after printStatement()
      -
      static void whileStatement() {
      +
      static void whileStatement() {
         consume(TOKEN_LEFT_PAREN, "Expect '(' after 'while'.");
         expression();
         consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition.");
      @@ -602,7 +602,7 @@ 

      23 . 3<

      Really starting to second-guess my decision to use the same jump instructions for the logical operators.

      -
        statement();
      +
        statement();
       
      compiler.c
      in whileStatement()
        emitLoop(loopStart);
      @@ -618,7 +618,7 @@ 

      23 . 3< jump until after we emitted the jump instruction. We don’t have that problem now. We’ve already compiled the point in code that we want to jump back to—it’s right before the condition expression.

      All we need to do is capture that location as we compile it.

      -
      static void whileStatement() {
      +
      static void whileStatement() {
       
      compiler.c
      in whileStatement()
        int loopStart = currentChunk()->count;
      @@ -631,9 +631,9 @@ 

      23 . 3< iteration. We store the chunk’s current instruction count in loopStart to record the offset in the bytecode right before the condition expression we’re about to compile. Then we pass that into this helper function:

      -
      compiler.c
      +
      compiler.c
      add after emitBytes()
      -
      static void emitLoop(int loopStart) {
      +
      static void emitLoop(int loopStart) {
         emitByte(OP_LOOP);
       
         int offset = currentChunk()->count - loopStart + 2;
      @@ -658,7 +658,7 @@ 

      23 . 3< to manually pack a signed 16-bit integer into two bytes, and we’ve got the opcode space available, so why not use it?

      The new instruction is here:

      -
        OP_JUMP_IF_FALSE,
      +
        OP_JUMP_IF_FALSE,
       
      chunk.h
      in enum OpCode
        OP_LOOP,
      @@ -667,7 +667,7 @@ 

      23 . 3<
      chunk.h, in enum OpCode

      And in the VM, we implement it thusly:

      -
            }
      +
            }
       
      vm.c
      in run()
            case OP_LOOP: {
      @@ -681,7 +681,7 @@ 

      23 . 3<

      The only difference from OP_JUMP is a subtraction instead of an addition. Disassembly is similar too.

      -
            return jumpInstruction("OP_JUMP_IF_FALSE", 1, chunk, offset);
      +
            return jumpInstruction("OP_JUMP_IF_FALSE", 1, chunk, offset);
       
      debug.c
      in disassembleInstruction()
          case OP_LOOP:
      @@ -721,7 +721,7 @@ 

      23 . 4

      We’ll work our way through the implementation a piece at a time, starting with the for keyword.

      -
          printStatement();
      +
          printStatement();
       
      compiler.c
      in statement()
        } else if (match(TOKEN_FOR)) {
      @@ -732,9 +732,9 @@ 

      23 . 4It calls a helper function. If we only supported for loops with empty clauses like for (;;), then we could implement it like this:

      -
      compiler.c
      +
      compiler.c
      add after expressionStatement()
      -
      static void forStatement() {
      +
      static void forStatement() {
         consume(TOKEN_LEFT_PAREN, "Expect '(' after 'for'.");
         consume(TOKEN_SEMICOLON, "Expect ';'.");
       
      @@ -759,7 +759,7 @@ 

      23 . 423 . 4 . 1Initializer clause

      Now we’ll add the first clause, the initializer. It executes only once, before the body, so compiling is straightforward.

      -
        consume(TOKEN_LEFT_PAREN, "Expect '(' after 'for'.");
      +
        consume(TOKEN_LEFT_PAREN, "Expect '(' after 'for'.");
       
      compiler.c
      in forStatement()
      replace 1 line
      @@ -784,7 +784,7 @@

      23 .̴ initializer to leave anything on the stack.

      If a for statement declares a variable, that variable should be scoped to the loop body. We ensure that by wrapping the whole statement in a scope.

      -
      static void forStatement() {
      +
      static void forStatement() {
       
      compiler.c
      in forStatement()
        beginScope();
      @@ -793,7 +793,7 @@ 

      23 .̴
      compiler.c, in forStatement()

      Then we close it at the end.

      -
        emitLoop(loopStart);
      +
        emitLoop(loopStart);
       
      compiler.c
      in forStatement()
        endScope();
      @@ -803,7 +803,7 @@ 

      23 .̴

      23 . 4 . 2Condition clause

      Next, is the condition expression that can be used to exit the loop.

      -
        int loopStart = currentChunk()->count;
      +
        int loopStart = currentChunk()->count;
       
      compiler.c
      in forStatement()
      replace 1 line
      @@ -829,7 +829,7 @@

      23 . 4& value on the stack, we pop it before executing the body. That ensures we discard the value when the condition is true.

      After the loop body, we need to patch that jump.

      -
        emitLoop(loopStart);
      +
        emitLoop(loopStart);
       
      compiler.c
      in forStatement()
      @@ -857,7 +857,7 @@ 

      23 . 4& the next iteration.

      I know, a little weird, but hey, it beats manually managing ASTs in memory in C, right? Here’s the code:

      -
        }
      +
        }
       
       
      compiler.c
      in forStatement()
      @@ -914,7 +914,7 @@

      Challenges

    3. In addition to if statements, most C-family languages have a multi-way switch statement. Add one to clox. The grammar is:

      -
      switchStmt"switch" "(" expression ")"
      +
      switchStmt"switch" "(" expression ")"
                        "{" switchCase* defaultCase? "}" ;
       switchCase"case" expression ":" statement* ;
       defaultCase"default" ":" statement* ;
      @@ -932,7 +932,7 @@ 

      Challenges

    4. In jlox, we had a challenge to add support for break statements. This time, let’s do continue:

      -
      continueStmt"continue" ";" ;
      +
      continueStmt"continue" ";" ;
       

      A continue statement jumps directly to the top of the nearest enclosing loop, skipping the rest of the loop body. Inside a for loop, a continue @@ -1085,7 +1085,7 @@

      Design Note: Considering Goto Harmfu “switch inside a loop” is a classic one. Another is using a guard variable to exit out of a series of nested loops:

      -
      // See if the matrix contains a zero.
      +
       

      Is that really better than:

      -
      for (int x = 0; x < xSize; x++) {
      +
      for (int x = 0; x < xSize; x++) {
         for (int y = 0; y < ySize; y++) {
           for (int z = 0; z < zSize; z++) {
             if (matrix[x][y][z] == 0) {
      diff --git a/site/local-variables.html b/site/local-variables.html
      index 491667338..b67d91562 100644
      --- a/site/local-variables.html
      +++ b/site/local-variables.html
      @@ -1,5 +1,5 @@
       
      -
      +
       
       
       Local Variables · Crafting Interpreters
      @@ -182,7 +182,7 @@ 

      -
      } ParseRule;
      +
      } ParseRule;
       
      compiler.c
      add after struct ParseRule
      @@ -208,7 +208,7 @@ 

      We’re writing a single-pass compiler, so it’s not like we have too many other options for how to order them in the array.

      -
      #define DEBUG_TRACE_EXECUTION
      +
      #define DEBUG_TRACE_EXECUTION
       
      common.h
       
      @@ -230,7 +230,7 @@ 

      Each local in the array is one of these:

      -
      } ParseRule;
      +
      } ParseRule;
       
      compiler.c
      add after struct ParseRule
      @@ -263,7 +263,7 @@ 

      bad idea.

      -
      Parser parser;
      +
       
       

      Here’s a little function to initialize the compiler:

      -
      compiler.c
      +

      When we first start up the VM, we call it to get everything into a clean state.

      -
        initScanner(source);
      +
        initScanner(source);
       
      compiler.c
      in compile()
        Compiler compiler;
      @@ -301,7 +301,7 @@ 

      22 . 2< are a big chunk of work that we’ll tackle in a later chapter, so for now we’re only going to do blocks. As usual, we start with the syntax. The new grammar we’ll introduce is:

      -
      statementexprStmt
      +
      statementexprStmt
                      | printStmt
                      | block ;
       
      @@ -316,7 +316,7 @@ 

      22 . 2<

      Blocks are a kind of statement, so the rule for them goes in the statement production. The corresponding code to compile one looks like this:

      -
        if (match(TOKEN_PRINT)) {
      +
        if (match(TOKEN_PRINT)) {
           printStatement();
       
      compiler.c
      in statement()
      @@ -333,9 +333,9 @@

      22 . 2< -
      compiler.c
      +
      compiler.c
      add after expression()
      -
      static void block() {
      +
      static void block() {
         while (!check(TOKEN_RIGHT_BRACE) && !check(TOKEN_EOF)) {
           declaration();
         }
      @@ -353,9 +353,9 @@ 

      22 . 2< the other, so there isn’t much to compiling them. The semantically interesting thing blocks do is create scopes. Before we compile the body of a block, we call this function to enter a new local scope:

      -
      compiler.c
      +
      compiler.c
      add after endCompiler()
      -
      static void beginScope() {
      +
      static void beginScope() {
         current->scopeDepth++;
       }
       
      @@ -364,9 +364,9 @@

      22 . 2<

      In order to “create” a scope, all we do is increment the current depth. This is certainly much faster than jlox, which allocated an entire new HashMap for each one. Given beginScope(), you can probably guess what endScope() does.

      -
      compiler.c
      +
      compiler.c
      add after beginScope()
      -
      static void endScope() {
      +
      static void endScope() {
         current->scopeDepth--;
       }
       
      @@ -388,7 +388,7 @@

      2 the bytecode for storing the variable’s value in the global variable hash table.

      Both of those helpers need a few changes to support local variables. In parseVariable(), we add:

      -
        consume(TOKEN_IDENTIFIER, errorMessage);
      +
        consume(TOKEN_IDENTIFIER, errorMessage);
       
      compiler.c
      in parseVariable()
      @@ -407,7 +407,7 @@ 

      2 table index instead.

      Over in defineVariable(), we need to emit the code to store a local variable if we’re in a local scope. It looks like this:

      -
      static void defineVariable(uint8_t global) {
      +
      static void defineVariable(uint8_t global) {
       
      compiler.c
      in defineVariable()
        if (current->scopeDepth > 0) {
      @@ -431,9 +431,9 @@ 

      2

      The code on the left compiles to the sequence of instructions on the right.

      OK, so what’s “declaring” about? Here’s what that does:

      -
      compiler.c
      +
      compiler.c
      add after identifierConstant()
      -
      static void declareVariable() {
      +
      static void declareVariable() {
         if (current->scopeDepth == 0) return;
       
         Token* name = &parser.previous;
      @@ -449,9 +449,9 @@ 

      2

      But for local variables, the compiler does need to remember that the variable exists. That’s what declaring it does—it adds it to the compiler’s list of variables in the current scope. We implement that using another new function.

      -
      compiler.c
      +
      compiler.c
      add after identifierConstant()
      -
      static void addLocal(Token name) {
      +
      static void addLocal(Token name) {
         Local* local = &current->locals[current->localCount++];
         local->name = name;
         local->depth = current->scopeDepth;
      @@ -479,7 +479,7 @@ 

      2 one time.

      If we try to go over that, not only could we not refer to them at runtime, but the compiler would overwrite its own locals array, too. Let’s prevent that.

      -
      static void addLocal(Token name) {
      +
      static void addLocal(Token name) {
       
      compiler.c
      in addLocal()
        if (current->localCount == UINT8_COUNT) {
      @@ -492,7 +492,7 @@ 

      2
      compiler.c, in addLocal()

      The next case is trickier. Consider:

      -
      {
      +
      {
         var a = "first";
         var a = "second";
       }
      @@ -507,7 +507,7 @@ 

      2 code relies on it.

      Note that the above program is different from this one:

      -
      {
      +
      {
         var a = "outer";
         {
           var a = "inner";
      @@ -519,7 +519,7 @@ 

      2 shadowing, and Lox does allow that. It’s only an error to have two variables with the same name in the same local scope.

      We detect that error like so:

      -
        Token* name = &parser.previous;
      +
        Token* name = &parser.previous;
       
      compiler.c
      in declareVariable()
        for (int i = current->localCount - 1; i >= 0; i--) {
      @@ -549,9 +549,9 @@ 

      2 if we reach the beginning of the array or a variable owned by another scope, then we know we’ve checked all of the existing variables in the scope.

      To see if two identifiers are the same, we use this:

      -
      compiler.c
      +
      compiler.c
      add after identifierConstant()
      -
      static bool identifiersEqual(Token* a, Token* b) {
      +
      static bool identifiersEqual(Token* a, Token* b) {
         if (a->length != b->length) return false;
         return memcmp(a->start, b->start, a->length) == 0;
       }
      @@ -566,7 +566,7 @@ 

      2

      It would be a nice little optimization if we could check their hashes, but tokens aren’t full LoxStrings, so we haven’t calculated their hashes yet.

      -
      #include <stdlib.h>
      +
      #include <stdlib.h>
       
      compiler.c
      #include <string.h>
       
      @@ -578,7 +578,7 @@ 

      2

      With this, we’re able to bring variables into being. But, like ghosts, they linger on beyond the scope where they are declared. When a block ends, we need to put them to rest.

      -
        current->scopeDepth--;
      +
        current->scopeDepth--;
       
      compiler.c
      in endScope()
      @@ -614,7 +614,7 @@ 

      22 . 4U

      We already have code for getting and setting global variables, and—like good little software engineers—we want to reuse as much of that existing code as we can. Something like this:

      -
      static void namedVariable(Token name, bool canAssign) {
      +
      static void namedVariable(Token name, bool canAssign) {
       
      compiler.c
      in namedVariable()
      replace 1 line
      @@ -641,7 +641,7 @@

      22 . 4U existing bytecode instructions for globals.

      A little further down, we use those variables to emit the right instructions. For assignment:

      -
        if (canAssign && match(TOKEN_EQUAL)) {
      +
        if (canAssign && match(TOKEN_EQUAL)) {
           expression();
       
      compiler.c
      in namedVariable()
      @@ -652,7 +652,7 @@

      22 . 4U
      compiler.c, in namedVariable(), replace 1 line

      And for access:

      -
          emitBytes(setOp, (uint8_t)arg);
      +
          emitBytes(setOp, (uint8_t)arg);
         } else {
       
      compiler.c
      in namedVariable()
      @@ -664,9 +664,9 @@

      22 . 4U

      The real heart of this chapter, the part where we resolve a local variable, is here:

      -
      compiler.c
      +
      compiler.c
      add after identifiersEqual()
      -
      static int resolveLocal(Compiler* compiler, Token* name) {
      +
      static int resolveLocal(Compiler* compiler, Token* name) {
         for (int i = compiler->localCount - 1; i >= 0; i--) {
           Local* local = &compiler->locals[i];
           if (identifiersEqual(name, &local->name)) {
      @@ -698,7 +698,7 @@ 

      22 . 4U

      22 . 4 . 1Interpreting local variables

      Our compiler is emitting two new instructions, so let’s get them working. First is loading a local variable:

      -
        OP_POP,
      +
        OP_POP,
       
      chunk.h
      in enum OpCode
        OP_GET_LOCAL,
      @@ -707,7 +707,7 @@ 

      chunk.h, in enum OpCode

      And its implementation:

      -
            case OP_POP: pop(); break;
      +
            case OP_POP: pop(); break;
       
      vm.c
      in run()
            case OP_GET_LOCAL: {
      @@ -731,7 +731,7 @@ 

      Next is assignment:

      -
        OP_GET_LOCAL,
      +
       
       

      You can probably predict the implementation.

      -
            }
      +
            }
       
      vm.c
      in run()
            case OP_SET_LOCAL: {
      @@ -758,7 +758,7 @@ 

      Our disassembler is incomplete without support for these two new instructions.

      -
            return simpleInstruction("OP_POP", offset);
      +
            return simpleInstruction("OP_POP", offset);
       
      debug.c
      in disassembleInstruction()
          case OP_GET_LOCAL:
      @@ -781,9 +781,9 @@ 

      -
      debug.c
      +
      debug.c
      add after simpleInstruction()
      -
      static int byteInstruction(const char* name, Chunk* chunk,
      +
      static int byteInstruction(const char* name, Chunk* chunk,
                                  int offset) {
         uint8_t slot = chunk->code[offset + 1];
         printf("%-16s %4d\n", name, slot);
      @@ -803,7 +803,7 @@ 

      22

      No, not even Scheme.

      We’ve got one more edge case to deal with before we end this chapter. Recall this strange beastie we first met in jlox’s implementation of variable resolution:

      -

    5. How do other languages handle code like this:

      -
      var a = a;
      +
      var a = a;
       

      What would you do if it was your language? Why?

    6. diff --git a/site/methods-and-initializers.html b/site/methods-and-initializers.html index 30f20aef0..6e1cd42d5 100644 --- a/site/methods-and-initializers.html +++ b/site/methods-and-initializers.html @@ -1,5 +1,5 @@ - + Methods and Initializers · Crafting Interpreters @@ -113,7 +113,7 @@

      28 .& this time. The runtime representation for methods in clox is similar to that of jlox. Each class stores a hash table of methods. Keys are method names, and each value is an ObjClosure for the body of the method.

      -

    We’ll incrementally write the compiler code to see how those all get through to the runtime, starting here:

    -
    compiler.c
    +
    compiler.c
    add after function()
    -
    static void method() {
    +
    static void method() {
       consume(TOKEN_IDENTIFIER, "Expect method name.");
       uint8_t constant = identifierConstant(&parser.previous);
       emitBytes(OP_METHOD, constant);
    @@ -235,7 +235,7 @@ 

    compiler adds the method name token’s lexeme to the constant table, getting back a table index. Then we emit an OP_METHOD instruction with that index as the operand. That’s the name. Next is the method body:

    -
      uint8_t constant = identifierConstant(&parser.previous);
    +
      uint8_t constant = identifierConstant(&parser.previous);
     
    compiler.c
    in method()
    @@ -263,7 +263,7 @@ 

    Fear not. The compiler does know the name of the class. We can capture it right after we consume its token.

    -
      consume(TOKEN_IDENTIFIER, "Expect class name.");
    +
      consume(TOKEN_IDENTIFIER, "Expect class name.");
     
    compiler.c
    in classDeclaration()
      Token className = parser.previous;
    @@ -274,7 +274,7 @@ 

    And we know that no other declaration with that name could possibly shadow the class. So we do the easy fix. Before we start binding methods, we emit whatever code is necessary to load the class back on top of the stack.

    -
      defineVariable(nameConstant);
    +
      defineVariable(nameConstant);
     
     
    compiler.c
    in classDeclaration()
    @@ -298,7 +298,7 @@

    method’s closure on top with the class right under it. Once we’ve reached the end of the methods, we no longer need the class and tell the VM to pop it off the stack.

    -
      consume(TOKEN_RIGHT_BRACE, "Expect '}' after class body.");
    +
      consume(TOKEN_RIGHT_BRACE, "Expect '}' after class body.");
     
    compiler.c
    in classDeclaration()
      emitByte(OP_POP);
    @@ -308,7 +308,7 @@ 

    Putting all of that together, here is an example class declaration to throw at the compiler:

    -
    class Brunch {
    +
    class Brunch {
       bacon() {}
       eggs() {}
     }
    @@ -319,7 +319,7 @@ 

    instruction.

    28 . 1 . 3Executing method declarations

    First we define the opcode.

    -
      OP_CLASS,
    +
      OP_CLASS,
     
    chunk.h
    in enum OpCode
      OP_METHOD
    @@ -328,7 +328,7 @@ 

    chunk.h, in enum OpCode

    We disassemble it like other instructions that have string constant operands.

    -
        case OP_CLASS:
    +
        case OP_CLASS:
           return constantInstruction("OP_CLASS", chunk, offset);
     
    debug.c
    in disassembleInstruction()
    @@ -339,7 +339,7 @@

    debug.c, in disassembleInstruction()

    And over in the interpreter, we add a new case too.

    -
            break;
    +
            break;
     
    vm.c
    in run()
          case OP_METHOD:
    @@ -350,9 +350,9 @@ 

    vm.c, in run()

    There, we read the method name from the constant table and pass it here:

    -
    vm.c
    +
    vm.c
    add after closeUpvalues()
    -
    static void defineMethod(ObjString* name) {
    +
    static void defineMethod(ObjString* name) {
       Value method = peek(0);
       ObjClass* klass = AS_CLASS(peek(1));
       tableSet(&klass->methods, name, method);
    @@ -385,11 +385,11 @@ 

    28 . 2Method References

    Most of the time, methods are accessed and immediately called, leading to this familiar syntax:

    -
    instance.method(argument);
    +
    instance.method(argument);
     

    But remember, in Lox and some other languages, those two steps are distinct and can be separated.

    -
    var closure = instance.method;
    +
    var closure = instance.method;
     closure(argument);
     

    Since users can separate the operations, we have to implement them separately. @@ -400,7 +400,7 @@

    28 .  return the ObjClosure associated with that name. But we also need to remember that when you access a method, this gets bound to the instance the method was accessed from. Here’s the example from when we added methods to jlox:

    -
    class Person {
    +
    class Person {
       sayName() {
         print this.name;
       }
    @@ -432,7 +432,7 @@ 

    28 . 2  and I used its implementation for inspiration.

    Here’s the new object type:

    -
    } ObjInstance;
    +
    } ObjInstance;
     
     
    object.h
    add after struct ObjInstance
    @@ -453,7 +453,7 @@

    28 . 2  general functions.

    The new struct implies the usual boilerplate you’re used to by now. A new case in the object type enum:

    -
    typedef enum {
    +
    typedef enum {
     
    object.h
    in enum ObjType
      OBJ_BOUND_METHOD,
    @@ -462,7 +462,7 @@ 

    28 . 2 
    object.h, in enum ObjType

    A macro to check a value’s type:

    -
    #define OBJ_TYPE(value)        (AS_OBJ(value)->type)
    +
    #define OBJ_TYPE(value)        (AS_OBJ(value)->type)
     
     
    object.h
    #define IS_BOUND_METHOD(value) isObjType(value, OBJ_BOUND_METHOD)
    @@ -471,7 +471,7 @@ 

    28 . 2 
    object.h

    Another macro to cast the value to an ObjBoundMethod pointer:

    -
    #define IS_STRING(value)       isObjType(value, OBJ_STRING)
    +
    #define IS_STRING(value)       isObjType(value, OBJ_STRING)
     
     
    object.h
    #define AS_BOUND_METHOD(value) ((ObjBoundMethod*)AS_OBJ(value))
    @@ -480,7 +480,7 @@ 

    28 . 2 
    object.h

    A function to create a new ObjBoundMethod:

    -
    } ObjBoundMethod;
    +
    } ObjBoundMethod;
     
     
    object.h
    add after struct ObjBoundMethod
    @@ -491,9 +491,9 @@

    28 . 2 
    object.h, add after struct ObjBoundMethod

    And an implementation of that function here:

    -
    object.c
    +
    object.c
    add after allocateObject()
    -
    ObjBoundMethod* newBoundMethod(Value receiver,
    +
    ObjBoundMethod* newBoundMethod(Value receiver,
                                    ObjClosure* method) {
       ObjBoundMethod* bound = ALLOCATE_OBJ(ObjBoundMethod,
                                            OBJ_BOUND_METHOD);
    @@ -506,7 +506,7 @@ 

    28 . 2 

    The constructor-like function simply stores the given closure and receiver. When the bound method is no longer needed, we free it.

    -
      switch (object->type) {
    +
      switch (object->type) {
     
    memory.c
    in freeObject()
        case OBJ_BOUND_METHOD:
    @@ -519,7 +519,7 @@ 

    28 . 2 

    The bound method has a couple of references, but it doesn’t own them, so it frees nothing but itself. However, those references do get traced by the garbage collector.

    -
      switch (object->type) {
    +
      switch (object->type) {
     
    memory.c
    in blackenObject()
        case OBJ_BOUND_METHOD: {
    @@ -542,7 +542,7 @@ 

    28 . 2  rely on that.

    The last operation all objects support is printing.

    -
      switch (OBJ_TYPE(value)) {
    +
      switch (OBJ_TYPE(value)) {
     
    object.c
    in printObject()
        case OBJ_BOUND_METHOD:
    @@ -571,7 +571,7 @@ 

    28 .  and replace the top of the stack with the accessed property.

    The interpreter already handles fields, so we simply extend the OP_GET_PROPERTY case with another section.

    -
              pop(); // Instance.
    +
              pop(); // Instance.
               push(value);
               break;
             }
    @@ -597,9 +597,9 @@ 

    28 .  found. Since the name also wasn’t a field, that means we have a runtime error, which aborts the interpreter.

    Here is the good stuff:

    -
    vm.c
    +
    vm.c
    add after callValue()
    -
    static bool bindMethod(ObjClass* klass, ObjString* name) {
    +
    static bool bindMethod(ObjClass* klass, ObjString* name) {
       Value method;
       if (!tableGet(&klass->methods, name, &method)) {
         runtimeError("Undefined property '%s'.", name->chars);
    @@ -621,7 +621,7 @@ 

    28 .  home on top of the stack. Finally, we pop the instance and replace the top of the stack with the bound method.

    For example:

    -
    class Brunch {
    +
    class Brunch {
       eggs() {}
     }
     
    @@ -642,7 +642,7 @@ 

    28 . 2

    A bound method is a first-class value, so they can store it in variables, pass it to functions, and otherwise do “value”-y stuff with it.

    -
        switch (OBJ_TYPE(callee)) {
    +
        switch (OBJ_TYPE(callee)) {
     
    vm.c
    in callValue()
          case OBJ_BOUND_METHOD: {
    @@ -657,7 +657,7 @@ 

    28 . 2 call() helper to begin an invocation of that closure by pushing a CallFrame for it onto the call stack. That’s all it takes to be able to run this Lox program:

    -
    class Scone {
    +
    class Scone {
       topping(first, second) {
         print "scone with " + first + " and " + second;
       }
    @@ -676,7 +676,7 @@ 

    28 . 3This

    this expressions. It’s time for some new syntax. The lexer already treats this as a special token type, so the first step is wiring that token up in the parse table.

    -
      [TOKEN_SUPER]         = {NULL,     NULL,   PREC_NONE},
    +
      [TOKEN_SUPER]         = {NULL,     NULL,   PREC_NONE},
     
    compiler.c
    replace 1 line
      [TOKEN_THIS]          = {this_,    NULL,   PREC_NONE},
    @@ -690,9 +690,9 @@ 

    28 . 3This

    When the parser encounters a this in prefix position, it dispatches to a new parser function.

    -
    compiler.c
    +
    compiler.c
    add after variable()
    -
    static void this_(bool canAssign) {
    +
    static void this_(bool canAssign) {
       variable(false);
     } 
     
    @@ -725,7 +725,7 @@

    28 . 3This

    receiver. Slot zero will store the instance that this is bound to. In order to compile this expressions, the compiler simply needs to give the correct name to that local variable.

    -
      local->isCaptured = false;
    +
      local->isCaptured = false;
     
    compiler.c
    in initCompiler()
    replace 2 lines
    @@ -744,7 +744,7 @@

    28 . 3This

    And, in fact, they must not declare a variable named “this”, so that if you write a this expression inside a function declaration which is itself inside a method, the this correctly resolves to the outer method’s receiver.

    -
    class Nested {
    +
    class Nested {
       method() {
         fun function() {
           print this;
    @@ -760,7 +760,7 @@ 

    28 . 3This

    local slot zero, the compiler needs to know whether it’s compiling a function or method declaration, so we add a new case to our FunctionType enum to distinguish methods.

    -
      TYPE_FUNCTION,
    +
      TYPE_FUNCTION,
     
    compiler.c
    in enum FunctionType
      TYPE_METHOD,
    @@ -769,7 +769,7 @@ 

    28 . 3This

    compiler.c, in enum FunctionType

    When we compile a method, we use that type.

    -
      uint8_t constant = identifierConstant(&parser.previous);
    +
      uint8_t constant = identifierConstant(&parser.previous);
     
     
    compiler.c
    in method()
    @@ -784,7 +784,7 @@

    28 . 3This

    can even capture this and store the receiver in upvalues. Pretty cool.

    Except that at runtime, the receiver isn’t actually in slot zero. The interpreter isn’t holding up its end of the bargain yet. Here is the fix:

    -
          case OBJ_BOUND_METHOD: {
    +
          case OBJ_BOUND_METHOD: {
             ObjBoundMethod* bound = AS_BOUND_METHOD(callee);
     
    vm.c
    in callValue()
    @@ -798,7 +798,7 @@

    28 . 3This

    then just under those is the closure of the called method. That’s where slot zero in the new CallFrame will be. This line of code inserts the receiver into that slot. For example, given a method call like this:

    -
    scone.topping("berries", "cream");
    +
    scone.topping("berries", "cream");
     

    We calculate the slot to store the receiver like so:

    Skipping over the argument stack slots to find the slot containing the closure.

    The -argCount skips past the arguments and the - 1 adjusts for the fact that @@ -808,7 +808,7 @@

    28 . 3  sure it properly handles users misusing this. Lox says it is a compile error for a this expression to appear outside of the body of a method. These two wrong uses should be caught by the compiler:

    -
    print this; // At top level.
    +
    print this; // At top level.
     
     fun notMethod() {
       print this; // In a function.
    @@ -827,7 +827,7 @@ 

    28 . 3  If we had that, we could use it here to determine if we are inside a method. So we may as well make our future selves’ lives a little easier and put that machinery in place now.

    -
    Compiler* current = NULL;
    +
    Compiler* current = NULL;
     
    compiler.c
    add after variable current
    ClassCompiler* currentClass = NULL;
    @@ -839,7 +839,7 @@ 

    28 . 3 

    This module variable points to a struct representing the current, innermost class being compiled. The new type looks like this:

    -
    } Compiler;
    +
    } Compiler;
     
    compiler.c
    add after struct Compiler
    @@ -861,7 +861,7 @@ 

    28 . 3 

    If we aren’t inside any class declaration at all, the module variable currentClass is NULL. When the compiler begins compiling a class, it pushes a new ClassCompiler onto that implicit linked stack.

    -
      defineVariable(nameConstant);
    +
      defineVariable(nameConstant);
     
     
    compiler.c
    in classDeclaration()
    @@ -877,7 +877,7 @@

    28 . 3  capability we get by writing our compiler using recursive descent. At the end of the class body, we pop that compiler off the stack and restore the enclosing one.

    -
      emitByte(OP_POP);
    +
      emitByte(OP_POP);
     
    compiler.c
    in classDeclaration()
    @@ -890,7 +890,7 @@ 

    28 . 3 

    When an outermost class body ends, enclosing will be NULL, so this resets currentClass to NULL. Thus, to see if we are inside a classand therefore inside a methodwe simply check that module variable.

    -
    static void this_(bool canAssign) {
    +
    static void this_(bool canAssign) {
     
    compiler.c
    in this_()
      if (currentClass == NULL) {
    @@ -941,7 +941,7 @@ 

    28  -
      if (canAssign && match(TOKEN_EQUAL)) {
    +
      if (canAssign && match(TOKEN_EQUAL)) {
         expression();
         emitBytes(OP_SET_PROPERTY, name);
     
    compiler.c
    @@ -1227,7 +1227,7 @@

    28 

    In other words, this single instruction combines the operands of the OP_GET_PROPERTY and OP_CALL instructions it replaces, in that order. It really is a fusion of those two instructions. Let’s define it.

    -
      OP_CALL,
    +
      OP_CALL,
     
    chunk.h
    in enum OpCode
      OP_INVOKE,
    @@ -1236,7 +1236,7 @@ 

    28 
    chunk.h, in enum OpCode

    And add it to the disassembler:

    -
        case OP_CALL:
    +
        case OP_CALL:
           return byteInstruction("OP_CALL", chunk, offset);
     
    debug.c
    in disassembleInstruction()
    @@ -1248,9 +1248,9 @@

    28 

    This is a new, special instruction format, so it needs a little custom disassembly logic.

    -
    debug.c
    +
    debug.c
    add after constantInstruction()
    -
    static int invokeInstruction(const char* name, Chunk* chunk,
    +
    static int invokeInstruction(const char* name, Chunk* chunk,
                                     int offset) {
       uint8_t constant = chunk->code[offset + 1];
       uint8_t argCount = chunk->code[offset + 2];
    @@ -1265,7 +1265,7 @@ 

    28 

    We read the two operands and then print out both the method name and the argument count. Over in the interpreter’s bytecode dispatch loop is where the real action begins.

    -
          }
    +
          }
     
    vm.c
    in run()
          case OP_INVOKE: {
    @@ -1290,9 +1290,9 @@ 

    28 

    Finally, assuming the invocation succeeded, then there is a new CallFrame on the stack, so we refresh our cached copy of the current frame in frame.

    The interesting work happens here:

    -
    vm.c
    +
    vm.c
    add after callValue()
    -
    static bool invoke(ObjString* name, int argCount) {
    +
    static bool invoke(ObjString* name, int argCount) {
       Value receiver = peek(argCount);
       ObjInstance* instance = AS_INSTANCE(receiver);
       return invokeFromClass(instance->klass, name, argCount);
    @@ -1306,7 +1306,7 @@ 

    28 

    That does assume the object is an instance. As with OP_GET_PROPERTY instructions, we also need to handle the case where a user incorrectly tries to call a method on a value of the wrong type.

    -
      Value receiver = peek(argCount);
    +
      Value receiver = peek(argCount);
     
    vm.c
    in invoke()
    @@ -1327,9 +1327,9 @@ 

    28 

    As you can guess by now, we split this code into a separate function because we’re going to reuse it laterin this case for super calls.

    -
    vm.c
    +
    vm.c
    add after callValue()
    -
    static bool invokeFromClass(ObjClass* klass, ObjString* name,
    +
    static bool invokeFromClass(ObjClass* klass, ObjString* name,
                                 int argCount) {
       Value method;
       if (!tableGet(&klass->methods, name, &method)) {
    @@ -1374,7 +1374,7 @@ 

    28 . 5 Users like it when a language implementation gives them an answer faster, but only if it’s the right answer. Alas, our implementation of faster method invocations fails to uphold that principle:

    -
    class Oops {
    +
    class Oops {
       init() {
         fun f() {
           print "not a method";
    @@ -1403,7 +1403,7 @@ 

    28 . 5

    Earlier, when we implemented OP_GET_PROPERTY, we handled both field and method accesses. To squash this new bug, we need to do the same thing for OP_INVOKE.

    -
      ObjInstance* instance = AS_INSTANCE(receiver);
    +
      ObjInstance* instance = AS_INSTANCE(receiver);
     
    vm.c
    in invoke()
    diff --git a/site/optimization.html b/site/optimization.html
    index efc8f3c39..9429ac93d 100644
    --- a/site/optimization.html
    +++ b/site/optimization.html
    @@ -1,5 +1,5 @@
     
    -
    +
     
     
     Optimization · Crafting Interpreters
    @@ -115,7 +115,7 @@ 

    30  long gone, separated from the present by microcode, cache lines, branch prediction, deep compiler pipelines, and mammoth instruction sets. We like to pretend C is a “low-level” language, but the stack of technology between

    -
    printf("Hello, world!");
    +
    printf("Hello, world!");
     

    and a greeting appearing on screen is now perilously tall.

    Optimization today is an empirical science. Our program is a border collie @@ -228,7 +228,7 @@

    3 interpreter. In a dynamically typed language like Lox, a large fraction of user code is field accesses and method calls, so one of my benchmarks looked something like this:

    -
    class Zoo {
    +
    class Zoo {
       init() {
         this.aardvark = 1;
         this.baboon   = 1;
    @@ -309,7 +309,7 @@ 

    30 . 

    If you take a look at tableGet(), you’ll see it’s mostly a wrapper around a call to findEntry() where the actual hash table lookup happens. To refresh your memory, here it is in full:

    -
    static Entry* findEntry(Entry* entries, int capacity,
    +
    static Entry* findEntry(Entry* entries, int capacity,
                             ObjString* key) {
       uint32_t index = key->hash % capacity;
       Entry* tombstone = NULL;
    @@ -336,7 +336,7 @@ 

    30 . 

    When running that previous benchmarkon my machine, at leastthe VM spends 70% of the total execution time on one line in this function. Any guesses as to which one? No? It’s this:

    -
      uint32_t index = key->hash % capacity;
    +
      uint32_t index = key->hash % capacity;
     

    That pointer dereference isn’t the problem. It’s the little %. It turns out the modulo operator is really slow. Much slower than other 30 .  think it through, it should make sense. We can replace that slow modulo operator with a very fast decrement and bitwise AND. We simply change the offending line of code to this:

    -
    static Entry* findEntry(Entry* entries, int capacity,
    +
    static Entry* findEntry(Entry* entries, int capacity,
                             ObjString* key) {
     
    table.c
    in findEntry()
    @@ -393,7 +393,7 @@

    30 . 

    Our linear probing search may need to wrap around the end of the array, so there is another modulo in findEntry() to update.

    -
          // We found the key.
    +
          // We found the key.
           return entry;
         }
     
    @@ -411,7 +411,7 @@ 

    30 .  optimizations there too. This function is called only when interning strings, which wasn’t heavily stressed by our benchmark. But a Lox program that created lots of strings might noticeably benefit from this change.

    -
      if (table->count == 0) return NULL;
    +
      if (table->count == 0) return NULL;
     
     
    table.c
    in tableFindString()
    @@ -423,7 +423,7 @@

    30 . 
    table.c, in tableFindString(), replace 1 line

    And also when the linear probing wraps around.

    -
          return entry->key;
    +
          return entry->key;
         }
     
     
    table.c
    @@ -621,7 +621,7 @@

    30 . of its value representation. To avoid that, we’ll maintain support for both the old tagged union implementation of Value and the new NaN-boxed form. We select which representation we want at compile time using this flag:

    -
    #include <stdint.h>
    +
    #include <stdint.h>
     
     
    common.h
    #define NAN_BOXING
    @@ -636,7 +636,7 @@ 

    30 . continue along its merry way.

    Most of the work happens in the “value” module where we add a section for the new type.

    -
    typedef struct ObjString ObjString;
    +
    typedef struct ObjString ObjString;
     
     
    value.h
    #ifdef NAN_BOXING
    @@ -656,7 +656,7 @@ 

    30 . of this module, the rest of the VM doesn’t really care one way or the other.

    Before we start re-implementing those macros, we close the #else branch of the #ifdef at the end of the definitions for the old representation.

    -
    #define OBJ_VAL(object)   ((Value){VAL_OBJ, {.obj = (Obj*)object}})
    +
    #define OBJ_VAL(object)   ((Value){VAL_OBJ, {.obj = (Obj*)object}})
     
    value.h
     
    @@ -700,7 +700,7 @@ 

    30 . 3 . 3 -
    typedef uint64_t Value;
    +
     
     

    That macro passes the double here:

    -
    #define NUMBER_VAL(num) numToValue(num)
    +
    #define NUMBER_VAL(num) numToValue(num)
     
    value.h
     
    @@ -735,7 +735,7 @@ 

    30 . 3 . 3memcpy() entirely.

    “Unwrapping” a Lox number is the mirror image.

    -
    typedef uint64_t Value;
    +
     
     

    That macro calls this function:

    -
    #define NUMBER_VAL(num) numToValue(num)
    +
    #define NUMBER_VAL(num) numToValue(num)
     
    value.h
     
    @@ -769,7 +769,7 @@ 

    30 . 3 . 3

    If you find yourself with a compiler that does not optimize the memcpy() away, try this instead:

    -
    double valueToNum(Value value) {
    +
     
    -
    #define clox_value_h
    +
    #define clox_value_h
     
    value.h
     
    @@ -794,7 +794,7 @@ 

    30 . 3 . 3test on a Lox number is a little more interesting. If all we have are exactly the bits for a double, how do we tell that it is a double? It’s time to get bit twiddling.

    -
    typedef uint64_t Value;
    +
    typedef uint64_t Value;
     
    value.h
     
    @@ -822,7 +822,7 @@ 

    30 . 3 . 3

    The set of quiet NaN bits are declared like this:

    -
    #ifdef NAN_BOXING
    +
    #ifdef NAN_BOXING
     
    value.h
     
    @@ -846,7 +846,7 @@ 

    30 .̴ lowest bits of our unused mantissa space as a “type tag” to determine which of these three singleton values we’re looking at. The three type tags are defined like so:

    -
    #define QNAN     ((uint64_t)0x7ffc000000000000)
    +
    #define QNAN     ((uint64_t)0x7ffc000000000000)
     
    value.h
     
    @@ -862,7 +862,7 @@ 

    30 .̴

    Our representation of nil is thus all of the bits required to define our quiet NaN representation along with the nil type tag bits:

    The bit representation of the nil value.

    In code, we check the bits like so:

    -
    #define AS_NUMBER(value)    valueToNum(value)
    +
    #define AS_NUMBER(value)    valueToNum(value)
     
     
    value.h
    #define NIL_VAL         ((Value)(uint64_t)(QNAN | TAG_NIL))
    @@ -876,7 +876,7 @@ 

    30 .̴

    Since nil has only a single bit representation, we can use equality on uint64_t to see if a Value is nil.

    -
    typedef uint64_t Value;
    +
    typedef uint64_t Value;
     
     
    value.h
    #define IS_NIL(value)       ((value) == NIL_VAL)
    @@ -885,7 +885,7 @@ 

    30 .̴
    value.h

    You can guess how we define the true and false values.

    -
    #define AS_NUMBER(value)    valueToNum(value)
    +
    #define AS_NUMBER(value)    valueToNum(value)
     
     
    value.h
    #define FALSE_VAL       ((Value)(uint64_t)(QNAN | TAG_FALSE))
    @@ -897,7 +897,7 @@ 

    30 .̴

    The bits look like this:

    The bit representation of the true and false values.

    To convert a C bool into a Lox Boolean, we rely on these two singleton values and the good old conditional operator.

    -
    #define AS_NUMBER(value)    valueToNum(value)
    +
    #define AS_NUMBER(value)    valueToNum(value)
     
     
    value.h
    #define BOOL_VAL(b)     ((b) ? TRUE_VAL : FALSE_VAL)
    @@ -908,7 +908,7 @@ 

    30 .̴

    There’s probably a cleverer bitwise way to do this, but my hunch is that the compiler can figure one out faster than I can. Going the other direction is simpler.

    -
    #define IS_NUMBER(value)    (((value) & QNAN) != QNAN)
    +
    #define IS_NUMBER(value)    (((value) & QNAN) != QNAN)
     
     
    value.h
    #define AS_BOOL(value)      ((value) == TRUE_VAL)
    @@ -920,7 +920,7 @@ 

    30 .̴ in C where any non-zero value can be considered “true”if it ain’t true, it must be false. This macro does assume you call it only on a Value that you know is a Lox Boolean. To check that, there’s one more macro.

    -
    typedef uint64_t Value;
    +
    typedef uint64_t Value;
     
     
    value.h
    #define IS_BOOL(value)      (((value) | 1) == TRUE_VAL)
    @@ -929,7 +929,7 @@ 

    30 .̴
    value.h

    That looks a little strange. A more obvious macro would look like this:

    -
    #define IS_BOOL(v) ((v) == TRUE_VAL || (v) == FALSE_VAL)
    +
    #define IS_BOOL(v) ((v) == TRUE_VAL || (v) == FALSE_VAL)
     

    Unfortunately, that’s not safe. The expansion mentions v twice, which means if that expression has any side effects, they will be executed twice. We could have @@ -976,7 +976,7 @@

    30 . 3 . 5Bit representation of an Obj* stored in a Value.

    To convert a raw Obj pointer to a Value, we take the pointer and set all of the quiet NaN bits and the sign bit.

    -
    #define NUMBER_VAL(num) numToValue(num)
    +
    #define NUMBER_VAL(num) numToValue(num)
     
    value.h
    #define OBJ_VAL(obj) \
         (Value)(SIGN_BIT | QNAN | (uint64_t)(uintptr_t)(obj))
    @@ -1001,7 +1001,7 @@ 

    30 . 3 . 5

    We define the sign bit like so:

    -
    #ifdef NAN_BOXING
    +
     
     

    To get the Obj pointer back out, we simply mask off all of those extra bits.

    -
    #define AS_NUMBER(value)    valueToNum(value)
    +
    #define AS_NUMBER(value)    valueToNum(value)
     
    value.h
    #define AS_OBJ(value) \
         ((Obj*)(uintptr_t)((value) & ~(SIGN_BIT | QNAN)))
    @@ -1026,7 +1026,7 @@ 

    30 . 3 . 5clear those bits and let the pointer bits remain.

    One last macro:

    -
    #define IS_NUMBER(value)    (((value) & QNAN) != QNAN)
    +
    #define IS_NUMBER(value)    (((value) & QNAN) != QNAN)
     
    value.h
    #define IS_OBJ(value) \
         (((value) & (QNAN | SIGN_BIT)) == (QNAN | SIGN_BIT))
    @@ -1049,7 +1049,7 @@ 

    30 . 3

    The first is printValue(). It has separate code for each value type. We no longer have an explicit type enum we can switch on, so instead we use a series of type tests to handle each kind of value.

    -
    void printValue(Value value) {
    +
    void printValue(Value value) {
     
    value.c
    in printValue()
    #ifdef NAN_BOXING
    @@ -1071,7 +1071,7 @@ 

    30 . 3 overhead of actually writing to a stream, it’s negligible.

    We still support the original tagged union representation, so we keep the old code and enclose it in the #else conditional section.

    -
      }
    +
      }
     
    value.c
    in printValue()
    #endif
    @@ -1080,7 +1080,7 @@ 

    30 . 3
    value.c, in printValue()

    The other operation is testing two values for equality.

    -
    bool valuesEqual(Value a, Value b) {
    +
    bool valuesEqual(Value a, Value b) {
     
    value.c
    in valuesEqual()
    #ifdef NAN_BOXING
    @@ -1104,7 +1104,7 @@ 

    30 . 3 it’s possible to produce a “real” arithmetic NaN in Lox, and if we want to correctly implement IEEE 754 numbers, then the resulting value is not supposed to be equal to itself. More concretely:

    -

  • @@ -267,7 +267,7 @@

    stratifying the grammar. We define a separate rule for each precedence level.

    -
    expression     → ...
    +
    expression     → ...
     equality       → ...
     comparison     → ...
     term           → ...
    @@ -298,28 +298,28 @@ 

    expressionequality
    +
    expressionequality
     

    Over at the other end of the precedence table, a primary expression contains all the literals and grouping expressions.

    -
    primaryNUMBER | STRING | "true" | "false" | "nil"
    +
    primaryNUMBER | STRING | "true" | "false" | "nil"
                    | "(" expression ")" ;
     

    A unary expression starts with a unary operator followed by the operand. Since unary operators can nest!!true is a valid if weird expressionthe operand can itself be a unary operator. A recursive rule handles that nicely.

    -
    unary          → ( "!" | "-" ) unary ;
    +
    unary          → ( "!" | "-" ) unary ;
     

    But this rule has a problem. It never terminates.

    Remember, each rule needs to match expressions at that precedence level or higher, so we also need to let this match a primary expression.

    -
    unary          → ( "!" | "-" ) unary
    +
    unary          → ( "!" | "-" ) unary
                    | primary ;
     

    That works.

    The remaining rules are all binary operators. We’ll start with the rule for multiplication and division. Here’s a first try:

    -
    factorfactor ( "/" | "*" ) unary
    +
    factorfactor ( "/" | "*" ) unary
                    | unary ;
     

    The rule recurses to match the left operand. That enables the rule to match a @@ -331,7 +331,7 @@

    —you get the same result either way. Alas, in the real world with limited precision, roundoff and overflow mean that associativity can affect the result of a sequence of multiplications. Consider:

    -
    print 0.1 * (0.2 * 0.3);
    +
    print 0.1 * (0.2 * 0.3);
     print (0.1 * 0.2) * 0.3;
     

    In languages like Lox that use IEEE 754 double-precision floating-point @@ -348,14 +348,14 @@

    factorunary ( ( "/" | "*" ) unary )* ;
    +
    factorunary ( ( "/" | "*" ) unary )* ;
     

    We define a factor expression as a flat sequence of multiplications and divisions. This matches the same syntax as the previous rule, but better mirrors the code we’ll write to parse Lox. We use the same structure for all of the other binary operator precedence levels, giving us this complete expression grammar:

    -
    expressionequality ;
    +
    expressionequality ;
     equalitycomparison ( ( "!=" | "==" ) comparison )* ;
     comparisonterm ( ( ">" | ">=" | "<" | "<=" ) term )* ;
     termfactor ( ( "-" | "+" ) factor )* ;
    @@ -419,9 +419,9 @@ 

    6 call.

    6 . 2 . 1The parser class

    Each grammar rule becomes a method inside this new class:

    -
    lox/Parser.java
    +
    lox/Parser.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     import java.util.List;
     
    @@ -444,9 +444,9 @@ 

    6 . 2&#

    We’re going to run straight through the expression grammar now and translate each rule to Java code. The first rule, expression, simply expands to the equality rule, so that’s straightforward.

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after Parser()
    -
      private Expr expression() {
    +
      private Expr expression() {
         return equality();
       }
     
    @@ -462,12 +462,12 @@

    6 . 2&# and so on, until the parser hits a stack overflow and dies.

    The rule for equality is a little more complex.

    -
    equalitycomparison ( ( "!=" | "==" ) comparison )* ;
    +
    equalitycomparison ( ( "!=" | "==" ) comparison )* ;
     

    In Java, that becomes:

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after expression()
    -
      private Expr equality() {
    +
      private Expr equality() {
         Expr expr = comparison();
     
         while (match(BANG_EQUAL, EQUAL_EQUAL)) {
    @@ -489,9 +489,9 @@ 

    6 . 2&# either a != or == token. So, if we don’t see one of those, we must be done with the sequence of equality operators. We express that check using a handy match() method.

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after equality()
    -
      private boolean match(TokenType... types) {
    +
      private boolean match(TokenType... types) {
         for (TokenType type : types) {
           if (check(type)) {
             advance();
    @@ -510,9 +510,9 @@ 

    6 . 2&# fundamental operations.

    The check() method returns true if the current token is of the given type. Unlike match(), it never consumes the token, it only looks at it.

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after match()
    -
      private boolean check(TokenType type) {
    +
      private boolean check(TokenType type) {
         if (isAtEnd()) return false;
         return peek().type == type;
       }
    @@ -521,9 +521,9 @@ 

    6 . 2&#

    The advance() method consumes the current token and returns it, similar to how our scanner’s corresponding method crawled through characters.

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after check()
    -
      private Token advance() {
    +
      private Token advance() {
         if (!isAtEnd()) current++;
         return previous();
       }
    @@ -531,9 +531,9 @@ 

    6 . 2&#
    lox/Parser.java, add after check()

    These methods bottom out on the last handful of primitive operations.

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after advance()
    -
      private boolean isAtEnd() {
    +
      private boolean isAtEnd() {
         return peek().type == EOF;
       }
     
    @@ -573,12 +573,12 @@ 

    6 . 2&# way, this method matches an equality operator or anything of higher precedence.

    Moving on to the next rule . . . 

    -
    comparisonterm ( ( ">" | ">=" | "<" | "<=" ) term )* ;
    +
    comparisonterm ( ( ">" | ">=" | "<" | "<=" ) term )* ;
     

    Translated to Java:

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after equality()
    -
      private Expr comparison() {
    +
      private Expr comparison() {
         Expr expr = term();
     
         while (match(GREATER, GREATER_EQUAL, LESS, LESS_EQUAL)) {
    @@ -603,9 +603,9 @@ 

    6 . 2&# parsing a left-associative series of binary operators given a list of token types, and an operand method handle to simplify this redundant code.

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after comparison()
    -
      private Expr term() {
    +
      private Expr term() {
         Expr expr = factor();
     
         while (match(MINUS, PLUS)) {
    @@ -620,9 +620,9 @@ 

    6 . 2&#
    lox/Parser.java, add after comparison()

    And finally, multiplication and division:

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after term()
    -
      private Expr factor() {
    +
      private Expr factor() {
         Expr expr = unary();
     
         while (match(SLASH, STAR)) {
    @@ -639,13 +639,13 @@ 

    6 . 2&#

    That’s all of the binary operators, parsed with the correct precedence and associativity. We’re crawling up the precedence hierarchy and now we’ve reached the unary operators.

    -
    unary          → ( "!" | "-" ) unary
    +
    unary          → ( "!" | "-" ) unary
                    | primary ;
     

    The code for this is a little different.

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after factor()
    -
      private Expr unary() {
    +
      private Expr unary() {
         if (match(BANG, MINUS)) {
           Token operator = previous();
           Expr right = unary();
    @@ -667,14 +667,14 @@ 

    6 . 2&#

    Otherwise, we must have reached the highest level of precedence, primary expressions.

    -
    primaryNUMBER | STRING | "true" | "false" | "nil"
    +
    primaryNUMBER | STRING | "true" | "false" | "nil"
                    | "(" expression ")" ;
     

    Most of the cases for the rule are single terminals, so parsing is straightforward.

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after unary()
    -
      private Expr primary() {
    +
      private Expr primary() {
         if (match(FALSE)) return new Expr.Literal(false);
         if (match(TRUE)) return new Expr.Literal(true);
         if (match(NIL)) return new Expr.Literal(null);
    @@ -805,9 +805,9 @@ 

    6 .R code to parse a parenthesized expression. After parsing the expression, the parser looks for the closing ) by calling consume(). Here, finally, is that method:

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after match()
    -
      private Token consume(TokenType type, String message) {
    +
      private Token consume(TokenType type, String message) {
         if (check(type)) return advance();
     
         throw error(peek(), message);
    @@ -818,9 +818,9 @@ 

    6 .R

    It’s similar to match() in that it checks to see if the next token is of the expected type. If so, it consumes the token and everything is groovy. If some other token is there, then we’ve hit an error. We report it by calling this:

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after previous()
    -
      private ParseError error(Token token, String message) {
    +
      private ParseError error(Token token, String message) {
         Lox.error(token, message);
         return new ParseError();
       }
    @@ -828,9 +828,9 @@ 

    6 .R
    lox/Parser.java, add after previous()

    First, that shows the error to the user by calling:

    -
    lox/Lox.java
    +
    lox/Lox.java
    add after report()
    -
      static void error(Token token, String message) {
    +
      static void error(Token token, String message) {
         if (token.type == TokenType.EOF) {
           report(token.line, " at end", message);
         } else {
    @@ -846,7 +846,7 @@ 

    6 .R

    After we report the error, the user knows about their mistake, but what does the parser do next? Back in error(), we create and return a ParseError, an instance of this new class:

    -
    class Parser {
    +
    class Parser {
     
    lox/Parser.java
    nest inside class Parser
      private static class ParseError extends RuntimeException {}
    @@ -873,7 +873,7 @@ 

    6 .R

    For example, some languages have a unary + operator, like +123, but Lox does not. Instead of getting confused when the parser stumbles onto a + at the beginning of an expression, we could extend the unary rule to allow it.

    -
    unary → ( "!" | "-" | "+" ) unary
    +
    unary → ( "!" | "-" | "+" ) unary
           | primary ;
     

    This lets the parser consume + without going into panic mode or leaving the @@ -910,9 +910,9 @@

    lox/Parser.java
    +
    lox/Parser.java
    add after error()
    -
      private void synchronize() {
    +
      private void synchronize() {
         advance();
     
         while (!isAtEnd()) {
    @@ -951,7 +951,7 @@ 

    6 .&# methods for each grammar rule, it eventually hits primary(). If none of the cases in there match, it means we are sitting on a token that can’t start an expression. We need to handle that error too.

    -
        if (match(LEFT_PAREN)) {
    +
        if (match(LEFT_PAREN)) {
           Expr expr = expression();
           consume(RIGHT_PAREN, "Expect ')' after expression.");
           return new Expr.Grouping(expr);
    @@ -967,9 +967,9 @@ 

    6 .&#

    With that, all that remains in the parser is to define an initial method to kick it off. That method is called, naturally enough, parse().

    -
    lox/Parser.java
    +
    lox/Parser.java
    add after Parser()
    -
      Expr parse() {
    +
      Expr parse() {
         try {
           return expression();
         } catch (ParseError error) {
    @@ -992,7 +992,7 @@ 

    6 .&# tree and then use the AstPrinter class from the last chapter to display it.

    Delete the old code to print the scanned tokens and replace it with this:

    -
        List<Token> tokens = scanner.scanTokens();
    +
        List<Token> tokens = scanner.scanTokens();
     
    lox/Lox.java
    in run()
    replace 5 lines
    @@ -1056,7 +1056,7 @@

    Design Note: Logic Versus History—and most languages that follow in C’s footstepsplace them below ==. This is widely considered a mistake because it means common operations like testing a flag require parentheses.

    -
    if (flags & FLAG_MASK == SOME_FLAG) { ... } // Wrong.
    +
    if (flags & FLAG_MASK == SOME_FLAG) { ... } // Wrong.
     if ((flags & FLAG_MASK) == SOME_FLAG) { ... } // Right.
     

    Should we fix this for Lox and put bitwise operators higher up the precedence diff --git a/site/representing-code.html b/site/representing-code.html index 90905bade..0891f4434 100644 --- a/site/representing-code.html +++ b/site/representing-code.html @@ -1,5 +1,5 @@ - + Representing Code · Crafting Interpreters @@ -114,7 +114,7 @@

    Representing Code

    those requirements aren’t exactly illuminating. Maybe your intuition can help. What is your brain doing when you play the part of a human interpreter? How do you mentally evaluate an arithmetic expression like this:

    -
    1 + 2 * 3 - 4
    +
    1 + 2 * 3 - 4
     

    Because you understand the order of operationsthe old “Please Excuse My Dear Aunt Sally” stuffyou know that the multiplication is evaluated @@ -255,7 +255,7 @@

    5 . 

    Yes, I really am going to be using breakfast examples throughout this entire book. Sorry.

    -
    breakfastprotein "with" breakfast "on the side" ;
    +
    breakfastprotein "with" breakfast "on the side" ;
     breakfastprotein ;
     breakfastbread ;
     
    @@ -278,15 +278,15 @@ 

    5 .  see how it works. By age-old convention, the game starts with the first rule in the grammar, here breakfast. There are three productions for that, and we randomly pick the first one. Our resulting string looks like:

    -
    protein "with" breakfast "on the side"
    +
    protein "with" breakfast "on the side"
     

    We need to expand that first nonterminal, protein, so we pick a production for that. Let’s pick:

    -
    proteincooked "eggs" ;
    +
    proteincooked "eggs" ;
     

    Next, we need a production for cooked, and so we pick "poached". That’s a terminal, so we add that. Now our string looks like:

    -
    "poached" "eggs" "with" breakfast "on the side"
    +
    "poached" "eggs" "with" breakfast "on the side"
     

    The next non-terminal is breakfast again. The first breakfast production we chose recursively refers back to the breakfast rule. Recursion in the grammar @@ -327,13 +327,13 @@

    5 

    Instead of repeating the rule name each time we want to add another production for it, we’ll allow a series of productions separated by a pipe (|).

    -
    bread"toast" | "biscuits" | "English muffin" ;
    +
    bread"toast" | "biscuits" | "English muffin" ;
     
  • Further, we’ll allow parentheses for grouping and then allow | within that to select one from a series of options within the middle of a production.

    -
    protein → ( "scrambled" | "poached" | "fried" ) "eggs" ;
    +
    protein → ( "scrambled" | "poached" | "fried" ) "eggs" ;
     
  • @@ -342,7 +342,7 @@

    5  make a separate named sub-rule each time we want to loop. So, we also use a postfix * to allow the previous symbol or group to be repeated zero or more times.

    -
    crispiness"really" "really"* ;
    +
    crispiness"really" "really"* ;
     

  • @@ -355,18 +355,18 @@

    5 
  • A postfix + is similar, but requires the preceding production to appear at least once.

    -
    crispiness"really"+ ;
    +
    crispiness"really"+ ;
     
  • A postfix ? is for an optional production. The thing before it can appear zero or one time, but not more.

    -
    breakfastprotein ( "with" breakfast "on the side" )? ;
    +
    breakfastprotein ( "with" breakfast "on the side" )? ;
     
  • With all of those syntactic niceties, our breakfast grammar condenses down to:

    -
    breakfastprotein ( "with" breakfast "on the side" )?
    +
    breakfastprotein ( "with" breakfast "on the side" )?
               | bread ;
     
     protein"really"+ "crispy" "bacon"
    @@ -414,10 +414,10 @@ 

    That gives us enough syntax for expressions like:

    -
    1 - (2 * 3) < 4 == false
    +
    1 - (2 * 3) < 4 == false
     

    Using our handy dandy new notation, here’s a grammar for those:

    -
    expressionliteral
    +
    expressionliteral
                    | unary
                    | binary
                    | grouping ;
    @@ -471,7 +471,7 @@ 

    5 keep things simpler.

    Something like this:

    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     abstract class Expr { 
       static class Binary extends Expr {
    @@ -544,9 +544,9 @@ 

    5

    An actual scripting language would be a better fit for this than Java, but I’m trying not to throw too many languages at you.

    -
    tool/GenerateAst.java
    +
    tool/GenerateAst.java
    create new file
    -
    package com.craftinginterpreters.tool;
    +
    package com.craftinginterpreters.tool;
     
     import java.io.IOException;
     import java.io.PrintWriter;
    @@ -572,7 +572,7 @@ 

    5 We are merely automating how that file gets authored.

    To generate the classes, it needs to have some description of each type and its fields.

    -
        String outputDir = args[0];
    +
        String outputDir = args[0];
     
    tool/GenerateAst.java
    in main()
        defineAst(outputDir, "Expr", Arrays.asList(
    @@ -589,9 +589,9 @@ 

    5 strings. Each is the name of the class followed by : and the list of fields, separated by commas. Each field has a type and a name.

    The first thing defineAst() needs to do is output the base Expr class.

    -
    tool/GenerateAst.java
    +
    tool/GenerateAst.java
    add after main()
    -
      private static void defineAst(
    +
      private static void defineAst(
           String outputDir, String baseName, List<String> types)
           throws IOException {
         String path = outputDir + "/" + baseName + ".java";
    @@ -614,7 +614,7 @@ 

    5 hardcoding the name because we’ll add a separate family of classes later for statements.

    Inside the base class, we define each subclass.

    -
        writer.println("abstract class " + baseName + " {");
    +
        writer.println("abstract class " + baseName + " {");
     
     
    tool/GenerateAst.java
    in defineAst()
    @@ -634,9 +634,9 @@

    5 a priority.

    That code, in turn, calls:

    -
    tool/GenerateAst.java
    +
    tool/GenerateAst.java
    add after defineAst()
    -
      private static void defineType(
    +
      private static void defineType(
           PrintWriter writer, String baseName,
           String className, String fieldList) {
         writer.println("  static class " + className + " extends " +
    @@ -683,7 +683,7 @@ 

    5 .  tokens, we can simply switch on the TokenType. But we don’t have a “type” enum for the syntax trees, just a separate Java class for each one.

    We could write a long chain of type tests:

    -
    if (expr instanceof Expr.Binary) {
    +
    if (expr instanceof Expr.Binary) {
       // ...
     } else if (expr instanceof Expr.Grouping) {
       // ...
    @@ -792,7 +792,7 @@ 

    5 .R watch tourists staggering around trying to shake off their hangover from the previous night’s revelry.

    -
      abstract class Pastry {
    +
      abstract class Pastry {
       }
     
       class Beignet extends Pastry {
    @@ -805,7 +805,7 @@ 

    5 .R

    We want to be able to define new pastry operationscooking them, eating them, decorating them, etc.without having to add a new method to each class every time. Here’s how we do it. First, we define a separate interface.

    -
      interface PastryVisitor {
    +
      interface PastryVisitor {
         void visitBeignet(Beignet beignet); 
         void visitCruller(Cruller cruller);
       }
    @@ -825,13 +825,13 @@ 

    5 .R code for the operation on both types all nestled snugly together in one class.

    Given some pastry, how do we route it to the correct method on the visitor based on its type? Polymorphism to the rescue! We add this method to Pastry:

    -
      abstract class Pastry {
    +
      abstract class Pastry {
     
        abstract void accept(PastryVisitor visitor);
     
      }
     

    Each subclass implements it.

    -
      class Beignet extends Pastry {
    +
     
     

    And:

    -
      class Cruller extends Pastry {
    +
      class Cruller extends Pastry {
     
        @Override
         void accept(PastryVisitor visitor) {
           visitor.visitCruller(this);
    @@ -874,7 +874,7 @@ 

    5&#

    First, we define the visitor interface. Again, we nest it inside the base class so that we can keep everything in one file.

    -
        writer.println("abstract class " + baseName + " {");
    +
        writer.println("abstract class " + baseName + " {");
     
     
    tool/GenerateAst.java
    in defineAst()
    @@ -885,9 +885,9 @@

    5&#
    tool/GenerateAst.java, in defineAst()

    That function generates the visitor interface.

    -
    tool/GenerateAst.java
    +
    tool/GenerateAst.java
    add after defineAst()
    -
      private static void defineVisitor(
    +
      private static void defineVisitor(
           PrintWriter writer, String baseName, List<String> types) {
         writer.println("  interface Visitor<R> {");
     
    @@ -906,7 +906,7 @@ 

    5&# each one. When we define new expression types later, this will automatically include them.

    Inside the base class, we define the abstract accept() method.

    -
          defineType(writer, baseName, className, fields);
    +
          defineType(writer, baseName, className, fields);
         }
     
    tool/GenerateAst.java
    in defineAst()
    @@ -922,7 +922,7 @@

    5&#

    Finally, each subclass implements that and calls the right visit method for its own type.

    -
        writer.println("    }");
    +
        writer.println("    }");
     
    tool/GenerateAst.java
    in defineType()
    @@ -964,13 +964,13 @@ 

    5 parenthesized, and all of its subexpressions and tokens are contained in that.

    Given a syntax tree like:

    An example syntax tree.

    It produces:

    -
    (* (- 123) (group 45.67))
    +
    (* (- 123) (group 45.67))
     

    Not exactly “pretty”, but it does show the nesting and grouping explicitly. To implement this, we define a new class.

    -
    lox/AstPrinter.java
    +
    lox/AstPrinter.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     class AstPrinter implements Expr.Visitor<String> {
       String print(Expr expr) {
    @@ -982,7 +982,7 @@ 

    5

    As you can see, it implements the visitor interface. That means we need visit methods for each of the expression types we have so far.

    -
        return expr.accept(this);
    +
        return expr.accept(this);
       }
     
    lox/AstPrinter.java
    add after print()
    @@ -1016,9 +1016,9 @@

    5

    Literal expressions are easythey convert the value to a string with a little check to handle Java’s null standing in for Lox’s nil. The other expressions have subexpressions, so they use this parenthesize() helper method:

    -
    lox/AstPrinter.java
    +
    lox/AstPrinter.java
    add after visitUnaryExpr()
    -
      private String parenthesize(String name, Expr... exprs) {
    +
      private String parenthesize(String name, Expr... exprs) {
         StringBuilder builder = new StringBuilder();
     
         builder.append("(").append(name);
    @@ -1035,7 +1035,7 @@ 

    5

    It takes a name and a list of subexpressions and wraps them all up in parentheses, yielding a string like:

    -
    (+ 1 2)
    +
    (+ 1 2)
     

    Note that it calls accept() on each subexpression and passes in itself. This is the recursive step that lets us print an entire @@ -1047,9 +1047,9 @@

    5

    We don’t have a parser yet, so it’s hard to see this in action. For now, we’ll hack together a little main() method that manually instantiates a tree and prints it.

    -
    lox/AstPrinter.java
    +
    lox/AstPrinter.java
    add after parenthesize()
    -
      public static void main(String[] args) {
    +
      public static void main(String[] args) {
         Expr expression = new Expr.Binary(
             new Expr.Unary(
                 new Token(TokenType.MINUS, "-", null, 1),
    @@ -1064,7 +1064,7 @@ 

    5
    lox/AstPrinter.java, add after parenthesize()

    If we did everything right, it prints:

    -
    (* (- 123) (group 45.67))
    +
    (* (- 123) (group 45.67))
     

    You can go ahead and delete this method. We won’t need it. Also, as we add new syntax tree types, I won’t bother showing the necessary visit methods for them @@ -1078,7 +1078,7 @@

    Challenges

  • Earlier, I said that the |, *, and + forms we added to our grammar metasyntax were just syntactic sugar. Take this grammar:

    -
    exprexpr ( "(" ( expr ( "," expr )* )? ")" | "." IDENTIFIER )+
    +
    exprexpr ( "(" ( expr ( "," expr )* )? ")" | "." IDENTIFIER )+
          | IDENTIFIER
          | NUMBER
     
    @@ -1100,10 +1100,10 @@

    Challenges

    Evaluation proceeds from left to right. Numbers are pushed onto an implicit stack. An arithmetic operator pops the top two numbers, performs the operation, and pushes the result. Thus, this:

    -
    (1 + 2) * (4 - 3)
    +
    (1 + 2) * (4 - 3)
     

    in RPN becomes:

    -
    1 2 + 4 3 - *
    +
    1 2 + 4 3 - *
     

    Define a visitor class for our syntax tree classes that takes an expression, converts it to RPN, and returns the resulting string.

    diff --git a/site/resolving-and-binding.html b/site/resolving-and-binding.html index 5aea30cf1..dcad7636f 100644 --- a/site/resolving-and-binding.html +++ b/site/resolving-and-binding.html @@ -1,5 +1,5 @@ - + Resolving and Binding · Crafting Interpreters @@ -114,7 +114,7 @@

    11 . 1S

    A quick refresher: Lox, like most modern languages, uses lexical scoping. This means that you can figure out which declaration a variable name refers to just by reading the text of the program. For example:

    -

  • “Preceding” means appearing before in the program text.

    -
    var a = "outer";
    +
    var a = "outer";
     {
       print a;
       var a = "inner";
    @@ -163,13 +163,13 @@ 

    11 . 1S the beginning of the block. Any use of that name in the block will refer to that variable, even if the use appears before the declaration. When you write this in JavaScript:

    -
    {
    +
    {
       console.log(a);
       var a = "value";
     }
     

    It behaves like:

    -
    {
    +
    {
       var a; // Hoist.
       console.log(a);
       a = "value";
    @@ -182,7 +182,7 @@ 

    11 . 1S
  • “Innermost” is there because of our good friend shadowing. There may be more than one variable with the given name in enclosing scopes, as in:

    -
    var a = "outer";
    +
    var a = "outer";
     {
       var a = "inner";
       print a;
    @@ -195,7 +195,7 @@ 

    11 . 1S variable expression always refers to the same declaration through the entire execution of the program. Our interpreter so far mostly implements the rule correctly. But when we added closures, an error snuck in.

    -
    var a = "global";
    +
    var a = "global";
     {
       fun showA() {
         print a;
    @@ -220,7 +220,7 @@ 

    11 . 1S by our rule that a variable expression always resolves to the same variable, that implies the second call to showA() should print the same thing.

    Alas, it prints:

    -
    global
    +
    global
     block
     

    Let me stress that this program never reassigns any variable and contains only a @@ -267,7 +267,7 @@

    {
    +
    {
       var a;
       // 1.
       var b;
    @@ -394,9 +394,9 @@ 

    11 . 3A Resolver Class

    Like everything in Java, our variable resolution pass is embodied in a class.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     import java.util.HashMap;
     import java.util.List;
    @@ -438,9 +438,9 @@ 

    11 . 3<

    11 . 3 . 1Resolving blocks

    We start with blocks since they create the local scopes where all the magic happens.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after Resolver()
    -
      @Override
    +
      @Override
       public Void visitBlockStmt(Stmt.Block stmt) {
         beginScope();
         resolve(stmt.statements);
    @@ -453,9 +453,9 @@ 

    11 . 3&

    This begins a new scope, traverses into the statements inside the block, and then discards the scope. The fun stuff lives in those helper methods. We start with the simple one.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after Resolver()
    -
      void resolve(List<Stmt> statements) {
    +
      void resolve(List<Stmt> statements) {
         for (Stmt statement : statements) {
           resolve(statement);
         }
    @@ -464,9 +464,9 @@ 

    11 . 3&
    lox/Resolver.java, add after Resolver()

    This walks a list of statements and resolves each one. It in turn calls:

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitBlockStmt()
    -
      private void resolve(Stmt stmt) {
    +
      private void resolve(Stmt stmt) {
         stmt.accept(this);
       }
     
    @@ -474,9 +474,9 @@

    11 . 3&

    While we’re at it, let’s add another overload that we’ll need later for resolving an expression.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after resolve(Stmt stmt)
    -
      private void resolve(Expr expr) {
    +
      private void resolve(Expr expr) {
         expr.accept(this);
       }
     
    @@ -487,9 +487,9 @@

    11 . 3& syntax tree node.

    The real interesting behavior is around scopes. A new block scope is created like so:

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after resolve()
    -
      private void beginScope() {
    +
      private void beginScope() {
         scopes.push(new HashMap<String, Boolean>());
       }
     
    @@ -498,7 +498,7 @@

    11 . 3&

    Lexical scopes nest in both the interpreter and the resolver. They behave like a stack. The interpreter implements that stack using a linked listthe chain of Environment objects. In the resolver, we use an actual Java Stack.

    -

  • @@ -590,9 +590,9 @@

    lox/Resolver.java
    +
    lox/Resolver.java
    add after endScope()
    -
      private void declare(Token name) {
    +
      private void declare(Token name) {
         if (scopes.isEmpty()) return;
     
         Map<String, Boolean> scope = scopes.peek();
    @@ -610,9 +610,9 @@ 

    lox/Resolver.java
    +
    lox/Resolver.java
    add after declare()
    -
      private void define(Token name) {
    +
      private void define(Token name) {
         if (scopes.isEmpty()) return;
         scopes.peek().put(name.lexeme, true);
       }
    @@ -624,9 +624,9 @@ 

    11 . 3 . 3Resolving variable expressions

    Variable declarationsand function declarations, which we’ll get towrite to the scope maps. Those maps are read when we resolve variable expressions.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitVarStmt()
    -
      @Override
    +
      @Override
       public Void visitVariableExpr(Expr.Variable expr) {
         if (!scopes.isEmpty() &&
             scopes.peek().get(expr.name.lexeme) == Boolean.FALSE) {
    @@ -645,9 +645,9 @@ 

    lox/Resolver.java
    +
    lox/Resolver.java
    add after define()
    -
      private void resolveLocal(Expr expr, Token name) {
    +
      private void resolveLocal(Expr expr, Token name) {
         for (int i = scopes.size() - 1; i >= 0; i--) {
           if (scopes.get(i).containsKey(name.lexeme)) {
             interpreter.resolve(expr, scopes.size() - 1 - i);
    @@ -671,9 +671,9 @@ 

    11 . 3 . 4Resolving assignment expressions

    The other expression that references a variable is assignment. Resolving one looks like this:

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitVarStmt()
    -
      @Override
    +
      @Override
       public Void visitAssignExpr(Expr.Assign expr) {
         resolve(expr.value);
         resolveLocal(expr, expr.name);
    @@ -690,9 +690,9 @@ 

    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitBlockStmt()
    -
      @Override
    +
      @Override
       public Void visitFunctionStmt(Stmt.Function stmt) {
         declare(stmt.name);
         define(stmt.name);
    @@ -708,9 +708,9 @@ 

    lox/Resolver.java
    +
    lox/Resolver.java
    add after resolve()
    -
      private void resolveFunction(Stmt.Function function) {
    +
      private void resolveFunction(Stmt.Function function) {
         beginScope();
         for (Token param : function.params) {
           declare(param);
    @@ -742,9 +742,9 @@ 

    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitBlockStmt()
    -
      @Override
    +
      @Override
       public Void visitExpressionStmt(Stmt.Expression stmt) {
         resolve(stmt.expression);
         return null;
    @@ -754,9 +754,9 @@ 

    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitFunctionStmt()
    -
      @Override
    +
      @Override
       public Void visitIfStmt(Stmt.If stmt) {
         resolve(stmt.condition);
         resolve(stmt.thenBranch);
    @@ -772,9 +772,9 @@ 

    —it analyzes any branch that could be run. Since either one could be reached at runtime, we resolve both.

    Like expression statements, a print statement contains a single subexpression.

    -
    lox/Resolver.java
    +

    Same deal for return.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitPrintStmt()
    -
      @Override
    +
      @Override
       public Void visitReturnStmt(Stmt.Return stmt) {
         if (stmt.value != null) {
           resolve(stmt.value);
    @@ -798,9 +798,9 @@ 

    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitVarStmt()
    -
      @Override
    +
      @Override
       public Void visitWhileStmt(Stmt.While stmt) {
         resolve(stmt.condition);
         resolve(stmt.body);
    @@ -812,9 +812,9 @@ 

     . . . 

    Our old friend the binary expression. We traverse into and resolve both operands.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitAssignExpr()
    -
      @Override
    +
      @Override
       public Void visitBinaryExpr(Expr.Binary expr) {
         resolve(expr.left);
         resolve(expr.right);
    @@ -826,9 +826,9 @@ 

    —we walk the argument list and resolve them all. The thing being called is also an expression (usually a variable expression), so that gets resolved too.

    -
    lox/Resolver.java
    +

    Parentheses are easy.

    -
    lox/Resolver.java
    +

    Literals are easiest of all.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitGroupingExpr()
    -
      @Override
    +
      @Override
       public Void visitLiteralExpr(Expr.Literal expr) {
         return null;
       }
    @@ -866,9 +866,9 @@ 

    lox/Resolver.java
    +

    And, finally, the last node. We resolve its one operand.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after visitLogicalExpr()
    -
      @Override
    +
      @Override
       public Void visitUnaryExpr(Expr.Unary expr) {
         resolve(expr.right);
         return null;
    @@ -898,9 +898,9 @@ 

    lox/Interpreter.java
    +
    lox/Interpreter.java
    add after execute()
    -
      void resolve(Expr expr, int depth) {
    +
      void resolve(Expr expr, int depth) {
         locals.put(expr, depth);
       }
     
    @@ -924,7 +924,7 @@

    —simply clear the map.

    -
      private Environment environment = globals;
    +
      private Environment environment = globals;
     
    lox/Interpreter.java
    in class Interpreter
      private final Map<Expr, Integer> locals = new HashMap<>();
    @@ -939,7 +939,7 @@ 

    import java.util.ArrayList;
    +
     
     

    And:

    -
    import java.util.List;
    +
    import java.util.List;
     
    lox/Interpreter.java
    import java.util.Map;
     
    @@ -960,7 +960,7 @@ 

    Our interpreter now has access to each variable’s resolved location. Finally, we get to make use of that. We replace the visit method for variable expressions with this:

    -
      public Object visitVariableExpr(Expr.Variable expr) {
    +
      public Object visitVariableExpr(Expr.Variable expr) {
     
    lox/Interpreter.java
    in visitVariableExpr()
    replace 1 line
    @@ -970,9 +970,9 @@

    lox/Interpreter.java, in visitVariableExpr(), replace 1 line

    That delegates to:

    -
    lox/Interpreter.java
    +
    lox/Interpreter.java
    add after visitVariableExpr()
    -
      private Object lookUpVariable(Token name, Expr expr) {
    +
      private Object lookUpVariable(Token name, Expr expr) {
         Integer distance = locals.get(expr);
         if (distance != null) {
           return environment.getAt(distance, name.lexeme);
    @@ -992,9 +992,9 @@ 

    If we do get a distance, we have a local variable, and we get to take advantage of the results of our static analysis. Instead of calling get(), we call this new method on Environment:

    -
    lox/Environment.java
    +
    lox/Environment.java
    add after define()
    -
      Object getAt(int distance, String name) {
    +
      Object getAt(int distance, String name) {
         return ancestor(distance).values.get(name);
       }
     
    @@ -1004,9 +1004,9 @@

    scouring each one to see if the variable might be hiding in there somewhere. But now we know exactly which environment in the chain will have the variable. We reach it using this helper method:

    -
    lox/Environment.java
    +
    lox/Environment.java
    add after define()
    -
      Environment ancestor(int distance) {
    +
      Environment ancestor(int distance) {
         Environment environment = this;
         for (int i = 0; i < distance; i++) {
           environment = environment.enclosing; 
    @@ -1038,7 +1038,7 @@ 

    11 . 4 . 2Assigning to a resolved variable

    We can also use a variable by assigning to it. The changes to visiting an assignment expression are similar.

    -
      public Object visitAssignExpr(Expr.Assign expr) {
    +
      public Object visitAssignExpr(Expr.Assign expr) {
         Object value = evaluate(expr.value);
     
    lox/Interpreter.java
    in visitAssignExpr()
    @@ -1058,9 +1058,9 @@

    lox/Environment.java
    +
    lox/Environment.java
    add after getAt()
    -
      void assignAt(int distance, Token name, Object value) {
    +
      void assignAt(int distance, Token name, Object value) {
         ancestor(distance).values.put(name.lexeme, value);
       }
     
    @@ -1075,7 +1075,7 @@

    11 . 4 . 3Running the resolver

    We do need to actually run the resolver, though. We insert the new pass after the parser does its magic.

    -
        // Stop if there was a syntax error.
    +
        // Stop if there was a syntax error.
         if (hadError) return;
     
     
    lox/Lox.java
    @@ -1099,7 +1099,7 @@

    11 . 

    Since we are doing a semantic analysis pass, we have an opportunity to make Lox’s semantics more precise, and to help users catch bugs early before running their code. Take a look at this bad boy:

    -
    fun bad() {
    +
    fun bad() {
       var a = "first";
       var a = "second";
     }
    @@ -1110,7 +1110,7 @@ 

    11 .  And if they didn’t know it existed, they probably didn’t intend to overwrite the previous one.

    We can detect this mistake statically while resolving.

    -
        Map<String, Boolean> scope = scopes.peek();
    +
        Map<String, Boolean> scope = scopes.peek();
     
    lox/Resolver.java
    in declare()
        if (scope.containsKey(name.lexeme)) {
    @@ -1127,7 +1127,7 @@ 

    11 .  report an error.

    11 . 5 . 1Invalid return errors

    Here’s another nasty little script:

    -
    return "at top level";
    +
    return "at top level";
     

    This executes a return statement, but it’s not even inside a function at all. It’s top-level code. I don’t know what the user thinks is going to happen, but @@ -1135,7 +1135,7 @@

    11 

    We can extend the resolver to detect this statically. Much like we track scopes as we walk the tree, we can track whether or not the code we are currently visiting is inside a function declaration.

    -
      private final Stack<Map<String, Boolean>> scopes = new Stack<>();
    +
      private final Stack<Map<String, Boolean>> scopes = new Stack<>();
     
    lox/Resolver.java
    in class Resolver
      private FunctionType currentFunction = FunctionType.NONE;
    @@ -1146,9 +1146,9 @@ 

    11 
    lox/Resolver.java, in class Resolver

    Instead of a bare Boolean, we use this funny enum:

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    add after Resolver()
    -
      private enum FunctionType {
    +
      private enum FunctionType {
         NONE,
         FUNCTION
       }
    @@ -1158,7 +1158,7 @@ 

    11 

    It seems kind of dumb now, but we’ll add a couple more cases to it later and then it will make more sense. When we resolve a function declaration, we pass that in.

    -
        define(stmt.name);
    +
        define(stmt.name);
     
     
    lox/Resolver.java
    in visitFunctionStmt()
    @@ -1170,7 +1170,7 @@

    11 

    Over in resolveFunction(), we take that parameter and store it in the field before resolving the body.

    -
    lox/Resolver.java
    +
    lox/Resolver.java
    method resolveFunction()
    replace 1 line
      private void resolveFunction(
    @@ -1190,7 +1190,7 @@ 

    11  we’ll piggyback on the JVM. We store the previous value in a local on the Java stack. When we’re done resolving the function body, we restore the field to that value.

    -
        endScope();
    +
        endScope();
     
    lox/Resolver.java
    in resolveFunction()
        currentFunction = enclosingFunction;
    @@ -1200,7 +1200,7 @@ 

    11 

    Now that we can always tell whether or not we’re inside a function declaration, we check that when resolving a return statement.

    -
      public Void visitReturnStmt(Stmt.Return stmt) {
    +
      public Void visitReturnStmt(Stmt.Return stmt) {
     
    lox/Resolver.java
    in visitReturnStmt()
        if (currentFunction == FunctionType.NONE) {
    @@ -1218,7 +1218,7 @@ 

    11  resolve syntactically invalid code.

    But we also need to skip the interpreter if there are resolution errors, so we add another check.

    -
        resolver.resolve(statements);
    +
        resolver.resolve(statements);
     
    lox/Lox.java
    in run()
    @@ -1260,7 +1260,7 @@ 

    Challenges

  • How do other languages you know handle local variables that refer to the same name in their initializer, like:

    -
    var a = "outer";
    +
    var a = "outer";
     {
       var a = a;
     }
    diff --git a/site/scanning-on-demand.html b/site/scanning-on-demand.html
    index ffcc8ca3c..5ab21701b 100644
    --- a/site/scanning-on-demand.html
    +++ b/site/scanning-on-demand.html
    @@ -1,5 +1,5 @@
     
    -
    +
     
     
     Scanning on Demand · Crafting Interpreters
    @@ -110,7 +110,7 @@ 

    Now that we’re building the front end, we can get clox running like a real interpreter. No more hand-authored chunks of bytecode. It’s time for a REPL and script loading. Tear out most of the code in main() and replace it with:

    -
    int main(int argc, const char* argv[]) {
    +
    int main(int argc, const char* argv[]) {
       initVM();
     
     
    main.c
    @@ -139,7 +139,7 @@

    argv is always the name of the executable being run.

    We’ll need a few system headers, so let’s get them all out of the way.

    -
    main.c
    +

    Next, we get the REPL up and REPL-ing.

    -
    #include "vm.h"
    +
    #include "vm.h"
     
    main.c
     
    @@ -175,9 +175,9 @@ 

    The real work happens in interpret(). We’ll get to that soon, but first let’s take care of loading scripts.

    -
    main.c
    +
    main.c
    add after repl()
    -
    static void runFile(const char* path) {
    +
    static void runFile(const char* path) {
       char* source = readFile(path);
       InterpretResult result = interpret(source);
       free(source); 
    @@ -201,9 +201,9 @@ 

    I like C’s simplicity, but we pay a real price for itthe language requires us to be more conscientious.

    -
    main.c
    +
    main.c
    add after repl()
    -
    static char* readFile(const char* path) {
    +
    static char* readFile(const char* path) {
       FILE* file = fopen(path, "rb");
     
       fseek(file, 0L, SEEK_END);
    @@ -245,7 +245,7 @@ 

    -
      FILE* file = fopen(path, "rb");
    +
      FILE* file = fopen(path, "rb");
     
    main.c
    in readFile()
      if (file == NULL) {
    @@ -261,7 +261,7 @@ 

This can happen if the file doesn’t exist or the user doesn’t have access to it. It’s pretty common—people mistype paths all the time.

    This failure is much rarer:

    -
      char* buffer = (char*)malloc(fileSize + 1);
    +
      char* buffer = (char*)malloc(fileSize + 1);
     
    main.c
    in readFile()
      if (buffer == NULL) {
    @@ -277,7 +277,7 @@ 

    Finally, the read itself may fail.

    -
      size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
    +
      size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
     
    main.c
    in readFile()
      if (bytesRead < fileSize) {
    @@ -301,7 +301,7 @@ 

    void freeVM();
    +
    void freeVM();
     
    vm.h
    function interpret()
    replace 1 line
    @@ -312,7 +312,7 @@

    vm.c
    +
    vm.c
    function interpret()
    replace 4 lines
    InterpretResult interpret(const char* source) {
    @@ -324,7 +324,7 @@ 

    #include "common.h"
    +
     
     

    For now, the one function in it is declared like so:

    -
    compiler.h
    +
    compiler.h
    create new file
    -
    #ifndef clox_compiler_h
    +
    #ifndef clox_compiler_h
     #define clox_compiler_h
     
     void compile(const char* source);
    @@ -346,9 +346,9 @@ 

—the thing we’re doing in this chapter—so right now all the compiler does is set that up.

    -
    compiler.c
    +
    compiler.c
    create new file
    -
    #include <stdio.h>
    +
    #include <stdio.h>
     
     #include "common.h"
     #include "compiler.h"
    @@ -364,9 +364,9 @@ 

    16 . 1 . 2The scanner scans

    There are still a few more feet of scaffolding to stand up before we can start writing useful code. First, a new header:

    -
    scanner.h
    +
    scanner.h
    create new file
    -
    #ifndef clox_scanner_h
    +
    #ifndef clox_scanner_h
     #define clox_scanner_h
     
     void initScanner(const char* source);
    @@ -376,9 +376,9 @@ 

    16 . 
    scanner.h, create new file

    And its corresponding implementation:

    -
    scanner.c
    +
    scanner.c
    create new file
    -
    #include <stdio.h>
    +
    #include <stdio.h>
     #include <string.h>
     
     #include "common.h"
    @@ -411,9 +411,9 @@ 

    16 .  source code string. The scanner works its way through the code once and is done after that.

    Since we have some state, we should initialize it.

    -
    scanner.c
    +
    scanner.c
    add after variable scanner
    -
    void initScanner(const char* source) {
    +
    void initScanner(const char* source) {
       scanner.start = source;
       scanner.current = source;
       scanner.line = 1;
    @@ -437,7 +437,7 @@ 

    16 . 

    Unfortunately, we don’t have a compiler yet that can ask the scanner for tokens, so the scanner will just sit there doing nothing. To kick it into action, we’ll write some temporary code to drive it.

    -
      initScanner(source);
    +
      initScanner(source);
     
    compiler.c
    in compile()
      int line = -1;
    @@ -468,10 +468,10 @@ 

    16 . 

    This loops indefinitely. Each turn through the loop, it scans one token and prints it. When it reaches a special “end of file” token or an error, it stops. For example, if we run the interpreter on this program:

    -
    print 1 + 2;
    +
    print 1 + 2;
     

    It prints out:

    -
       1 31 'print'
    +
       1 31 'print'
        | 21 '1'
        |  7 '+'
        | 21 '2'
    @@ -487,7 +487,7 @@ 

    16 . 

    The goal for the rest of the chapter is to make that blob of code work by implementing this key function:

    -
    void initScanner(const char* source);
    +
    void initScanner(const char* source);
     
    scanner.h
    add after initScanner()
    Token scanToken();
    @@ -499,7 +499,7 @@ 

    16 . 

    Each call scans and returns the next token in the source code. A token looks like this:

    -
    #define clox_scanner_h
    +
    #define clox_scanner_h
     
    scanner.h
     
    @@ -518,7 +518,7 @@ 

    16 . 

It’s pretty similar to jlox’s Token class. We have an enum identifying what type of token it is—number, identifier, + operator, etc. The enum is virtually identical to the one in jlox, so let’s just hammer out the whole thing.

    -
    #ifndef clox_scanner_h
    +
    #ifndef clox_scanner_h
     #define clox_scanner_h
     
    scanner.h
    @@ -576,9 +576,9 @@ 

    16 . 

    16 . 2 . 1Scanning tokens

    We’re ready to scan some tokens. We’ll work our way up to the complete implementation, starting with this:

    -
    scanner.c
    +
    scanner.c
    add after initScanner()
    -
    Token scanToken() {
    +
    Token scanToken() {
       scanner.start = scanner.current;
     
       if (isAtEnd()) return makeToken(TOKEN_EOF);
    @@ -602,9 +602,9 @@ 

    16 . 2 return an error token for that.

    This function relies on a couple of helpers, most of which are familiar from jlox. First up:

    -
    scanner.c
    +
    scanner.c
    add after initScanner()
    -
    static bool isAtEnd() {
    +
    static bool isAtEnd() {
       return *scanner.current == '\0';
     }
     
    @@ -613,9 +613,9 @@

    16 . 2

    We require the source string to be a good null-terminated C string. If the current character is the null byte, then we’ve reached the end.

    To create a token, we have this constructor-like function:

    -
    scanner.c
    +
    scanner.c
    add after isAtEnd()
    -
    static Token makeToken(TokenType type) {
    +
    static Token makeToken(TokenType type) {
       Token token;
       token.type = type;
       token.start = scanner.start;
    @@ -629,9 +629,9 @@ 

    16 . 2

    It uses the scanner’s start and current pointers to capture the token’s lexeme. It sets a couple of other obvious fields then returns the token. It has a sister function for returning error tokens.

    -
    scanner.c
    +
    scanner.c
    add after makeToken()
    -
    static Token errorToken(const char* message) {
    +
    static Token errorToken(const char* message) {
       Token token;
       token.type = TOKEN_ERROR;
       token.start = message;
    @@ -657,7 +657,7 @@ 

    16 . 2 rules.

    16 . 3A Lexical Grammar for Lox

    The simplest tokens are only a single character. We recognize those like so:

    -
      if (isAtEnd()) return makeToken(TOKEN_EOF);
    +
      if (isAtEnd()) return makeToken(TOKEN_EOF);
     
    scanner.c
    in scanToken()
    @@ -687,9 +687,9 @@ 

    1 switch to see if it matches any of Lox’s one-character lexemes. To read the next character, we use a new helper which consumes the current character and returns it.

    -
    scanner.c
    +
    scanner.c
    add after isAtEnd()
    -
    static char advance() {
    +
    static char advance() {
       scanner.current++;
       return scanner.current[-1];
     }
    @@ -700,7 +700,7 @@ 

    1 these also has a corresponding single-character token. That means that when we see a character like !, we don’t know if we’re in a ! token or a != until we look at the next character too. We handle those like so:

    -
        case '*': return makeToken(TOKEN_STAR);
    +
        case '*': return makeToken(TOKEN_STAR);
     
    scanner.c
    in scanToken()
        case '!':
    @@ -724,9 +724,9 @@ 

    1 current character alone (so it can be part of the next token) and return the appropriate one-character token.

    That logic for conditionally consuming the second character lives here:

    -
    scanner.c
    +
    scanner.c
    add after advance()
    -
    static bool match(char expected) {
    +
    static bool match(char expected) {
       if (isAtEnd()) return false;
       if (*scanner.current != expected) return false;
       scanner.current++;
    @@ -748,7 +748,7 @@ 

    16 . 3 .̴ when you call it. We’d have to wrap the whole body of the function in a loop or something.

    Instead, before starting the token, we shunt off to a separate function.

    -
    Token scanToken() {
    +
    Token scanToken() {
     
    scanner.c
    in scanToken()
      skipWhitespace();
    @@ -759,9 +759,9 @@ 

    16 . 3 .̴

    This advances the scanner past any leading whitespace. After this call returns, we know the very next character is a meaningful one (or we’re at the end of the source code).

    -
    scanner.c
    +
    scanner.c
    add after errorToken()
    -
    static void skipWhitespace() {
    +
    static void skipWhitespace() {
       for (;;) {
         char c = peek();
         switch (c) {
    @@ -781,9 +781,9 @@ 

    16 . 3 .̴

    It’s sort of a separate mini-scanner. It loops, consuming every whitespace character it encounters. We need to be careful that it does not consume any non-whitespace characters. To support that, we use this:

    -
    scanner.c
    +
    scanner.c
    add after advance()
    -
    static char peek() {
    +
    static char peek() {
       return *scanner.current;
     }
     
    @@ -791,7 +791,7 @@

    16 . 3 .̴

    This simply returns the current character, but doesn’t consume it. The previous code handles all the whitespace characters except for newlines.

    -
            break;
    +
            break;
     
    scanner.c
    in skipWhitespace()
          case '\n':
    @@ -808,7 +808,7 @@ 

    16 . 3 . 2<

    Comments aren’t technically “whitespace”, if you want to get all precise with your terminology, but as far as Lox is concerned, they may as well be, so we skip those too.

    -
            break;
    +
            break;
     
    scanner.c
    in skipWhitespace()
          case '/':
    @@ -829,9 +829,9 @@ 

    16 . 3 . 2< even if the = wasn’t found. Comments are different. If we don’t find a second /, then skipWhitespace() needs to not consume the first slash either.

    To handle that, we add:

    -
    scanner.c
    +
    scanner.c
    add after peek()
    -
    static char peekNext() {
    +
    static char peekNext() {
       if (isAtEnd()) return '\0';
       return scanner.current[1];
     }
    @@ -848,7 +848,7 @@ 

    16 . 3̴

Number and string tokens are special because they have a runtime value associated with them. We’ll start with strings because they are easy to recognize—they always begin with a double quote.

    -
              match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
    +
              match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
     
    scanner.c
    in scanToken()
        case '"': return string();
    @@ -857,9 +857,9 @@ 

    16 . 3̴
    scanner.c, in scanToken()

    That calls a new function.

    -
    scanner.c
    +
    scanner.c
    add after skipWhitespace()
    -
    static Token string() {
    +
    static Token string() {
       while (peek() != '"' && !isAtEnd()) {
         if (peek() == '\n') scanner.line++;
         advance();
    @@ -900,7 +900,7 @@ 

    16 . 3̴

    Next up, numbers. Instead of adding a switch case for each of the ten digits that can start a number, we handle them here:

    -
      char c = advance();
    +
      char c = advance();
     
    scanner.c
    in scanToken()
      if (isDigit(c)) return number();
    @@ -911,18 +911,18 @@ 

    16 . 3̴
    scanner.c, in scanToken()

    That uses this obvious utility function:

    -
    scanner.c
    +
    scanner.c
    add after initScanner()
    -
    static bool isDigit(char c) {
    +
    static bool isDigit(char c) {
       return c >= '0' && c <= '9';
     }
     
    scanner.c, add after initScanner()

    We finish scanning the number using this:

    -
    scanner.c
    +
    scanner.c
    add after skipWhitespace()
    -
    static Token number() {
    +
    static Token number() {
       while (isDigit(peek())) advance();
     
       // Look for a fractional part.
    @@ -947,7 +947,7 @@ 

    16& structures.

    First, though, we have to scan the lexeme. Names start with a letter or underscore.

    -
      char c = advance();
    +
      char c = advance();
     
    scanner.c
    in scanToken()
      if (isAlpha(c)) return identifier();
    @@ -956,9 +956,9 @@ 

    16&
    scanner.c, in scanToken()

    We recognize those using this:

    -
    scanner.c
    +
    scanner.c
    add after initScanner()
    -
    static bool isAlpha(char c) {
    +
    static bool isAlpha(char c) {
       return (c >= 'a' && c <= 'z') ||
              (c >= 'A' && c <= 'Z') ||
               c == '_';
    @@ -967,9 +967,9 @@ 

    16&
    scanner.c, add after initScanner()

    Once we’ve found an identifier, we scan the rest of it here:

    -
    scanner.c
    +
    scanner.c
    add after skipWhitespace()
    -
    static Token identifier() {
    +
    static Token identifier() {
       while (isAlpha(peek()) || isDigit(peek())) advance();
       return makeToken(identifierType());
     }
    @@ -979,9 +979,9 @@ 

    16&

    After the first letter, we allow digits too, and we keep consuming alphanumerics until we run out of them. Then we produce a token with the proper type. Determining that “proper” type is the unique part of this chapter.

    -
    scanner.c
    +
    scanner.c
    add after skipWhitespace()
    -
    static TokenType identifierType() {
    +
    static TokenType identifierType() {
       return TOKEN_IDENTIFIER;
     }
     
    @@ -1094,7 +1094,7 @@

    16& and that’s currently one of the world’s most sophisticated, fastest language implementations.

    -
    static TokenType identifierType() {
    +
    static TokenType identifierType() {
     
    scanner.c
    in identifierType()
      switch (scanner.start[0]) {
    @@ -1121,9 +1121,9 @@ 

    16& diagram, this is basically that straight path hanging off the “s”.

    We won’t roll a switch for each of those nodes. Instead, we have a utility function that tests the rest of a potential keyword’s lexeme.

    -
    scanner.c
    +
    scanner.c
    add after skipWhitespace()
    -
    static TokenType checkKeyword(int start, int length,
    +
    static TokenType checkKeyword(int start, int length,
         const char* rest, TokenType type) {
       if (scanner.current - scanner.start == start + length &&
           memcmp(scanner.start + start, rest, length) == 0) {
    @@ -1146,7 +1146,7 @@ 

    16&

    We have a couple of keywords where the tree branches again after the first letter. If the lexeme starts with “f”, it could be false, for, or fun. So we add another switch for the branches coming off the “f” node.

    -
        case 'e': return checkKeyword(1, 3, "lse", TOKEN_ELSE);
    +
        case 'e': return checkKeyword(1, 3, "lse", TOKEN_ELSE);
     
    scanner.c
    in identifierType()
        case 'f':
    @@ -1165,7 +1165,7 @@ 

    16&

    Before we switch, we need to check that there even is a second letter. “f” by itself is a valid identifier too, after all. The other letter that branches is “t”.

    -
        case 's': return checkKeyword(1, 4, "uper", TOKEN_SUPER);
    +
        case 's': return checkKeyword(1, 4, "uper", TOKEN_SUPER);
     
    scanner.c
    in identifierType()
        case 't':
    @@ -1202,19 +1202,19 @@ 

    Challenges

    expression is evaluated, converted to a string, and then merged with the surrounding string literal.

    For example, if Lox supported string interpolation, then this . . . 

    -
    var drink = "Tea";
    +
    var drink = "Tea";
     var steep = 4;
     var cool = 2;
     print "${drink} will be ready in ${steep + cool} minutes.";
     

     . . . would print:

    -
    Tea will be ready in 6 minutes.
    +
    Tea will be ready in 6 minutes.
     

    What token types would you define to implement a scanner for string interpolation? What sequence of tokens would you emit for the above string literal?

    What tokens would you emit for:

    -
    "Nested ${"interpolation?! Are you ${"mad?!"}"}"
    +
    "Nested ${"interpolation?! Are you ${"mad?!"}"}"
     

    Consider looking at other language implementations that support interpolation to see how they handle it.

    @@ -1222,7 +1222,7 @@

    Challenges

  • Several languages use angle brackets for generics and also have a >> right shift operator. This led to a classic problem in early versions of C++:

    -
    vector<vector<string>> nestedVectors;
    +
    vector<vector<string>> nestedVectors;
     

    This would produce a compile error because the >> was lexed to a single right shift token, not two > tokens. Users were forced to avoid this by diff --git a/site/scanning.html b/site/scanning.html index a363e3597..a38052767 100644 --- a/site/scanning.html +++ b/site/scanning.html @@ -1,5 +1,5 @@ - + Scanning · Crafting Interpreters @@ -127,9 +127,9 @@

    4

    Since this is our first real chapter, before we get to actually scanning some code we need to sketch out the basic shape of our interpreter, jlox. Everything starts with a class in Java.

    -
    lox/Lox.java
    +
    lox/Lox.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     import java.io.BufferedReader;
     import java.io.IOException;
    @@ -164,9 +164,9 @@ 

    4

    Lox is a scripting language, which means it executes directly from source. Our interpreter supports two ways of running code. If you start jlox from the command line and give it a path to a file, it reads the file and executes it.

    -
    lox/Lox.java
    +
    lox/Lox.java
    add after main()
    -
      private static void runFile(String path) throws IOException {
    +
      private static void runFile(String path) throws IOException {
         byte[] bytes = Files.readAllBytes(Paths.get(path));
         run(new String(bytes, Charset.defaultCharset()));
       }
    @@ -180,14 +180,14 @@ 

    4

    An interactive prompt is also called a “REPL” (pronounced like “rebel” but with a “p”). The name comes from Lisp where implementing one is as simple as wrapping a loop around a few built-in functions:

    -
    (print (eval (read)))
    +
    (print (eval (read)))
     

    Working outwards from the most nested call, you Read a line of input, Evaluate it, Print the result, then Loop and do it all over again.

    -
    lox/Lox.java
    +
    lox/Lox.java
    add after runFile()
    -
      private static void runPrompt() throws IOException {
    +
      private static void runPrompt() throws IOException {
         InputStreamReader input = new InputStreamReader(System.in);
         BufferedReader reader = new BufferedReader(input);
     
    @@ -207,9 +207,9 @@ 

    4 “end-of-file” condition to the program. When that happens readLine() returns null, so we check for that to exit the loop.

    Both the prompt and the file runner are thin wrappers around this core function:

    -
    lox/Lox.java
    +
    lox/Lox.java
    add after runPrompt()
    -
      private static void run(String source) {
    +
      private static void run(String source) {
         Scanner scanner = new Scanner(source);
         List<Token> tokens = scanner.scanTokens();
     
    @@ -242,9 +242,9 @@ 

    4 . 1  bones. I’d love to talk about interactive debuggers, static analyzers, and other fun stuff, but there’s only so much ink in the pen.

    -
    lox/Lox.java
    +
    lox/Lox.java
    add after run()
    -
      static void error(int line, String message) {
    +
      static void error(int line, String message) {
         report(line, "", message);
       }
     
    @@ -261,12 +261,12 @@ 

    4 . 1  error occurred on a given line. That is really the bare minimum to be able to claim you even have error reporting. Imagine if you accidentally left a dangling comma in some function call and the interpreter printed out:

    -
    Error: Unexpected "," somewhere in your code. Good luck finding it!
    +
    Error: Unexpected "," somewhere in your code. Good luck finding it!
     

    That’s not very helpful. We need to at least point them to the right line. Even better would be the beginning and end column so they know where in the line. Even better than that is to show the user the offending line, like:

    -
    Error: Unexpected "," in argument list.
    +
    Error: Unexpected "," in argument list.
     
         15 | function(first, second,);
                                    ^-- Here.
    @@ -278,7 +278,7 @@ 

    4 . 1  not as I do.

    The primary reason we’re sticking this error reporting function in the main Lox class is because of that hadError field. It’s defined here:

    -
    public class Lox {
    +
    public class Lox {
     
    lox/Lox.java
    in class Lox
      static boolean hadError = false;
    @@ -288,7 +288,7 @@ 

    4 . 1 

    We’ll use this to ensure we don’t try to execute code that has a known error. Also, it lets us exit with a non-zero exit code like a good command line citizen should.

    -
        run(new String(bytes, Charset.defaultCharset()));
    +
        run(new String(bytes, Charset.defaultCharset()));
     
    lox/Lox.java
    in runFile()
    @@ -301,7 +301,7 @@ 

    4 . 1 

    We need to reset this flag in the interactive loop. If the user makes a mistake, it shouldn’t kill their entire session.

    -
          run(line);
    +
          run(line);
     
    lox/Lox.java
    in runPrompt()
          hadError = false;
    @@ -332,7 +332,7 @@ 

    4 . 1  it. Before we get to that, let’s get more precise about what tokens are.

    4 . 2Lexemes and Tokens

    Here’s a line of Lox code:

    -
    var language = "lox";
    +
    var language = "lox";
     

    Here, var is the keyword for declaring a variable. That three-character sequence “v-a-r” means something. But if we yank three letters out of the @@ -359,9 +359,9 @@

    4 . 2 . 

    After all, string comparison ends up looking at individual characters, and isn’t that the scanner’s job?

    -
    lox/TokenType.java
    +
    lox/TokenType.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     enum TokenType {
       // Single-character tokens.
    @@ -409,9 +409,9 @@ 

    4 .&# the better.

    We take all of this data and wrap it in a class.

    -
    lox/Token.java
    +
    lox/Token.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     class Token {
       final TokenType type;
    @@ -453,7 +453,7 @@ 

    [a-zA-Z_][a-zA-Z_0-9]*
    +
    [a-zA-Z_][a-zA-Z_0-9]*
     

    If you did think of regular expressions, your intuition is a deep one. The rules that determine how a particular language groups characters into lexemes are @@ -483,9 +483,9 @@

    4 . 4The Scanner Class

    Without further ado, let’s make ourselves a scanner.

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    create new file
    -
    package com.craftinginterpreters.lox;
    +
    package com.craftinginterpreters.lox;
     
     import java.util.ArrayList;
     import java.util.HashMap;
    @@ -513,9 +513,9 @@ 

    4 . 4

    We store the raw source code as a simple string, and we have a list ready to fill with tokens we’re going to generate. The aforementioned loop that does that looks like this:

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after Scanner()
    -
      List<Token> scanTokens() {
    +
      List<Token> scanTokens() {
         while (!isAtEnd()) {
           // We are at the beginning of the next lexeme.
           start = current;
    @@ -533,7 +533,7 @@ 

    4 . 4 strictly needed, but it makes our parser a little cleaner.

    This loop depends on a couple of fields to keep track of where the scanner is in the source code.

    -
      private final List<Token> tokens = new ArrayList<>();
    +
      private final List<Token> tokens = new ArrayList<>();
     
    lox/Scanner.java
    in class Scanner
      private int start = 0;
    @@ -552,9 +552,9 @@ 

    4 . 4 location.

    Then we have one little helper function that tells us if we’ve consumed all the characters.

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after scanTokens()
    -
      private boolean isAtEnd() {
    +
      private boolean isAtEnd() {
         return current >= source.length();
       }
     
    @@ -566,9 +566,9 @@

    4 .R long. All you would need to do is consume the next character and pick a token type for it. Several lexemes are only a single character in Lox, so let’s start with those.

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after scanTokens()
    -
      private void scanToken() {
    +
      private void scanToken() {
         char c = advance();
         switch (c) {
           case '(': addToken(LEFT_PAREN); break;
    @@ -590,9 +590,9 @@ 

    4 .R

    Wondering why / isn’t in here? Don’t worry, we’ll get to it.

    Again, we need a couple of helper methods.

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after isAtEnd()
    -
      private char advance() {
    +
      private char advance() {
         return source.charAt(current++);
       }
     
    @@ -618,7 +618,7 @@ 

    4 . 5  characters get silently discarded. They aren’t used by the Lox language, but that doesn’t mean the interpreter can pretend they aren’t there. Instead, we report an error.

    -
          case '*': addToken(STAR); break; 
    +
          case '*': addToken(STAR); break; 
     
    lox/Scanner.java
    in scanToken()
    @@ -655,7 +655,7 @@ 

    4 . 5 . 2 can all be followed by = to create the other equality and comparison operators.

    For all of these, we need to look at the second character.

    -
          case '*': addToken(STAR); break; 
    +
          case '*': addToken(STAR); break; 
     
    lox/Scanner.java
    in scanToken()
          case '!':
    @@ -677,9 +677,9 @@ 

    4 . 5 . 2
    lox/Scanner.java, in scanToken()

    Those cases use this new method:

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after scanToken()
    -
      private boolean match(char expected) {
    +
      private boolean match(char expected) {
         if (isAtEnd()) return false;
         if (source.charAt(current) != expected) return false;
     
    @@ -698,7 +698,7 @@ 

    4 . 5 . 2

    4 . 6Longer Lexemes

    We’re still missing one operator: / for division. That character needs a little special handling because comments begin with a slash too.

    -
            break;
    +
            break;
     
    lox/Scanner.java
    in scanToken()
          case '/':
    @@ -722,9 +722,9 @@ 

    4 . 6

    We’ve got another helper:

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after match()
    -
      private char peek() {
    +
      private char peek() {
         if (isAtEnd()) return '\0';
         return source.charAt(current);
       }
    @@ -747,7 +747,7 @@ 

    4 . 6

    While we’re at it, now’s a good time to skip over those other meaningless characters: newlines and whitespace.

    -
            break;
    +
            break;
     
    lox/Scanner.java
    in scanToken()
    @@ -774,14 +774,14 @@ 

    4 . 6peek() to find the newline ending a comment instead of match(). We want that newline to get us here so we can update line.)

    Our scanner is getting smarter. It can handle fairly free-form code like:

    -
    // this is a comment
    +
    // this is a comment
     (( )){} // grouping stuff
     !*+-/=<> <= == // operators
     

    4 . 6 . 1String literals

    Now that we’re comfortable with longer lexemes, we’re ready to tackle literals. We’ll do strings first, since they always begin with a specific character, ".

    -
            break;
    +
            break;
     
    lox/Scanner.java
    in scanToken()
    @@ -794,9 +794,9 @@ 

    4 . 6R
    lox/Scanner.java, in scanToken()

    That calls:

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after scanToken()
    -
      private void string() {
    +
      private void string() {
         while (peek() != '"' && !isAtEnd()) {
           if (peek() == '\n') line++;
           advance();
    @@ -838,21 +838,21 @@ 

    4 . 6R number literal. Instead, -123, is an expression that applies - to the number literal 123. In practice, the result is the same, though it has one interesting edge case if we were to add method calls on numbers. Consider:

    -
    print -123.abs();
    +
    print -123.abs();
     

    This prints -123 because negation has lower precedence than method calls. We could fix that by making - part of the number literal. But then consider:

    -
    var n = 123;
    +
    var n = 123;
     print -n.abs();
     

    This still produces -123, so now the language seems inconsistent. No matter what you do, some case ends up weird.

    -
    1234
    +
    1234
     12.34
     

    We don’t allow a leading or trailing decimal point, so these are both invalid:

    -
    .1234
    +
    .1234
     1234.
     

    We could easily support the former, but I left it out to keep things simple. The @@ -860,7 +860,7 @@

    4 . 6R

    To recognize the beginning of a number lexeme, we look for any digit. It’s kind of tedious to add cases for every decimal digit, so we’ll stuff it in the default case instead.

    -
          default:
    +
          default:
     
    lox/Scanner.java
    in scanToken()
    replace 1 line
    @@ -874,9 +874,9 @@

    4 . 6R
    lox/Scanner.java, in scanToken(), replace 1 line

    This relies on this little utility:

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after peek()
    -
      private boolean isDigit(char c) {
    +
      private boolean isDigit(char c) {
         return c >= '0' && c <= '9';
       } 
     
    @@ -889,9 +889,9 @@

    4 . 6R

    Once we know we are in a number, we branch to a separate method to consume the rest of the literal, like we do with strings.

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after scanToken()
    -
      private void number() {
    +
      private void number() {
         while (isDigit(peek())) advance();
     
         // Look for a fractional part.
    @@ -915,9 +915,9 @@ 

    4 . 6R

    Looking past the decimal point requires a second character of lookahead since we don’t want to consume the . until we’re sure there is a digit after it. So we add:

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after peek()
    -
      private char peekNext() {
    +
      private char peekNext() {
         if (current + 1 >= source.length()) return '\0';
         return source.charAt(current + 1);
       } 
    @@ -942,7 +942,7 @@ 

    case 'o':
    +
    case 'o':
       if (match('r')) {
         addToken(OR);
       }
    @@ -959,16 +959,16 @@ 

    Consider this nasty bit of C code:

    -
    ---a;
    +
    ---a;
     

    Is it valid? That depends on how the scanner splits the lexemes. What if the scanner sees it like this:

    -
    - --a;
    +
    - --a;
     

    Then it could be parsed. But that would require the scanner to know about the grammatical structure of the surrounding code, which entangles things more than we want. Instead, the maximal munch rule says that it is always scanned like:

    -
    -- -a;
    +
    -- -a;
     

    It scans it that way even though doing so leads to a syntax error later in the parser.

    @@ -979,7 +979,7 @@

          default:
    +
          default:
             if (isDigit(c)) {
               number();
     

    The rest of the code lives over here:

    -
    lox/Scanner.java
    +

    We define that in terms of these helpers:

    -
    lox/Scanner.java
    +
    lox/Scanner.java
    add after peekNext()
    -
      private boolean isAlpha(char c) {
    +
      private boolean isAlpha(char c) {
         return (c >= 'a' && c <= 'z') ||
                (c >= 'A' && c <= 'Z') ||
                 c == '_';
    @@ -1021,9 +1021,9 @@ 

    lox/Scanner.java
    +
    lox/Scanner.java
    in class Scanner
    -
      private static final Map<String, TokenType> keywords;
    +
      private static final Map<String, TokenType> keywords;
     
       static {
         keywords = new HashMap<>();
    @@ -1049,7 +1049,7 @@ 

        while (isAlphaNumeric(peek())) advance();
    +
        while (isAlphaNumeric(peek())) advance();
     
     
    lox/Scanner.java
    in identifier()
    @@ -1109,7 +1109,7 @@

    Design Note: Implicit Semicolons

    For example:

    -

  • @@ -241,14 +241,14 @@

    3 . 3Data T

    Full-featured languages have lots of syntax for numbershexadecimal, scientific notation, octal, all sorts of fun stuff. We’ll settle for basic integer and decimal literals.

    -
    1234;  // An integer.
    +
    1234;  // An integer.
     12.34; // A decimal number.
     

  • Strings. We’ve already seen one string literal in the first example. Like most languages, they are enclosed in double quotes.

    -
    "I am a string";
    +
    "I am a string";
     "";    // The empty string.
     "123"; // This is a string, not a number.
     
    @@ -279,7 +279,7 @@

    3 . 4Expr

    3 . 4 . 1Arithmetic

    Lox features the basic arithmetic operators you know and love from C and other languages:

    -
    add + me;
    +
    add + me;
     subtract - me;
     multiply * me;
     divide / me;
    @@ -294,14 +294,14 @@ 

    3 . 4 . 

    There are some operators that have more than two operands and the operators are interleaved between them. The only one in wide usage is the “conditional” or “ternary” operator of C and friends:

    -
    condition ? thenArm : elseArm;
    +
    condition ? thenArm : elseArm;
     

    Some call these mixfix operators. A few languages let you define your own operators and control how they are positionedtheir “fixity”.

    One arithmetic operator is actually both an infix and a prefix one. The - operator can also be used to negate a number.

    -
    -negateMe;
    +
    -negateMe;
     

    All of these operators work on numbers, and it’s an error to pass any other types to them. The exception is the + operatoryou can also pass it two @@ -309,38 +309,38 @@

    3 . 4 . 

    3 . 4 . 2Comparison and equality

    Moving along, we have a few more operators that always return a Boolean result. We can compare numbers (and only numbers), using Ye Olde Comparison Operators.

    -
    less < than;
    +
    less < than;
     lessThan <= orEqual;
     greater > than;
     greaterThan >= orEqual;
     

    We can test two values of any kind for equality or inequality.

    -
    1 == 2;         // false.
    +
    1 == 2;         // false.
     "cat" != "dog"; // true.
     

    Even different types.

    -
    314 == "pi"; // false.
    +
    314 == "pi"; // false.
     

    Values of different types are never equivalent.

    -
    123 == "123"; // false.
    +
    123 == "123"; // false.
     

    I’m generally against implicit conversions.

    3 . 4 . 3Logical operators

    The not operator, a prefix !, returns false if its operand is true, and vice versa.

    -
    !true;  // false.
    +
    !true;  // false.
     !false; // true.
     

    The other two logical operators really are control flow constructs in the guise of expressions. An and expression determines if two values are both true. It returns the left operand if it’s false, or the right operand otherwise.

    -
    true and false; // false.
    +
    true and false; // false.
     true and true;  // true.
     

    And an or expression determines if either of two values (or both) are true. It returns the left operand if it is true and the right operand otherwise.

    -
    false or false; // false.
    +
    false or false; // false.
     true or false;  // true.
     
    -
    "some expression";
    +
    "some expression";
     

    An expression followed by a semicolon (;) promotes the expression to statement-hood. This is called (imaginatively enough), an expression statement.

    If you want to pack a series of statements where a single one is expected, you can wrap them up in a block.

    -
    {
    +
    {
       print "One statement.";
       print "Two statements.";
     }
    @@ -405,12 +405,12 @@ 

    3 . 6Variable be initialized to some value would be more annoying than dealing with nil itself.

    -
    var imAVariable = "here is my value";
    +
    var imAVariable = "here is my value";
     var iAmNil;
     

    Once declared, you can, naturally, access and assign a variable using its name.

    -
    var breakfast = "bagels";
    +
    var breakfast = "bagels";
     print breakfast; // "bagels".
     breakfast = "beignets";
     print breakfast; // "beignets".
    @@ -436,7 +436,7 @@ 

    3 . 7Co relies on dynamic dispatch for selectively executing code.

    An if statement executes one of two statements based on some condition.

    -
    if (condition) {
    +
     

    A while loop executes the body repeatedly as long as the condition expression evaluates to true.

    -
    var a = 1;
    +
    var a = 1;
     while (a < 10) {
       print a;
       a = a + 1;
    @@ -456,7 +456,7 @@ 

    3 . 7Co it to your implementation if it makes you happy. It’s your party.

    Finally, we have for loops.

    -
    for (var a = 1; a < 10; a = a + 1) {
    +
    for (var a = 1; a < 10; a = a + 1) {
       print a;
     }
     
    @@ -474,10 +474,10 @@

    3 . 7Co

    3 . 8Functions

    A function call expression looks the same as it does in C.

    -
    makeBreakfast(bacon, eggs, toast);
    +
    makeBreakfast(bacon, eggs, toast);
     

    You can also call a function without passing anything to it.

    -
    makeBreakfast();
    +
    makeBreakfast();
     

    Unlike in, say, Ruby, the parentheses are mandatory in this case. If you leave them off, the name doesn’t call the function, it just refers to it.

    @@ -487,7 +487,7 @@

    3 . 8Function

    I’ve seen languages that use fn, fun, func, and function. I’m still hoping to discover a funct, functi, or functio somewhere.

    -
    fun printSum(a, b) {
    +
    fun printSum(a, b) {
       print a + b;
     }
     
    @@ -518,7 +518,7 @@

    3 . 8Function

    The body of a function is always a block. Inside it, you can return a value using a return statement.

    -
    fun returnSum(a, b) {
    +
    fun returnSum(a, b) {
       return a + b;
     }
     
    @@ -530,7 +530,7 @@

    3 . 8Function

    3 . 8 . 1Closures

    Functions are first class in Lox, which just means they are real values that you can get a reference to, store in variables, pass around, etc. This works:

    -
    fun addPair(a, b) {
    +
    fun addPair(a, b) {
       return a + b;
     }
     
    @@ -542,7 +542,7 @@ 

    3 . 8 . 1

    Since function declarations are statements, you can declare local functions inside another function.

    -
    fun outerFunction() {
    +
     

    If you combine local functions, first-class functions, and block scope, you run into this interesting situation:

    -
    fun returnFunction() {
    +
    fun returnFunction() {
       var outside = "outside";
     
       fun inner() {
    @@ -674,7 +674,7 @@ 

    3 . 9 

    Enough rationale, let’s see what we actually have. Classes encompass a constellation of features in most languages. For Lox, I’ve selected what I think are the brightest stars. You declare a class and its methods like so:

    -
    class Breakfast {
    +
    class Breakfast {
       cook() {
         print "Eggs a-fryin'!";
       }
    @@ -692,7 +692,7 @@ 

    3 . 9  -
    // Store it in variables.
    +
    // Store it in variables.
     var someVariable = Breakfast;
     
     // Pass it to functions.
    @@ -702,7 +702,7 @@ 

    3 . 9  keyword, but to keep things simple, in Lox the class itself is a factory function for instances. Call a class like a function, and it produces a new instance of itself.

    -
    var breakfast = Breakfast();
    +
    var breakfast = Breakfast();
     print breakfast; // "Breakfast instance".
     

    3 . 9 . 5Instantiation and initialization

    @@ -710,13 +710,13 @@

    breakfast.meat = "sausage";
    +
    breakfast.meat = "sausage";
     breakfast.bread = "sourdough";
     

    Assigning to a field creates it if it doesn’t already exist.

    If you want to access a field or method on the current object from within a method, you use good old this.

    -
    class Breakfast {
    +
    class Breakfast {
       serve(who) {
         print "Enjoy your " + this.meat + " and " +
             this.bread + ", " + who + ".";
    @@ -730,7 +730,7 @@ 

    class Breakfast {
    +
    class Breakfast {
       init(meat, bread) {
         this.meat = meat;
         this.bread = bread;
    @@ -748,7 +748,7 @@ 

    3 . 9 .R across multiple classes or objects. For that, Lox supports single inheritance. When you declare a class, you can specify a class that it inherits from using a less-than (<) operator.

    -
    class Brunch < Breakfast {
    +
    class Brunch < Breakfast {
       drink() {
         print "How about a Bloody Mary?";
       }
    @@ -767,7 +767,7 @@ 

    3 . 9 .R

    Here, Brunch is the derived class or subclass, and Breakfast is the base class or superclass.

    Every method defined in the superclass is also available to its subclasses.

    -
    var benedict = Brunch("ham", "English muffin");
    +
    var benedict = Brunch("ham", "English muffin");
     benedict.serve("Noble Reader");
     

    Even the init() method gets inherited. In practice, @@ -780,7 +780,7 @@

    3 . 9 .R similar to Smalltalk and Ruby, which do.

    As in Java, you use super for that.

    -
    class Brunch < Breakfast {
    +
    class Brunch < Breakfast {
       init(meat, bread, drink) {
         super.init(meat, bread);
         this.drink = drink;
    @@ -864,7 +864,7 @@ 

    Design Note: Expressions and Stateme

    You also have to decide how these statement-like expressions compose with other expressionsyou have to fit them into the grammar’s precedence table. For example, Ruby allows:

    -
    puts 1 + if true then 2 else 3 end + 4
    +
    puts 1 + if true then 2 else 3 end + 4
     

    Is this what you’d expect? Is it what your users expect? How does this affect how you design the syntax for your “statements”? Note that Ruby has an explicit diff --git a/site/types-of-values.html b/site/types-of-values.html index ccd9a27b7..c92cf19fe 100644 --- a/site/types-of-values.html +++ b/site/types-of-values.html @@ -1,5 +1,5 @@ - + Types of Values · Crafting Interpreters @@ -140,7 +140,7 @@

    18 . 1tagged union. A value contains two parts: a type “tag”, and a payload for the actual value. To store the value’s type, we define an enum for each kind of value the VM supports.

    -
    #include "common.h"
    +
    #include "common.h"
     
     
    value.h
    typedef enum {
    @@ -186,7 +186,7 @@ 

    18 . 1

    As the name “tagged union” implies, our new value representation combines these two parts into a single struct.

    -
    } ValueType;
    +
    } ValueType;
     
     
    value.h
    add after enum ValueType
    @@ -242,7 +242,7 @@

    18 working again.

    We’ll implement these conversions as a handful of macros, one for each type and operation. First, to promote a native C value to a clox Value:

    -
    } Value;
    +
    } Value;
     
    value.h
    add after struct Value
    @@ -261,7 +261,7 @@ 

    18 statically typed values up into clox’s dynamically typed universe. In order to do anything with a Value, though, we need to unpack it and get the C value back out.

    -
    } Value;
    +
    } Value;
     
    value.h
    add after struct Value
    @@ -282,13 +282,13 @@ 

    18 Value of the right type, they unwrap it and return the corresponding raw C value. The “right type” part is important! These macros directly access the union fields. If we were to do something like:

    -
    Value value = BOOL_VAL(true);
    +
    Value value = BOOL_VAL(true);
     double number = AS_NUMBER(value);
     

    Then we may open a smoldering portal to the Shadow Realm. It’s not safe to use any of the AS_ macros unless we know the Value contains the appropriate type. To that end, we define a last few macros to check a Value’s type.

    -
    } Value;
    +
    } Value;
     
    value.h
    add after struct Value
    @@ -319,7 +319,7 @@ 

    1

    The first values we create are the constants generated when we compile number literals. After we convert the lexeme to a C double, we simply wrap it in a Value before storing it in the constant table.

    -
      double value = strtod(parser.previous.start, NULL);
    +
      double value = strtod(parser.previous.start, NULL);
     
    compiler.c
    in number()
    replace 1 line
    @@ -329,7 +329,7 @@

    1
    compiler.c, in number(), replace 1 line

    Over in the runtime, we have a function to print values.

    -
    void printValue(Value value) {
    +
    void printValue(Value value) {
     
    value.c
    in printValue()
    replace 1 line
    @@ -345,13 +345,13 @@

    vm.c
    +
    vm.c
    add after pop()
    -
    static Value peek(int distance) {
    +
    static Value peek(int distance) {
       return vm.stackTop[-1 - distance];
     }
     
    @@ -396,9 +396,9 @@

    vm.c
    +
    vm.c
    add after resetStack()
    -
    static void runtimeError(const char* format, ...) {
    +
    static void runtimeError(const char* format, ...) {
       va_list args;
       va_start(args, format);
       vfprintf(stderr, format, args);
    @@ -447,7 +447,7 @@ 

    vm.c
    +
    vm.c
    add to top of file
    #include <stdarg.h>
     
    #include <stdio.h>
    @@ -467,7 +467,7 @@ 

    That macro seemed like overkill a few chapters ago, but we get the benefit from it today. It lets us add the necessary type checking and conversions in one place.

    -
    #define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
    +
    #define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()])
     
    vm.c
    in run()
    replace 6 lines
    @@ -500,7 +500,7 @@

    Did you know you can pass macros as parameters to macros? Now you do!

    -
          }
    +
          }
     
    vm.c
    in run()
    replace 4 lines
    @@ -543,7 +543,7 @@

    18 . 4 -
      OP_CONSTANT,
    +
      OP_CONSTANT,
     
    chunk.h
    in enum OpCode
      OP_NIL,
    @@ -557,7 +557,7 @@ 

    18 . 4 -
      [TOKEN_ELSE]          = {NULL,     NULL,   PREC_NONE},
    +
     
     

    Here:

    -
      [TOKEN_THIS]          = {NULL,     NULL,   PREC_NONE},
    +
     
     

    And here:

    -
      [TOKEN_IF]            = {NULL,     NULL,   PREC_NONE},
    +
      [TOKEN_IF]            = {NULL,     NULL,   PREC_NONE},
     
    compiler.c
    replace 1 line
      [TOKEN_NIL]           = {literal,  NULL,   PREC_NONE},
    @@ -585,9 +585,9 @@ 

    18 . 4When the parser encounters false, nil, or true, in prefix position, it calls this new parser function:

    -
    compiler.c
    +
    compiler.c
    add after binary()
    -
    static void literal() {
    +
    static void literal() {
       switch (parser.previous.type) {
         case TOKEN_FALSE: emitByte(OP_FALSE); break;
         case TOKEN_NIL: emitByte(OP_NIL); break;
    @@ -608,7 +608,7 @@ 

    18 . 4 -
          case OP_CONSTANT: {
    +
          case OP_CONSTANT: {
             Value constant = READ_CONSTANT();
             push(constant);
             break;
    @@ -624,7 +624,7 @@ 

    18 . 4This is pretty self-explanatory. Each instruction summons the appropriate value and pushes it onto the stack. We shouldn’t forget our disassembler either.

    -
        case OP_CONSTANT:
    +
     
     

    With this in place, we can run this Earth-shattering program:

    -
    true
    +
    true
     

    Except that when the interpreter tries to print the result, it blows up. We need to extend printValue() to handle the new types too:

    -
    void printValue(Value value) {
    +
    void printValue(Value value) {
     
    value.c
    in printValue()
    replace 1 line
    @@ -664,10 +664,10 @@

    18 . 4

    18 . 4 . 1Logical not and falsiness

    The simplest logical operator is our old exclamatory friend unary not.

    -
    print !true; // "false"
    +
    print !true; // "false"
     

    This new operation gets a new instruction.

    -
      OP_DIVIDE,
    +
      OP_DIVIDE,
     
    chunk.h
    in enum OpCode
      OP_NOT,
    @@ -677,7 +677,7 @@ 

    1

    We can reuse the unary() parser function we wrote for unary negation to compile a not expression. We just need to slot it into the parsing table.

    -
      [TOKEN_STAR]          = {NULL,     binary, PREC_FACTOR},
    +
      [TOKEN_STAR]          = {NULL,     binary, PREC_FACTOR},
     
    compiler.c
    replace 1 line
      [TOKEN_BANG]          = {unary,    NULL,   PREC_NONE},
    @@ -688,7 +688,7 @@ 

    1

    Because I knew we were going to do this, the unary() function already has a switch on the token type to figure out which bytecode instruction to output. We merely add another case.

    -
      switch (operatorType) {
    +
      switch (operatorType) {
     
    compiler.c
    in unary()
        case TOKEN_BANG: emitByte(OP_NOT); break;
    @@ -700,7 +700,7 @@ 

    1

    That’s it for the front end. Let’s head over to the VM and conjure this instruction into life.

    -
          case OP_DIVIDE:   BINARY_OP(NUMBER_VAL, /); break;
    +
          case OP_DIVIDE:   BINARY_OP(NUMBER_VAL, /); break;
     
    vm.c
    in run()
          case OP_NOT:
    @@ -714,7 +714,7 @@ 

    1 operation, and pushes the result. And, as we did there, we have to worry about dynamic typing. Taking the logical not of true is easy, but there’s nothing preventing an unruly programmer from writing something like this:

    -
    print !nil;
    +
    print !nil;
     

    For unary minus, we made it an error to negate anything that isn’t a number. But Lox, like most scripting languages, is more @@ -726,9 +726,9 @@

    1 of values. nil is probably its own negation, sort of like a weird pseudo-zero. Negating a string could, uh, reverse it?

    -
    vm.c
    +
    vm.c
    add after peek()
    -
    static bool isFalsey(Value value) {
    +
    static bool isFalsey(Value value) {
       return IS_NIL(value) || (IS_BOOL(value) && !AS_BOOL(value));
     }
     
    @@ -737,7 +737,7 @@

    1

    Lox follows Ruby in that nil and false are falsey and every other value behaves like true. We’ve got a new instruction we can generate, so we also need to be able to ungenerate it in the disassembler.

    -
        case OP_DIVIDE:
    +
        case OP_DIVIDE:
           return simpleInstruction("OP_DIVIDE", offset);
     
    debug.c
    in disassembleInstruction()
    @@ -754,7 +754,7 @@

      OP_FALSE,
    +
      OP_FALSE,
     
    chunk.h
    in enum OpCode
      OP_EQUAL,
    @@ -787,7 +787,7 @@ 

      [TOKEN_BANG]          = {unary,    NULL,   PREC_NONE},
    +
     
     

    The remaining five operators are a little farther down in the table.

    -
      [TOKEN_EQUAL]         = {NULL,     NULL,   PREC_NONE},
    +