From b850e1163ebfedef6c44b3ca3c4496b8e1b4d3ac Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 24 Sep 2024 23:16:49 -0400 Subject: [PATCH 1/8] Move local/global env code from exe.c to env.c --- env.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ exe.c | 199 +-------------------------------------------------------- 2 files changed, 204 insertions(+), 196 deletions(-) create mode 100644 env.c diff --git a/env.c b/env.c new file mode 100644 index 0000000..1927dcf --- /dev/null +++ b/env.c @@ -0,0 +1,201 @@ +int cgc_fs = 0; +// Function bindings that follows lexical scoping rules +int cgc_locals = 0; +// Like cgc_locals, but with 1 scope for the entire function. Used for goto labels +int cgc_locals_fun = 0; +// Global bindings +int cgc_globals = 0; +// Bump allocator used to allocate static objects +int cgc_global_alloc = 0; + +enum BINDING { + // Because function params, local and global variables all share the same + // namespace and we want to find the first one of them, we need to keep + // BINDING_PARAM_LOCAL, BINDING_VAR_LOCAL and BINDING_VAR_GLOBAL + // in consecutive order. 
+ BINDING_PARAM_LOCAL, + BINDING_VAR_LOCAL, + BINDING_VAR_GLOBAL, + BINDING_ENUM_CST, + BINDING_LOOP, + BINDING_SWITCH, + BINDING_FUN, + BINDING_GOTO_LABEL, + BINDING_TYPE_STRUCT, + BINDING_TYPE_UNION, + BINDING_TYPE_ENUM, +}; + +void cgc_add_local_param(int ident, int size, ast type) { + int binding = alloc_obj(6); + heap[binding+0] = cgc_locals; + heap[binding+1] = BINDING_PARAM_LOCAL; + heap[binding+2] = ident; + heap[binding+3] = size; + heap[binding+4] = cgc_fs; + heap[binding+5] = type; +#ifdef sh + cgc_fs += size; +#else + cgc_fs -= size; +#endif + cgc_locals = binding; +} + +void cgc_add_local(int ident, int size, ast type) { + int binding = alloc_obj(6); + cgc_fs += size; + heap[binding+0] = cgc_locals; + heap[binding+1] = BINDING_VAR_LOCAL; + heap[binding+2] = ident; + heap[binding+3] = size; + heap[binding+4] = cgc_fs; + heap[binding+5] = type; + cgc_locals = binding; +} + +void cgc_add_enclosing_loop(int loop_fs, int break_lbl, ast continue_lbl) { + int binding = alloc_obj(5); + heap[binding+0] = cgc_locals; + heap[binding+1] = BINDING_LOOP; + heap[binding+2] = loop_fs; + heap[binding+3] = break_lbl; + heap[binding+4] = continue_lbl; + cgc_locals = binding; +} + +void cgc_add_enclosing_switch(int loop_fs, int break_lbl, int next_case_lbl) { + int binding = alloc_obj(5); + heap[binding+0] = cgc_locals; + heap[binding+1] = BINDING_SWITCH; + heap[binding+2] = loop_fs; + heap[binding+3] = break_lbl; + heap[binding+4] = next_case_lbl; + cgc_locals = binding; +} + +void cgc_add_global(int ident, int size, int width, ast type) { + int binding = alloc_obj(6); + heap[binding+0] = cgc_globals; + heap[binding+1] = BINDING_VAR_GLOBAL; + heap[binding+2] = ident; + heap[binding+3] = size; + heap[binding+4] = cgc_global_alloc; + heap[binding+5] = type; + cgc_global_alloc += width; + cgc_globals = binding; +} + +void cgc_add_global_fun(int ident, int label, ast type) { + int binding = alloc_obj(6); + heap[binding+0] = cgc_globals; + heap[binding+1] = BINDING_FUN; + 
heap[binding+2] = ident; + heap[binding+3] = 0; + heap[binding+4] = label; + heap[binding+5] = type; + cgc_globals = binding; +} + +void cgc_add_enum(int ident, int value) { + int binding = alloc_obj(4); + heap[binding+0] = cgc_globals; + heap[binding+1] = BINDING_ENUM_CST; + heap[binding+2] = ident; + heap[binding+3] = value; + cgc_globals = binding; +} + +void cgc_add_goto_label(int ident, int lbl) { + int binding = alloc_obj(5); + heap[binding+0] = cgc_locals_fun; + heap[binding+1] = BINDING_GOTO_LABEL; + heap[binding+2] = ident; + heap[binding+3] = lbl; + cgc_locals_fun = binding; +} + +void cgc_add_typedef(int ident, enum BINDING struct_or_union_or_enum, ast type) { + int binding = alloc_obj(4); + heap[binding+0] = cgc_globals; + heap[binding+1] = struct_or_union_or_enum; + heap[binding+2] = ident; + heap[binding+3] = type; + cgc_globals = binding; +} + +int cgc_lookup_binding_ident(int binding_type, int ident, int env) { + int binding = env; + while (binding != 0) { + if (heap[binding+1] == binding_type && heap[binding+2] == ident) { + break; + } + binding = heap[binding]; + } + return binding; +} + +int cgc_lookup_last_binding(int binding_type, int env) { + int binding = env; + while (binding != 0) { + if (heap[binding+1] == binding_type) { + break; + } + binding = heap[binding]; + } + return binding; +} + +int cgc_lookup_var(int ident, int env) { + int binding = env; + while (binding != 0) { + if (heap[binding+1] <= BINDING_VAR_GLOBAL && heap[binding+2] == ident) { + break; + } + binding = heap[binding]; + } + return binding; +} + +int cgc_lookup_fun(int ident, int env) { + return cgc_lookup_binding_ident(BINDING_FUN, ident, env); +} + +int cgc_lookup_enclosing_loop(int env) { + return cgc_lookup_last_binding(BINDING_LOOP, env); +} + +int cgc_lookup_enclosing_switch(int env) { + return cgc_lookup_last_binding(BINDING_SWITCH, env); +} + +int cgc_lookup_enclosing_loop_or_switch(int env) { + int binding = env; + while (binding != 0) { + if (heap[binding+1] == 
BINDING_LOOP || heap[binding+1] == BINDING_SWITCH) { + break; + } + binding = heap[binding]; + } + return binding; +} + +int cgc_lookup_goto_label(int ident, int env) { + return cgc_lookup_binding_ident(BINDING_GOTO_LABEL, ident, env); +} + +int cgc_lookup_struct(int ident, int env) { + return cgc_lookup_binding_ident(BINDING_TYPE_STRUCT, ident, env); +} + +int cgc_lookup_union(int ident, int env) { + return cgc_lookup_binding_ident(BINDING_TYPE_UNION, ident, env); +} + +int cgc_lookup_enum(int ident, int env) { + return cgc_lookup_binding_ident(BINDING_TYPE_ENUM, ident, env); +} + +int cgc_lookup_enum_value(int ident, int env) { + return cgc_lookup_binding_ident(BINDING_ENUM_CST, ident, env); +} diff --git a/exe.c b/exe.c index 55eb653..88b50f6 100644 --- a/exe.c +++ b/exe.c @@ -66,18 +66,12 @@ void write_i32_le(int n) { write_4_i8(n, n >> 8, n >> 16, n >> 24); } -int cgc_fs = 0; -// Function bindings that follows lexical scoping rules -int cgc_locals = 0; -// Like cgc_locals, but with 1 scope for the entire function. Used for goto labels -int cgc_locals_fun = 0; -// Global bindings -int cgc_globals = 0; -// Bump allocator used to allocate static objects -int cgc_global_alloc = 0; // If the main function returns a value bool main_returns = false; +// Environment tracking +#include "env.c" + void grow_fs(int words) { cgc_fs += words; } @@ -424,193 +418,6 @@ void def_goto_label(int lbl) { } } -enum BINDING { - // Because function params, local and global variables all share the same - // namespace, BINDING_PARAM_LOCAL, BINDING_VAR_LOCAL and BINDING_VAR_GLOBAL - // must be kept together at the beginning. 
- BINDING_PARAM_LOCAL, - BINDING_VAR_LOCAL, - BINDING_VAR_GLOBAL, - BINDING_ENUM_CST, - BINDING_LOOP, - BINDING_SWITCH, - BINDING_FUN, - BINDING_GOTO_LABEL, - BINDING_TYPE_STRUCT, - BINDING_TYPE_UNION, - BINDING_TYPE_ENUM, -}; - -void cgc_add_local_param(int ident, int size, ast type) { - int binding = alloc_obj(6); - heap[binding+0] = cgc_locals; - heap[binding+1] = BINDING_PARAM_LOCAL; - heap[binding+2] = ident; - heap[binding+3] = size; - heap[binding+4] = cgc_fs; - heap[binding+5] = type; - cgc_fs -= size; - cgc_locals = binding; -} - -void cgc_add_local(int ident, int size, ast type) { - int binding = alloc_obj(6); - cgc_fs += size; - heap[binding+0] = cgc_locals; - heap[binding+1] = BINDING_VAR_LOCAL; - heap[binding+2] = ident; - heap[binding+3] = size; - heap[binding+4] = cgc_fs; - heap[binding+5] = type; - cgc_locals = binding; -} - -void cgc_add_enclosing_loop(int loop_fs, int break_lbl, ast continue_lbl) { - int binding = alloc_obj(5); - heap[binding+0] = cgc_locals; - heap[binding+1] = BINDING_LOOP; - heap[binding+2] = loop_fs; - heap[binding+3] = break_lbl; - heap[binding+4] = continue_lbl; - cgc_locals = binding; -} - -void cgc_add_enclosing_switch(int loop_fs, int break_lbl, int next_case_lbl) { - int binding = alloc_obj(5); - heap[binding+0] = cgc_locals; - heap[binding+1] = BINDING_SWITCH; - heap[binding+2] = loop_fs; - heap[binding+3] = break_lbl; - heap[binding+4] = next_case_lbl; - cgc_locals = binding; -} - -void cgc_add_global(int ident, int size, int width, ast type) { - int binding = alloc_obj(6); - heap[binding+0] = cgc_globals; - heap[binding+1] = BINDING_VAR_GLOBAL; - heap[binding+2] = ident; - heap[binding+3] = size; - heap[binding+4] = cgc_global_alloc; - heap[binding+5] = type; - cgc_global_alloc += width; - cgc_globals = binding; -} - -void cgc_add_global_fun(int ident, int label, ast type) { - int binding = alloc_obj(6); - heap[binding+0] = cgc_globals; - heap[binding+1] = BINDING_FUN; - heap[binding+2] = ident; - heap[binding+3] = 0; 
- heap[binding+4] = label; - heap[binding+5] = type; - cgc_globals = binding; -} - -void cgc_add_enum(int ident, int value) { - int binding = alloc_obj(4); - heap[binding+0] = cgc_globals; - heap[binding+1] = BINDING_ENUM_CST; - heap[binding+2] = ident; - heap[binding+3] = value; - cgc_globals = binding; -} - -void cgc_add_goto_label(int ident, int lbl) { - int binding = alloc_obj(5); - heap[binding+0] = cgc_locals_fun; - heap[binding+1] = BINDING_GOTO_LABEL; - heap[binding+2] = ident; - heap[binding+3] = lbl; - cgc_locals_fun = binding; -} - -void cgc_add_typedef(int ident, enum BINDING struct_or_union_or_enum, ast type) { - int binding = alloc_obj(4); - heap[binding+0] = cgc_globals; - heap[binding+1] = struct_or_union_or_enum; - heap[binding+2] = ident; - heap[binding+3] = type; - cgc_globals = binding; -} - -int cgc_lookup_binding_ident(int binding_type, int ident, int env) { - int binding = env; - while (binding != 0) { - if (heap[binding+1] == binding_type && heap[binding+2] == ident) { - break; - } - binding = heap[binding]; - } - return binding; -} - -int cgc_lookup_last_binding(int binding_type, int env) { - int binding = env; - while (binding != 0) { - if (heap[binding+1] == binding_type) { - break; - } - binding = heap[binding]; - } - return binding; -} - -int cgc_lookup_var(int ident, int env) { - int binding = env; - while (binding != 0) { - if (heap[binding+1] <= BINDING_VAR_GLOBAL && heap[binding+2] == ident) { - break; - } - binding = heap[binding]; - } - return binding; -} - -int cgc_lookup_fun(int ident, int env) { - return cgc_lookup_binding_ident(BINDING_FUN, ident, env); -} - -int cgc_lookup_enclosing_loop(int env) { - return cgc_lookup_last_binding(BINDING_LOOP, env); -} - -int cgc_lookup_enclosing_switch(int env) { - return cgc_lookup_last_binding(BINDING_SWITCH, env); -} - -int cgc_lookup_enclosing_loop_or_switch(int env) { - int binding = env; - while (binding != 0) { - if (heap[binding+1] == BINDING_LOOP || heap[binding+1] == 
BINDING_SWITCH) { - break; - } - binding = heap[binding]; - } - return binding; -} - -int cgc_lookup_goto_label(int ident, int env) { - return cgc_lookup_binding_ident(BINDING_GOTO_LABEL, ident, env); -} - -int cgc_lookup_struct(int ident, int env) { - return cgc_lookup_binding_ident(BINDING_TYPE_STRUCT, ident, env); -} - -int cgc_lookup_union(int ident, int env) { - return cgc_lookup_binding_ident(BINDING_TYPE_UNION, ident, env); -} - -int cgc_lookup_enum(int ident, int env) { - return cgc_lookup_binding_ident(BINDING_TYPE_ENUM, ident, env); -} - -int cgc_lookup_enum_value(int ident, int env) { - return cgc_lookup_binding_ident(BINDING_ENUM_CST, ident, env); -} - // A pointer type is either an array type or a type with at least one star bool is_pointer_type(ast type) { if (get_op(type) == '[' || get_val(type) != 0) { From 1ed8ebb6f797035772eea6123d4541bc91123b22 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Wed, 25 Sep 2024 10:52:33 -0400 Subject: [PATCH 2/8] Remove OPTIMIZE_CONSTANT_PARAM optimization It will be added later but this time using `const` type qualifier instead of analyzing the code. 
--- sh.c | 167 ++++++++++++++--------------------------------------------- 1 file changed, 39 insertions(+), 128 deletions(-) diff --git a/sh.c b/sh.c index a1fc354..31af70c 100644 --- a/sh.c +++ b/sh.c @@ -491,16 +491,17 @@ text env_var_with_prefix(ast ident, ast prefixed_with_dollar) { if (get_op(ident) == IDENTIFIER) { var = find_var_in_local_env(get_val(ident)); if (var != -1) { - if (get_child(var, 2) == KIND_PARAM && get_child(var, 3)) { - res = wrap_int(get_child(var, 1)); - if (!prefixed_with_dollar) res = string_concat(wrap_char('$'), res); - } else { + // TODO: Constant param optimization + // if (get_child(var, 2) == KIND_PARAM) { + // res = wrap_int(get_child(var, 1)); + // if (!prefixed_with_dollar) res = string_concat(wrap_char('$'), res); + // } else { if (get_val(ident) == ARGV_ID) { res = wrap_str_lit("argv_"); } else { res = wrap_str_pool(get_val(get_val(ident))); } - } + // } } else { res = global_var(get_val(ident)); } @@ -550,12 +551,7 @@ void add_var_to_local_env(ast ident_tok, int position, int kind) { } // The var is not part of the environment, so we add it. - // Variables start as constant, and are marked as mutable by mark_mutable_variables_body. 
-#ifdef OPTIMIZE_CONSTANT_PARAM - var = new_ast4(LOCAL_VAR, ident_tok, position, kind, true); -#else - var = new_ast4(LOCAL_VAR, ident_tok, position, kind, false); -#endif + var = new_ast3(LOCAL_VAR, ident_tok, position, kind); local_env = new_ast2(',', var, local_env); local_env_size += 1; } @@ -587,21 +583,13 @@ void add_fun_params_to_local_env(ast lst, int position, int kind) { } } -void mark_variable_as_mutable(ast ident) { - ast var; - if (get_op(ident) == IDENTIFIER) { - var = find_var_in_local_env(get_val(ident)); - if (var != -1) { set_child(var, 3, false); } - } -} - -int variable_is_constant_param(ast local_var) { - if (local_var != -1 && get_child(local_var, 2) == KIND_PARAM && get_child(local_var, 3)) { - return true; - } else { - return false; - } -} +// int variable_is_constant_param(ast local_var) { +// if (local_var != -1 && get_child(local_var, 2) == KIND_PARAM && get_child(local_var, 3)) { +// return true; +// } else { +// return false; +// } +// } // Since global and internal variables are prefixed with _, we restrict the name // of variables to not start with _. 
Also, because some shells treat some @@ -659,7 +647,7 @@ int num_vars_to_save() { int counter = fun_gensym_ix; while (env != 0) { - if (!variable_is_constant_param(get_child(env, 0))) counter += 1; + counter += 1; env = get_child(env, 1); } @@ -682,7 +670,8 @@ text save_local_vars(int params_count) { while (env != 0) { local_var = get_child(env, 0); env = get_child(env, 1); - if (variable_is_constant_param(local_var)) continue; + // TODO: Constant param optimization + // if (variable_is_constant_param(local_var)) continue; ident = new_ast0(IDENTIFIER, get_child(local_var, 0)); res = concatenate_strings_with(string_concat(wrap_char('$'), env_var_with_prefix(ident, true)), res, wrap_char(' ')); } @@ -716,7 +705,8 @@ text restore_local_vars(int params_count) { while (env != 0) { local_var = get_child(env, 0); env = get_child(env, 1); - if(variable_is_constant_param(local_var)) continue; + // TODO: Constant param optimization + // if(variable_is_constant_param(local_var)) continue; env_non_cst_size += 1; } @@ -725,7 +715,8 @@ text restore_local_vars(int params_count) { while (env != 0) { local_var = get_child(env, 0); env = get_child(env, 1); - if (variable_is_constant_param(local_var)) continue; + // TODO: Constant param optimization + // if (variable_is_constant_param(local_var)) continue; ident = new_ast0(IDENTIFIER, get_child(local_var, 0)); res = concatenate_strings_with(string_concat5(wrap_str_lit("$(("), env_var_with_prefix(ident, true), wrap_str_lit(" = $"), format_special_var(new_ast0(IDENTIFIER_DOLLAR, params_count + env_non_cst_size - local_var_pos), true), wrap_str_lit("))")), res, wrap_char(' ')); local_var_pos += 1; @@ -747,7 +738,6 @@ text restore_local_vars(int params_count) { // Save the value of local variables to positional parameters text let_params(int params) { ast ident; - ast local_var; text res = 0; int params_ix = 2; @@ -756,11 +746,12 @@ text let_params(int params) { runtime_use_local_vars = true; while (params != 0) { - local_var = 
find_var_in_local_env(get_child(get_child(params, 0), 0)); - if (!variable_is_constant_param(local_var)) { - ident = new_ast0(IDENTIFIER, get_child(get_child(params, 0), 0)); - res = concatenate_strings_with(res, string_concat4(wrap_str_lit("let "), env_var_with_prefix(ident, false), wrap_char(' '), format_special_var(new_ast0(IDENTIFIER_DOLLAR, params_ix), false)), wrap_str_lit("; ")); - } + // TODO: Constant param optimization + // local_var = find_var_in_local_env(get_child(get_child(params, 0), 0)); + // if (!variable_is_constant_param(local_var)) { + ident = new_ast0(IDENTIFIER, get_child(get_child(params, 0), 0)); + res = concatenate_strings_with(res, string_concat4(wrap_str_lit("let "), env_var_with_prefix(ident, false), wrap_char(' '), format_special_var(new_ast0(IDENTIFIER_DOLLAR, params_ix), false)), wrap_str_lit("; ")); + // } params = get_child(params, 1); params_ix += 1; } @@ -792,8 +783,10 @@ text save_local_vars(int params_count) { #ifdef SH_INITIALIZE_PARAMS_WITH_LET if (local_var != -1 && get_child(local_var, 2) != KIND_PARAM) { // Skip params #else + // TODO: Constant param optimization // Constant function parameters are assigned to $1, $2, ... and don't need to be saved - if (!variable_is_constant_param(local_var)) { + if (true) + // if (!variable_is_constant_param(local_var)) { #endif ident = new_ast0(IDENTIFIER, get_child(local_var, 0)); res = concatenate_strings_with(string_concat(wrap_str_lit("let "), env_var_with_prefix(ident, true)), res, wrap_str_lit("; ")); @@ -828,10 +821,11 @@ text restore_local_vars(int params_count) { local_var = get_child(env, 0); // Constant function parameters are assigned to $1, $2, ... 
and don't need to be saved - if (!variable_is_constant_param(local_var)) { + // TODO: Constant param optimization + // if (!variable_is_constant_param(local_var)) { ident = new_ast0(IDENTIFIER, get_child(local_var, 0)); res = concatenate_strings_with(res, env_var(ident), wrap_char(' ')); - } + // } env = get_child(env, 1); } @@ -2091,87 +2085,6 @@ ast get_leading_var_declarations(ast node) { return new_ast2(',', result, node); } -#ifdef OPTIMIZE_CONSTANT_PARAM -void mark_mutable_variables_statement(ast node) { - int op = get_op(node); - ast params; - - if (node == 0) return; - - if (op == IF_KW) { - mark_mutable_variables_statement(get_child(node, 0)); - if (get_child(node, 1)) mark_mutable_variables_body(get_child(node, 1)); - if (get_child(node, 2)) mark_mutable_variables_statement(get_child(node, 2)); - } else if (op == WHILE_KW) { - mark_mutable_variables_statement(get_child(node, 0)); - if (get_child(node, 1)) mark_mutable_variables_body(get_child(node, 1)); - } else if (op == DO_KW) { - if (get_child(node, 0)) mark_mutable_variables_statement(get_child(node, 0)); - mark_mutable_variables_body(get_child(node, 1)); - } else if (op == FOR_KW) { - if (get_child(node, 0)) mark_mutable_variables_statement(get_child(node, 0)); - if (get_child(node, 1)) mark_mutable_variables_statement(get_child(node, 1)); - if (get_child(node, 2)) mark_mutable_variables_statement(get_child(node, 2)); - if (get_child(node, 3)) mark_mutable_variables_body(get_child(node, 2)); - } else if (op == SWITCH_KW) { - mark_mutable_variables_statement(get_child(node, 0)); - if (get_child(node, 1)) mark_mutable_variables_statement(get_child(node, 1)); - } else if (op == BREAK_KW || op == CONTINUE_KW || op == GOTO_KW) { - // Do nothing - } else if (op == ':' || op == CASE_KW || op == DEFAULT_KW) { - mark_mutable_variables_statement(get_child(node, op == DEFAULT_KW ? 
0 : 1)); - } else if (op == RETURN_KW) { - if (get_child(node, 0) != 0) mark_mutable_variables_statement(get_child(node, 0)); - } else if (op == '(') { - params = get_child(node, 1); - - if (params != 0) { // Check if not an empty list - if (get_op(params) == ',') { - while (get_op(params) == ',') { - mark_mutable_variables_statement(get_child(params, 0)); - params = get_child(params, 1); - } - } else { // params is the first argument, not wrapped in a cons cell - mark_mutable_variables_statement(params); - } - } - } else if (op == '{') { // six.compound - mark_mutable_variables_body(node); - } else if (op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_DOLLAR || op == INTEGER || op == CHARACTER || op == STRING) { - // Do nothing - } else if (op == '=' || op == PLUS_PLUS_PRE || op == MINUS_MINUS_PRE || op == PLUS_PLUS_POST || op == MINUS_MINUS_POST - || op == PLUS_EQ || op == AMP_EQ || op == BAR_EQ || op == CARET_EQ || op == LSHIFT_EQ || op == MINUS_EQ - || op == PERCENT_EQ || op == PLUS_EQ || op == RSHIFT_EQ || op == SLASH_EQ || op == STAR_EQ || op == SIZEOF_KW) { - mark_variable_as_mutable(get_child(node, 0)); - if (get_nb_children(node) == 2) mark_mutable_variables_statement(get_child(node, 1)); - } else if (op == '~' || op == '!' - || op == '&' || op == '|' || op == '<' || op == '>' || op == '+' || op == '-' || op == '*' || op == '/' - || op == '%' || op == '^' || op == ',' || op == EQ_EQ || op == EXCL_EQ || op == LT_EQ || op == GT_EQ - || op == LSHIFT || op == RSHIFT || op == '=' || op == '[' || op == AMP_AMP || op == BAR_BAR || op == '.' 
|| op == ARROW) { - mark_mutable_variables_statement(get_child(node, 0)); - if (get_nb_children(node) == 2) mark_mutable_variables_statement(get_child(node, 1)); - } else if (op == CAST) { - mark_mutable_variables_statement(get_child(node, 1)); // child 0 is the type - } else if (op == '?') { - mark_mutable_variables_statement(get_child(node, 0)); - mark_mutable_variables_statement(get_child(node, 1)); - mark_mutable_variables_statement(get_child(node, 2)); - } else { - printf("op=%d %c\n", op, op); - fatal_error("mark_mutable_variables_statement: unknown statement"); - } -} - -void mark_mutable_variables_body(ast node) { - if (node != 0) { - while (get_op(node) == '{') { - mark_mutable_variables_statement(get_child(node, 0)); - node = get_child(node, 1); - } - } -} -#endif - void comp_glo_fun_decl(ast node) { ast name = get_child(node, 0); ast fun_type = get_child(node, 1); @@ -2207,10 +2120,6 @@ void comp_glo_fun_decl(ast node) { add_fun_params_to_local_env(params, 2, KIND_PARAM); // Start position at 2 because 1 is taken by result_loc add_vars_to_local_env(local_vars, local_env_size + 2, KIND_LOCAL); -#ifdef OPTIMIZE_CONSTANT_PARAM - mark_mutable_variables_body(body); -#endif - #ifdef SH_INITIALIZE_PARAMS_WITH_LET trailing_txt = let_params(params); if (trailing_txt != 0) trailing_txt = string_concat(wrap_char(' '), trailing_txt); @@ -2247,10 +2156,11 @@ void comp_glo_fun_decl(ast node) { while (params != 0) { var = get_child(params, 0); + // TODO: Constant param optimization // Constant parameters don't need to be initialized - if (!variable_is_constant_param(find_var_in_local_env(get_val(var)))) { + // if (!variable_is_constant_param(find_var_in_local_env(get_val(var)))) { comp_assignment(new_ast0(IDENTIFIER, get_child(var, 0)), new_ast0(IDENTIFIER_DOLLAR, params_ix)); - } + // } params = get_child(params, 1); params_ix += 1; @@ -2532,9 +2442,10 @@ void initialize_function_variables() { local_var = get_child(env, 0); ident = new_ast0(IDENTIFIER, 
get_child(local_var, 0)); - if (!variable_is_constant_param(local_var)) { + // TODO: Constant param optimization + // if (!variable_is_constant_param(local_var)) { res = concatenate_strings_with(res, env_var(ident), wrap_str_lit(" = ")); - } + // } env = get_child(env, 1); } From 5acef76ca348c992e1862bbf7ee75e897072ad8f Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Wed, 25 Sep 2024 17:19:03 -0400 Subject: [PATCH 3/8] Add script to track changes to pnut-sh.sh --- diff-pnut-sh.sh | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100755 diff-pnut-sh.sh diff --git a/diff-pnut-sh.sh b/diff-pnut-sh.sh new file mode 100755 index 0000000..b0d491a --- /dev/null +++ b/diff-pnut-sh.sh @@ -0,0 +1,38 @@ +#! /bin/sh + +set -e + +: ${PNUT_OPTIONS:=} # Default to empty options + +TEMP_DIR="build" +PNUT_SH_OPTIONS="$PNUT_OPTIONS -DRT_NO_INIT_GLOBALS -Dsh" +PNUT_SH_OPTIONS_FAST="$PNUT_SH_OPTIONS -DSH_SAVE_VARS_WITH_SET -DOPTIMIZE_CONSTANT_PARAM" +PNUT_SH_FILE_ORIGINAL="$TEMP_DIR/pnut-sh-original.sh" +PNUT_SH_FILE_FRESH="$TEMP_DIR/pnut-sh.sh" + +# Parse the arguments +init=0 + +while [ $# -gt 0 ]; do + case $1 in + --fast) PNUT_SH_OPTIONS="$PNUT_SH_OPTIONS_FAST"; shift 1 ;; + --init) init=1; shift 1 ;; + *) echo "Unknown option: $1"; exit 1;; + esac +done + +if [ ! -d "$TEMP_DIR" ]; then mkdir "$TEMP_DIR"; fi + +gcc -o "$TEMP_DIR/pnut.exe" $PNUT_SH_OPTIONS pnut.c + +if [ $init -eq 1 ]; then + ./$TEMP_DIR/pnut.exe $PNUT_SH_OPTIONS "pnut.c" > "$PNUT_SH_FILE_ORIGINAL" + exit 0 +fi + +if [ ! -f "$PNUT_SH_FILE_ORIGINAL" ]; then + echo "$PNUT_SH_FILE_ORIGINAL not found. Run this script with --init first." 
+ exit 1 +fi +./$TEMP_DIR/pnut.exe $PNUT_SH_OPTIONS "pnut.c" > "$PNUT_SH_FILE_FRESH" +diff -w "$PNUT_SH_FILE_ORIGINAL" "$PNUT_SH_FILE_FRESH" From 0fcc364dc8646399bf8f93a19cbad34a9ca1758a Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Wed, 25 Sep 2024 17:39:36 -0400 Subject: [PATCH 4/8] Allow local variable declarations anywhere Local scoping rules are still to be implemented, but I want to make more changes so splitting this work in smaller commits to make reviewing easier. --- sh.c | 193 +++++++++++++++++++++-------------------------------------- 1 file changed, 68 insertions(+), 125 deletions(-) diff --git a/sh.c b/sh.c index 31af70c..245090d 100644 --- a/sh.c +++ b/sh.c @@ -297,7 +297,8 @@ int in_tail_position = false; // Is the current statement in tail position? int loop_nesting_level = 0; // Number of loops surrounding the current statement int loop_end_actions_start = 0; // Start position of declarations for the last action in a for loop int loop_end_actions_end = 0; // End position of declarations for the last action in a for loop -ast local_env = 0; // List of local variables +ast local_env = 0; // List of local variables (following local scoping rules) +ast local_env_commulative = 0; // List of local variables (cummulative for the whole function) ast local_env_size = 0; // Size of the environment int gensym_ix = 0; // Counter for fresh_ident int fun_gensym_ix = 0; // Maximum value of gensym_ix for the current function @@ -427,8 +428,7 @@ void print_glo_decls() { } } -ast find_var_in_local_env(ast ident_tok) { - ast env = local_env; +ast find_var_in_local_env(ast ident_tok, ast env) { ast var; while (env != 0) { var = get_child(env, 0); @@ -485,31 +485,25 @@ text global_var(ast ident_tok) { } text env_var_with_prefix(ast ident, ast prefixed_with_dollar) { - ast var; - text res; - if (get_op(ident) == IDENTIFIER) { - var = find_var_in_local_env(get_val(ident)); - if (var != -1) { + if (find_var_in_local_env(get_val(ident), local_env) != -1) { // 
TODO: Constant param optimization // if (get_child(var, 2) == KIND_PARAM) { // res = wrap_int(get_child(var, 1)); // if (!prefixed_with_dollar) res = string_concat(wrap_char('$'), res); // } else { if (get_val(ident) == ARGV_ID) { - res = wrap_str_lit("argv_"); + return wrap_str_lit("argv_"); } else { - res = wrap_str_pool(get_val(get_val(ident))); + return wrap_str_pool(get_val(get_val(ident))); } // } } else { - res = global_var(get_val(ident)); + return global_var(get_val(ident)); } } else { - res = format_special_var(ident, prefixed_with_dollar); + return format_special_var(ident, prefixed_with_dollar); } - - return res; } text env_var(ast ident) { @@ -543,43 +537,30 @@ ast fresh_string_ident() { // 2. Position of the variable in the shell environment ($1, $2, ...) // 3. Kind of variable (function param or local var) // 4. Constant: if the variable is never assigned to -void add_var_to_local_env(ast ident_tok, int position, int kind) { +void add_var_to_local_env(ast ident_tok, int kind) { ast var; // Check if the variable is not in env. This should always do nothing - if (find_var_in_local_env(ident_tok) != -1) { + if (find_var_in_local_env(ident_tok, local_env) != -1) { fatal_error("add_var_to_local_env: variable already in local environment"); } // The var is not part of the environment, so we add it. 
- var = new_ast3(LOCAL_VAR, ident_tok, position, kind); + local_env_size += 1; // Pre-increment the size because positions are 1-indexed + var = new_ast3(LOCAL_VAR, ident_tok, local_env_size, kind); local_env = new_ast2(',', var, local_env); - local_env_size += 1; -} -void add_vars_to_local_env(ast lst, int position, int kind) { - ast decls; - ast variables; - ast variable; - while (lst != 0) { - decls = get_child(lst, 0); // VAR_DECLS node - variables = get_child(decls, 0); // List of variables - while(variables != 0){ // Loop through the list of variables - variable = get_child(variables, 0); // Variable node - add_var_to_local_env(get_child(variable, 0), position, kind); - variables = get_child(variables, 1); - position += 1; // Increment position - } - lst = get_child(lst, 1); + // Add the variable to the commulative environment + if (find_var_in_local_env(ident_tok, local_env_commulative) == -1) { + local_env_commulative = new_ast2(',', var, local_env_commulative); } } -void add_fun_params_to_local_env(ast lst, int position, int kind) { +void add_fun_params_to_local_env(ast lst) { ast decl; while (lst != 0) { decl = get_child(lst, 0); - add_var_to_local_env(get_child(decl, 0), position, kind); + add_var_to_local_env(get_child(decl, 0), KIND_PARAM); lst = get_child(lst, 1); - position += 1; } } @@ -622,28 +603,15 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for } } -void assert_vars_are_safe(ast lst, bool local) { - ast decls; - ast variables; - ast variable; - while(lst != 0){ - if(get_op(get_child(lst, 0)) == VAR_DECLS){ // If it's a list of declarations - decls = get_child(lst, 0); - variables = get_child(decls, 0); - while(variables != 0) { // Loop through the list of variables - variable = get_child(variables, 0); - assert_var_decl_is_safe(variable, local); // Check the variables - variables = get_child(variables, 1); - } - } else{ - assert_var_decl_is_safe(get_child(lst, 0), local); // Check the variable - } +void 
check_param_decls(ast lst) { + while (lst != 0) { + assert_var_decl_is_safe(get_child(lst, 0), true); lst = get_child(lst, 1); } } int num_vars_to_save() { - ast env = local_env; + ast env = local_env_commulative; int counter = fun_gensym_ix; while (env != 0) { @@ -656,8 +624,8 @@ int num_vars_to_save() { #ifdef SH_SAVE_VARS_WITH_SET // Save the value of local variables to positional parameters -text save_local_vars(int params_count) { - ast env = local_env; +text save_local_vars() { + ast env = local_env_commulative; ast local_var; ast ident; text res = 0; @@ -687,7 +655,7 @@ text save_local_vars(int params_count) { // Restore the previous value of local variables from positional parameters text restore_local_vars(int params_count) { - ast env = local_env; + ast env = local_env_commulative; ast local_var; ast ident; // Position of the saved local vars, starting from 0 @@ -705,16 +673,18 @@ text restore_local_vars(int params_count) { while (env != 0) { local_var = get_child(env, 0); env = get_child(env, 1); + // TODO: Constant param optimization // if(variable_is_constant_param(local_var)) continue; env_non_cst_size += 1; } - env = local_env; + env = local_env_commulative; while (env != 0) { local_var = get_child(env, 0); env = get_child(env, 1); + // TODO: Constant param optimization // if (variable_is_constant_param(local_var)) continue; ident = new_ast0(IDENTIFIER, get_child(local_var, 0)); @@ -747,7 +717,7 @@ text let_params(int params) { while (params != 0) { // TODO: Constant param optimization - // local_var = find_var_in_local_env(get_child(get_child(params, 0), 0)); + // local_var = find_var_in_local_env(get_child(get_child(params, 0), 0), local_env); // if (!variable_is_constant_param(local_var)) { ident = new_ast0(IDENTIFIER, get_child(get_child(params, 0), 0)); res = concatenate_strings_with(res, string_concat4(wrap_str_lit("let "), env_var_with_prefix(ident, false), wrap_char(' '), format_special_var(new_ast0(IDENTIFIER_DOLLAR, params_ix), false)), 
wrap_str_lit("; ")); @@ -760,8 +730,8 @@ text let_params(int params) { } #endif -text save_local_vars(int params_count) { - ast env = local_env; +text save_local_vars() { + ast env = local_env_commulative; ast local_var; ast ident; text res = 0; @@ -801,7 +771,7 @@ text save_local_vars(int params_count) { // The only difference between save_local_vars and restore_local_vars is the // order of the arguments and the call to unsave_vars instead of save_vars. text restore_local_vars(int params_count) { - ast env = local_env; + ast env = local_env_commulative; ast local_var; ast ident; text res = 0; @@ -1950,6 +1920,28 @@ bool comp_loop(text cond, ast body, ast loop_end_stmt, text last_line, STMT_CTX return cond == wrap_char(':') && always_returns; } +void comp_var_decls(ast node) { + ast var_decl; + + node = get_child(node, 0); + while (node != 0) { + // Add to local env and cummulative env, then initialize + var_decl = get_child(node, 0); + // printf("Adding var %s\n", string_pool + get_val(get_child(var_decl, 0))); + add_var_to_local_env(get_child(var_decl, 0), KIND_LOCAL); + if (get_child(var_decl, 2) != 0) { + comp_assignment(new_ast0(IDENTIFIER, get_child(var_decl, 0)), get_child(var_decl, 2)); + } +#ifdef INITIALIZE_LOCAL_VARS_WITH_ZERO + else { + comp_assignment(new_ast0(IDENTIFIER, get_child(var_decl, 0)), new_ast0(INTEGER, 0)); + } +#endif + // TODO: Cummulative env + node = get_child(node, 1); // Next variable + } +} + // Returns whether the statement always returns/breaks. // This is used to delimit the end of conditional blocks of switch statements. 
bool comp_statement(ast node, STMT_CTX stmt_ctx) { @@ -2050,7 +2042,7 @@ bool comp_statement(ast node, STMT_CTX stmt_ctx) { fatal_error("case/default must be at the beginning of a switch conditional block"); return false; } else if (op == VAR_DECLS) { - fatal_error("variable declaration must be at the beginning of a function"); + comp_var_decls(node); return false; } else { str = comp_rvalue(node, RVALUE_CTX_BASE); @@ -2061,52 +2053,25 @@ bool comp_statement(ast node, STMT_CTX stmt_ctx) { } } -ast get_leading_var_declarations(ast node) { - ast result = 0; - ast local_var; - ast tail; - ast new_tail; - - if (get_op(node) == '{') { - while (get_op(node) == '{') { - local_var = get_child(node, 0); - if (get_op(local_var) != VAR_DECLS) break; - - // Initialize list - new_tail = new_ast2(',', local_var, 0); - if (result == 0) { result = new_tail; } - else { set_child(tail, 1, new_tail); } - tail = new_tail; - - node = get_child(node, 1); - } - } - - return new_ast2(',', result, node); -} - void comp_glo_fun_decl(ast node) { ast name = get_child(node, 0); ast fun_type = get_child(node, 1); ast params = get_child(node, 2); ast body = get_child(node, 3); - ast local_vars_and_body, local_vars; text trailing_txt = 0; - int params_ix; - ast decls, vars, var; + int params_ix = 2; // Start at 2 because $1 is assigned to the return location + ast var; int save_loc_vars_fixup; int start_glo_decl_idx; if (body == -1) return; // ignore forward declarations - local_vars_and_body = get_leading_var_declarations(get_child(node, 3)); - local_vars = get_child(local_vars_and_body, 0); - body = get_child(local_vars_and_body, 1); + local_env_size = 1; // Start at 1 because $1 is assigned to the return location top_level_stmt = false; - assert_vars_are_safe(params, true); - assert_vars_are_safe(local_vars, true); + check_param_decls(params); + add_fun_params_to_local_env(params); // If the function is main if (name == MAIN_ID) { @@ -2117,9 +2082,6 @@ void comp_glo_fun_decl(ast node) { if 
(get_op(fun_type) != VOID_KW) main_returns = true; } - add_fun_params_to_local_env(params, 2, KIND_PARAM); // Start position at 2 because 1 is taken by result_loc - add_vars_to_local_env(local_vars, local_env_size + 2, KIND_LOCAL); - #ifdef SH_INITIALIZE_PARAMS_WITH_LET trailing_txt = let_params(params); if (trailing_txt != 0) trailing_txt = string_concat(wrap_char(' '), trailing_txt); @@ -2127,7 +2089,6 @@ void comp_glo_fun_decl(ast node) { if (trailing_txt == 0) { // Show the mapping between the function parameters and $1, $2, etc. - params_ix = 2; // Start at 2 because $1 is assigned to result location while (params != 0) { var = get_child(params, 0); trailing_txt = concatenate_strings_with(trailing_txt, string_concat3(wrap_str_pool(get_val(get_val(var))), wrap_str_lit(": $"), wrap_int(params_ix)), wrap_str_lit(", ")); @@ -2158,7 +2119,7 @@ void comp_glo_fun_decl(ast node) { // TODO: Constant param optimization // Constant parameters don't need to be initialized - // if (!variable_is_constant_param(find_var_in_local_env(get_val(var)))) { + // if (!variable_is_constant_param(find_var_in_local_env(get_val(var)), local_env)) { comp_assignment(new_ast0(IDENTIFIER, get_child(var, 0)), new_ast0(IDENTIFIER_DOLLAR, params_ix)); // } @@ -2167,35 +2128,18 @@ void comp_glo_fun_decl(ast node) { } #endif - // Initialize local vars - while (local_vars != 0) { - decls = get_child(local_vars, 0); // List of VAR_DECLS - vars = get_child(decls, 0); // VAR_DECL list - while(vars != 0) { - var = get_child(vars, 0); // Single VAR_DECL - // TODO: Replace with ternary expression? 
- if (get_child(var, 2) != 0) { - comp_assignment(new_ast0(IDENTIFIER, get_child(var, 0)), get_child(var, 2)); - } -#ifdef INITIALIZE_LOCAL_VARS_WITH_ZERO - else { - comp_assignment(new_ast0(IDENTIFIER, get_child(var, 0)), new_ast0(INTEGER, 0)); - } -#endif - vars = get_child(vars, 1); // Next VAR_DECL - } - local_vars = get_child(local_vars, 1); - } - comp_body(body, STMT_CTX_DEFAULT); // functions cannot be empty so we insert ':' if it's empty if (!any_active_glo_decls(start_glo_decl_idx)) append_glo_decl(wrap_char(':')); + // Set local environment to the cumulative one for save_local_vars/restore_local_vars + local_env = local_env_commulative; + append_glo_decl(restore_local_vars(params_ix - 1)); // We only know the full set of temporary variables after compiling the function body. // So we fixup the calls to save_vars and unsave_vars at the end. - fixup_glo_decl(save_loc_vars_fixup, save_local_vars(params_ix - 1)); + fixup_glo_decl(save_loc_vars_fixup, save_local_vars()); while (rest_loc_var_fixups != 0) { fixup_glo_decl(get_child(rest_loc_var_fixups, 0), restore_local_vars(params_ix - 1)); rest_loc_var_fixups = get_child(rest_loc_var_fixups, 1); @@ -2426,7 +2370,7 @@ void epilogue() { // Initialize local and synthetic variables used by function void initialize_function_variables() { - ast env = local_env; + ast env = local_env_commulative; ast local_var; ast ident; text res = 0; @@ -2440,14 +2384,14 @@ void initialize_function_variables() { while (env != 0) { local_var = get_child(env, 0); + env = get_child(env, 1); + ident = new_ast0(IDENTIFIER, get_child(local_var, 0)); // TODO: Constant param optimization // if (!variable_is_constant_param(local_var)) { res = concatenate_strings_with(res, env_var(ident), wrap_str_lit(" = ")); // } - - env = get_child(env, 1); } if (res != 0) { @@ -2470,8 +2414,7 @@ void codegen_glo_decl(ast decl) { print_glo_decls(); // Reset state glo_decl_ix = 0; - local_env_size = 0; - local_env = 0; + local_env = local_env_commulative = 0; 
// Reset local environment max_text_alloc = max_text_alloc > text_alloc ? max_text_alloc : text_alloc; cumul_text_alloc += text_alloc; text_alloc = 1; From f52ed9bf2120bb269638334ab9f6f03434bdb8fc Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Wed, 25 Sep 2024 18:52:51 -0400 Subject: [PATCH 5/8] Add basic test --- tests/_sh/scoping.c | 12 ++++++++++++ tests/_sh/scoping.golden | 3 +++ 2 files changed, 15 insertions(+) create mode 100644 tests/_sh/scoping.c create mode 100644 tests/_sh/scoping.golden diff --git a/tests/_sh/scoping.c b/tests/_sh/scoping.c new file mode 100644 index 0000000..54f9ca9 --- /dev/null +++ b/tests/_sh/scoping.c @@ -0,0 +1,12 @@ +#include <stdio.h> + +int glo1 = 12; + +void main() { + printf("%d\n", glo1); + { + int glo1 = 13; + printf("%d\n", glo1); + } + printf("%d\n", glo1); +} diff --git a/tests/_sh/scoping.golden b/tests/_sh/scoping.golden new file mode 100644 index 0000000..c1a5858 --- /dev/null +++ b/tests/_sh/scoping.golden @@ -0,0 +1,3 @@ +12 +13 +12 From b8cd835e26a0b8c46049c20934ee80e2c1360b69 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Wed, 25 Sep 2024 18:53:03 -0400 Subject: [PATCH 6/8] Regenerate repl.sh example --- examples/compiled/repl.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/compiled/repl.sh b/examples/compiled/repl.sh index c980131..3cbbb1e 100755 --- a/examples/compiled/repl.sh +++ b/examples/compiled/repl.sh @@ -445,16 +445,16 @@ _symbol_ref() { # n: $2 : $((__tmp = $1)) $((n = $3)) $((__t1 = $4)) $(($1 = __tmp)) } -: $((__t2 = __t1 = root = sym = list = name = 0)) +: $((__t1 = root = sym = list = name = 0)) _create_sym() { # name: $2 - set $@ $name $list $sym $root $__t1 $__t2 + set $@ $name $list $sym $root $__t1 name=$2 - _lst_length __t2 $name - _alloc_rib list $name $__t2 $(((3 << 1) | 1)) + _lst_length __t1 $name + _alloc_rib list $name $__t1 $(((3 << 1) | 1)) _alloc_rib sym $_FALSE $list $(((2 << 1) | 1)) _alloc_rib root $sym $_symbol_table $(((0 << 1) | 1)) 
: $(($1 = root)) - : $((__tmp = $1)) $((name = $3)) $((list = $4)) $((sym = $5)) $((root = $6)) $((__t1 = $7)) $((__t2 = $8)) $(($1 = __tmp)) + : $((__tmp = $1)) $((name = $3)) $((list = $4)) $((sym = $5)) $((root = $6)) $((__t1 = $7)) $(($1 = __tmp)) } : $((__t1 = c = accum = n = 0)) From 995eca22e72180ce61f0f89d1c33fa1b361f1081 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 27 Sep 2024 22:17:41 -0400 Subject: [PATCH 7/8] Make it easy to add locals to different env --- env.c | 23 ++++++++++------------- exe.c | 2 +- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/env.c b/env.c index 1927dcf..b91e11b 100644 --- a/env.c +++ b/env.c @@ -1,7 +1,7 @@ int cgc_fs = 0; // Function bindings that follows lexical scoping rules int cgc_locals = 0; -// Like cgc_locals, but with 1 scope for the entire function. Used for goto labels +// Like cgc_locals, but with 1 scope for the entire function int cgc_locals_fun = 0; // Global bindings int cgc_globals = 0; @@ -26,32 +26,29 @@ enum BINDING { BINDING_TYPE_ENUM, }; -void cgc_add_local_param(int ident, int size, ast type) { +int cgc_add_local(enum BINDING binding_type, int ident, int size, ast type) { int binding = alloc_obj(6); heap[binding+0] = cgc_locals; - heap[binding+1] = BINDING_PARAM_LOCAL; + heap[binding+1] = binding_type; heap[binding+2] = ident; heap[binding+3] = size; heap[binding+4] = cgc_fs; heap[binding+5] = type; + return binding; +} + +void cgc_add_local_param(int ident, int size, ast type) { + cgc_locals = cgc_add_local(BINDING_PARAM_LOCAL, ident, size, type); #ifdef sh cgc_fs += size; #else cgc_fs -= size; #endif - cgc_locals = binding; } -void cgc_add_local(int ident, int size, ast type) { - int binding = alloc_obj(6); +void cgc_add_local_var(int ident, int size, ast type) { cgc_fs += size; - heap[binding+0] = cgc_locals; - heap[binding+1] = BINDING_VAR_LOCAL; - heap[binding+2] = ident; - heap[binding+3] = size; - heap[binding+4] = cgc_fs; - heap[binding+5] = type; - cgc_locals = 
binding; + cgc_locals = cgc_add_local(BINDING_VAR_LOCAL, ident, size, type); } void cgc_add_enclosing_loop(int loop_fs, int break_lbl, ast continue_lbl) { diff --git a/exe.c b/exe.c index 88b50f6..e26ad91 100644 --- a/exe.c +++ b/exe.c @@ -1544,7 +1544,7 @@ void codegen_body(ast node) { size = 1; } - cgc_add_local(name, size, type); + cgc_add_local_var(name, size, type); decls = get_child(decls, 1); // Move to the next declaration in the list } From b7f7da26c80fc4970d450ac3f44d12201bd32a73 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 30 Sep 2024 22:49:02 -0400 Subject: [PATCH 8/8] Support scoping of local variables --- sh.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/sh.c b/sh.c index 245090d..cf48878 100644 --- a/sh.c +++ b/sh.c @@ -1729,15 +1729,21 @@ void comp_assignment(ast lhs, ast rhs) { bool comp_body(ast node, STMT_CTX stmt_ctx) { int start_in_tail_position = in_tail_position; + ast start_local_env = local_env; + int start_local_env_size = local_env_size; + in_tail_position = false; while (node != 0) { // Last statement of body is in tail position if the body itself is in tail position if (get_op(get_child(node, 1)) != '{') in_tail_position = start_in_tail_position; - if (comp_statement(get_child(node, 0), stmt_ctx)) return true; // Statement always returns => block is terminated + if (comp_statement(get_child(node, 0), stmt_ctx)) break; // Statement always returns => block is terminated node = get_child(node, 1); } - return false; + + local_env = start_local_env; + local_env_size = start_local_env_size; + return node != 0; // If node is not null, it means the block was terminated early } // Assemble switch pattern from case and default statements. 
@@ -1777,6 +1783,8 @@ text make_switch_pattern(ast statement) { bool comp_switch(ast node) { int start_in_tail_position = in_tail_position; ast statement; + ast start_local_env = local_env; + int start_local_env_size = local_env_size; append_glo_decl(string_concat3( wrap_str_lit("case "), @@ -1818,6 +1826,9 @@ bool comp_switch(ast node) { nest_level -= 1; append_glo_decl(wrap_str_lit("esac")); + local_env = start_local_env; + local_env_size = start_local_env_size; + // Returning not-false is only important for nested switch statements. // It could be useful to remove the need for the redundant trailing return // when nesting switch statements that we know are exhaustive such as in @@ -1836,6 +1847,8 @@ bool comp_if(ast node, STMT_CTX stmt_ctx) { int start_glo_decl_idx; bool termination_lhs = false; bool termination_rhs = false; + ast start_local_env = local_env; + int start_local_env_size = local_env_size; bool else_if = stmt_ctx & STMT_CTX_ELSE_IF; stmt_ctx = stmt_ctx & ~STMT_CTX_ELSE_IF; // Clear STMT_CTX_ELSE_IF bit to not pass it to the next if statement @@ -1872,6 +1885,9 @@ bool comp_if(ast node, STMT_CTX stmt_ctx) { fatal_error("Early break out of a switch case is unsupported"); } + local_env = start_local_env; + local_env_size = start_local_env_size; + return termination_lhs && termination_rhs; } @@ -1883,6 +1899,8 @@ bool comp_loop(text cond, ast body, ast loop_end_stmt, text last_line, STMT_CTX // Save loop end actions from possible outer loop int start_loop_end_actions_start = loop_end_actions_start; int start_loop_end_actions_end = loop_end_actions_end; + ast start_local_env = local_env; + int start_local_env_size = local_env_size; int start_glo_decl_idx; bool always_returns = false; @@ -1915,6 +1933,8 @@ bool comp_loop(text cond, ast body, ast loop_end_stmt, text last_line, STMT_CTX loop_end_actions_start = start_loop_end_actions_start; loop_end_actions_end = start_loop_end_actions_end; + local_env = start_local_env; + local_env_size = 
start_local_env_size; // If the condition is always true and the loop always returns return cond == wrap_char(':') && always_returns;