diff --git a/debug.c b/debug.c index 2b66094d..3784625b 100644 --- a/debug.c +++ b/debug.c @@ -1,5 +1,3 @@ -// Shell backend already defines a print_string_char -#ifndef sh void print_string_char(int c) { if (c == 7) putstr("\\a"); else if (c == 8) putstr("\\b"); @@ -12,12 +10,9 @@ void print_string_char(int c) { else if (c < 32 || c > 126) { putchar('\\'); putint(c >> 6); putint((c >> 3) & 7); putint(c & 7); } else putchar(c); } -#else -void print_string_char(int c); -#endif +int print_tok_indent = 0; void print_tok(int tok, int val) { - int i; if (tok == AUTO_KW) putstr("auto"); @@ -101,15 +96,20 @@ void print_tok(int tok, int val) { putchar('"'); } else if (tok == MACRO_ARG) { putstr("ARG["); putint(val); putstr("]"); + } else if (tok == '{') { + putchar(tok); + print_tok_indent += 2; + } else if (tok == '}') { + print_tok_indent -= 2; + putchar(tok); + } else if (tok == '\n') { + putchar(tok); + for (i = 0; i < print_tok_indent; i++) putchar(' '); } else { putchar(tok); } - if (tok == ';') { // Simple heuristic to print newlines. This makes the output more readable. - putchar('\n'); - } else { - putchar(' '); - } + if (tok != '\n') putchar(' '); } // Show the type of a token. @@ -181,6 +181,7 @@ void print_tok_type(int tok) { else if (tok == MACRO) putstr("macro"); else if (tok == MACRO_ARG) putstr("macro argument"); else if (tok == EOF) putstr("end of file"); + else if (tok == '\n') putstr("newline"); else { putchar('\''); putchar(tok); putchar('\''); } } diff --git a/pnut.c b/pnut.c index 83edbade..348280c8 100644 --- a/pnut.c +++ b/pnut.c @@ -76,8 +76,10 @@ #endif // Options that turns Pnut into a C preprocessor or some variant of it +// DEBUG_GETCHAR: Read and print the input character by character. // DEBUG_CPP: Run preprocessor like gcc -E. This can be useful for debugging the preprocessor. // DEBUG_EXPAND_INCLUDES: Reads the input file and includes the contents of the included files. +// DEBUG_PARSER: Runs the parser on the input. Outputs nothing. 
typedef int bool; @@ -948,19 +950,6 @@ int read_macro_tokens(int args) { return toks; } -#ifdef DEBUG_CPP -void print_macro_raw_tokens(int tokens) { - int i = 0; - while (tokens != 0) { - // print_tok(car(car(tokens)), cdr(car(tokens))); - putchar(car(car(tokens))); putchar('('); putint(car(car(tokens))); putchar(')'); - tokens = cdr(tokens); - i += 1; - } - putstr("("); putint(i); putstr(" tokens)"); -} -#endif - // A few things that are different from the standard: // - We allow sequence of commas in the argument list // - Function-like macros with 0 arguments can be called either without parenthesis or with (). @@ -1003,22 +992,6 @@ void handle_define() { // Accumulate tokens so they can be replayed when the macro is used heap[macro + 3] = cons(read_macro_tokens(args), args_count); -#ifdef DEBUG_CPP - putstr("# "); - putstr(string_pool + heap[macro + 1]); - if (args_count != -1) putchar('('); // Function-like macro - - while (args_count > 0) { - putstr(string_pool + heap[car(args) + 1]); - args = cdr(args); - args_count -= 1; - if (args_count > 0) putstr(", "); - } - - if (args_count != -1) putstr(") "); - print_macro_raw_tokens(car(heap[macro + 3])); - putchar('\n'); -#endif } int eval_constant(ast expr, bool if_macro) { @@ -1630,7 +1603,6 @@ void paste_tokens(int left_tok, int left_val) { void get_tok() { - bool first_time = true; // Used to simulate a do-while loop #ifdef SH_INCLUDE_C_CODE int prev_char_buf_ix = declaration_char_buf_ix; // Save the cursor in a local variable so we can restore it when the token is @@ -1645,8 +1617,7 @@ void get_tok() { #endif // This outer loop is used to skip over tokens removed by #ifdef/#ifndef/#else - while (first_time || !if_macro_mask) { - first_time = false; + do { #ifdef SH_INCLUDE_C_CODE declaration_char_buf_ix = prev_char_buf_ix; // Skip over tokens that are masked off #endif @@ -2020,7 +1991,8 @@ void get_tok() { } } } - } + } while (!if_macro_mask); + #ifdef SH_INCLUDE_C_CODE last_tok_char_buf_ix = 
prev_last_tok_char_buf_ix - 1; #endif @@ -3145,7 +3117,7 @@ ast parse_compound_statement() { // Select code generator -#if !(defined DEBUG_CPP) && !(defined DEBUG_EXPAND_INCLUDES) +#if !(defined DEBUG_CPP) && !(defined DEBUG_EXPAND_INCLUDES) && !(defined DEBUG_PARSER) && !(defined DEBUG_GETCHAR) #ifdef sh #include "sh.c" #endif @@ -3217,36 +3189,35 @@ int main(int argc, char **argv) { fatal_error("no input file"); } -#if !(defined DEBUG_CPP) && !(defined DEBUG_EXPAND_INCLUDES) - codegen_begin(); -#endif - ch = '\n'; + +#if defined DEBUG_GETCHAR // Read input + while (ch != EOF) { + get_ch(); + } +#elif defined DEBUG_CPP || defined DEBUG_EXPAND_INCLUDES // Tokenize input, output tokens get_tok(); -#if defined DEBUG_EXPAND_INCLUDES while (tok != EOF) { + skip_newlines = false; // Don't skip newlines so print_tok knows where to break lines + print_tok(tok, val); get_tok(); } -#else - while (tok != EOF) { -#ifdef DEBUG_CPP - print_tok(tok, val); +#elif defined DEBUG_PARSER // Parse input, output nothing get_tok(); + while (tok != EOF) { + decl = parse_definition(0); + } #else - + codegen_begin(); + get_tok(); + while (tok != EOF) { decl = parse_definition(0); #ifdef SH_INCLUDE_C_CODE output_declaration_c_code(get_op(decl) == '=' | get_op(decl) == VAR_DECLS); #endif codegen_glo_decl(decl); - -#endif } -#endif - - -#if !(defined DEBUG_CPP) && !(defined DEBUG_EXPAND_INCLUDES) codegen_end(); #endif diff --git a/run-pnut-reader.sh b/run-pnut-reader.sh deleted file mode 100644 index 432db182..00000000 --- a/run-pnut-reader.sh +++ /dev/null @@ -1,36 +0,0 @@ -#! /bin/sh - -TEMP_DIR="build" -PNUT_SH_OPTIONS="-DOPTIMIZE_LONG_LINES -DRT_NO_INIT_GLOBALS -Dsh" - -if [ ! 
-d "$TEMP_DIR" ]; then mkdir "$TEMP_DIR"; fi - -gcc -o "$TEMP_DIR/pnut.exe" $PNUT_SH_OPTIONS pnut.c - -# gcc -E -C -P -DPNUT_CC -Dsh pnut.c > "$TEMP_DIR/pnut-after-cpp.c" - -./$TEMP_DIR/pnut.exe $PNUT_SH_OPTIONS -DDEBUG_GETCHAR "pnut.c" > "$TEMP_DIR/pnut-reader.sh" - -bootstrap_with_shell() { - - echo "Reading pnut.c with $1" - - time $1 "$TEMP_DIR/pnut-reader.sh" $PNUT_SH_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-read.c" - - wc pnut.c "$TEMP_DIR/pnut-read.c" -} - -# Handle runtime options -TEST_ALL_SHELLS=0 - -if [ $# -gt 0 ] && [ $1 = "TEST_ALL_SHELLS" ] ; then TEST_ALL_SHELLS=1; shift; fi - -bootstrap_with_shell "ksh" - -if [ $TEST_ALL_SHELLS -ne 0 ]; then - bootstrap_with_shell "dash" - bootstrap_with_shell "bash" - bootstrap_with_shell "zsh" - bootstrap_with_shell "yash" - bootstrap_with_shell "mksh" -fi diff --git a/run-pnut-tokenizer.sh b/run-pnut-tokenizer.sh deleted file mode 100644 index 2e6cece3..00000000 --- a/run-pnut-tokenizer.sh +++ /dev/null @@ -1,36 +0,0 @@ -#! /bin/sh - -TEMP_DIR="build" -PNUT_SH_OPTIONS="-DRT_NO_INIT_GLOBALS -Dsh" - -if [ ! 
-d "$TEMP_DIR" ]; then mkdir "$TEMP_DIR"; fi - -gcc -o "$TEMP_DIR/pnut.exe" $PNUT_SH_OPTIONS pnut.c - -# gcc -E -C -P -DPNUT_CC -Dsh pnut.c > "$TEMP_DIR/pnut-after-cpp.c" - -./$TEMP_DIR/pnut.exe $PNUT_SH_OPTIONS -DDEBUG_CPP "pnut.c" > "$TEMP_DIR/pnut-tokenizer.sh" - -bootstrap_with_shell() { - - echo "Tokenizing with $1" - - time $1 "$TEMP_DIR/pnut-tokenizer.sh" $PNUT_SH_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-tokenized.c" - - wc pnut.c "$TEMP_DIR/pnut-tokenizer.sh" "$TEMP_DIR/pnut-tokenized.c" -} - -# Handle runtime options -TEST_ALL_SHELLS=0 - -if [ $# -gt 0 ] && [ $1 = "TEST_ALL_SHELLS" ] ; then TEST_ALL_SHELLS=1; shift; fi - -bootstrap_with_shell "ksh" - -if [ $TEST_ALL_SHELLS -ne 0 ]; then - bootstrap_with_shell "dash" - bootstrap_with_shell "bash" - bootstrap_with_shell "zsh" - bootstrap_with_shell "yash" - bootstrap_with_shell "mksh" -fi diff --git a/run-pnut-variant.sh b/run-pnut-variant.sh new file mode 100755 index 00000000..7f9b7b23 --- /dev/null +++ b/run-pnut-variant.sh @@ -0,0 +1,51 @@ +#! /bin/sh + +set -e + +: ${PNUT_OPTIONS:=} # Default to empty options + +TEMP_DIR="build" +PNUT_SH_OPTIONS="$PNUT_OPTIONS -DRT_NO_INIT_GLOBALS -Dsh" +PNUT_SH_OPTIONS_FAST="$PNUT_SH_OPTIONS -DSH_SAVE_VARS_WITH_SET -DOPTIMIZE_CONSTANT_PARAM" +PNUT_VARIANT_OPTIONS="" + +run_with_shell() { + + echo "Running $variant with $1" + + time $1 "$TEMP_DIR/pnut-$variant.sh" $PNUT_SH_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-$variant.output" +} + +# Parse the arguments +shell="$SHELL" # Use current shell as the default. "all" to test all shells. 
+variant="" + +while [ $# -gt 0 ]; do + case $1 in + --shell) shell="$2"; shift 2 ;; + --fast) PNUT_SH_OPTIONS="$PNUT_SH_OPTIONS_FAST"; shift 1 ;; + --reader) PNUT_VARIANT_OPTIONS="-DDEBUG_GETCHAR -Ush"; variant=${1#--}; shift 1 ;; + --tokenizer) PNUT_VARIANT_OPTIONS="-DDEBUG_CPP -Ush"; variant=${1#--}; shift 1 ;; + --parser) PNUT_VARIANT_OPTIONS="-DDEBUG_PARSER -Ush"; variant=${1#--}; shift 1 ;; + *) echo "Unknown option: $1"; exit 1;; + esac +done + +pnut_exec="$TEMP_DIR/pnut-$variant" + +if [ ! -d "$TEMP_DIR" ]; then mkdir "$TEMP_DIR"; fi + +gcc -o "$pnut_exec" $PNUT_SH_OPTIONS pnut.c +$pnut_exec $PNUT_SH_OPTIONS $PNUT_VARIANT_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-$variant.sh" + +if [ "$shell" = "all" ]; then + set +e # Don't exit on error because we want to test all shells. + run_with_shell "dash" + run_with_shell "ksh" + run_with_shell "bash" + run_with_shell "yash" + run_with_shell "mksh" + run_with_shell "zsh" +else + run_with_shell "$shell" +fi diff --git a/sh.c b/sh.c index c7b6d627..a1fc354b 100644 --- a/sh.c +++ b/sh.c @@ -24,19 +24,6 @@ void handle_shell_include() { } } -void print_string_char(int c) { - if (c == 7) putstr("\\a"); - else if (c == 8) putstr("\\b"); - else if (c == 12) putstr("\\f"); - else if (c == 10) putstr("\\n"); - else if (c == 13) putstr("\\r"); - else if (c == 9) putstr("\\t"); - else if (c == 11) putstr("\\v"); - else if (c == '\\' || c == '\'' || c == '\"') { putstr("\\"); putchar(c); } - else if (c < 32 || c > 126) { putstr("\\"); putint(c>>6); putint((c>>3)&7); putint(c&7); } - else putchar(c); -} - // codegen #define text int