Skip to content

Commit

Permalink
Merge pull request #109 from udem-dlteam/laurent/dont-use-temporary-w…
Browse files Browse the repository at this point in the history
…hen-assignment-used-in-condition

Don't use a temporary when `x = f()` is used in a condition
  • Loading branch information
laurenthuberdeau authored Oct 20, 2024
2 parents db6a8b1 + 6f8c0af commit f30d6fd
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 78 deletions.
22 changes: 11 additions & 11 deletions examples/compiled/base64.sh
Original file line number Diff line number Diff line change
Expand Up @@ -105,21 +105,21 @@ _encode() {
}

defarr _lut 256
: $((__t1 = c = 0))
: $((c = 0))
_get() {
let c; let __t1
while _getchar __t1; [ $((c = __t1)) -ge 0 ]; do
let c
while _getchar c; [ $c -ge 0 ]; do
if [ $((c = _$((_lut + c)))) -ge 0 ] ; then
break
fi
done
: $(($1 = c))
endlet $1 __t1 c
endlet $1 c
}

: $((__t1 = c4 = c3 = c2 = c1 = i = 0))
: $((c4 = c3 = c2 = c1 = i = 0))
_decode() {
let i; let c1; let c2; let c3; let c4; let __t1
let i; let c1; let c2; let c3; let c4
i=0
while [ $i -lt 256 ]; do
: $((_$((_lut + i)) = -1))
Expand All @@ -130,21 +130,21 @@ _decode() {
: $((_$((_lut + 255 & _$((_codes + i)))) = i))
: $((i += 1))
done
while _get __t1; [ $((c1 = __t1)) -ge 0 ]; do
if _get __t1; [ $((c2 = __t1)) -lt 0 ] ; then
while _get c1; [ $c1 -ge 0 ]; do
if _get c2; [ $c2 -lt 0 ] ; then
exit 1
fi
printf \\$((((c1 << 2) | (c2 >> 4))/64))$((((c1 << 2) | (c2 >> 4))/8%8))$((((c1 << 2) | (c2 >> 4))%8))
if _get __t1; [ $((c3 = __t1)) -lt 0 ] ; then
if _get c3; [ $c3 -lt 0 ] ; then
break
fi
printf \\$(((255 & ((c2 << 4) | (c3 >> 2)))/64))$(((255 & ((c2 << 4) | (c3 >> 2)))/8%8))$(((255 & ((c2 << 4) | (c3 >> 2)))%8))
if _get __t1; [ $((c4 = __t1)) -lt 0 ] ; then
if _get c4; [ $c4 -lt 0 ] ; then
break
fi
printf \\$(((255 & ((c3 << 6) | c4))/64))$(((255 & ((c3 << 6) | c4))/8%8))$(((255 & ((c3 << 6) | c4))%8))
done
endlet $1 __t1 c4 c3 c2 c1 i
endlet $1 c4 c3 c2 c1 i
}

: $((myargv = argc = 0))
Expand Down
14 changes: 7 additions & 7 deletions examples/compiled/c4.sh
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ _main() { # argc: $2, argv: $3
: $((__tmp = $1)) $((argc = $4)) $((argv_ = $5)) $((fd = $6)) $((bt = $7)) $((ty = $8)) $((poolsz = $9)) $((idmain = ${10})) $((pc = ${11})) $((sp = ${12})) $((bp = ${13})) $((a = ${14})) $((cycle = ${15})) $((i = ${16})) $((t = ${17})) $((__t1 = ${18})) $(($1 = __tmp))
return
fi
if _open __t1 $((_$argv_)) 0; [ $((fd = __t1)) -lt 0 ] ; then
if _open fd $((_$argv_)) 0; [ $fd -lt 0 ] ; then
printf "could not open("
_put_pstr __ $((_$argv_))
printf ")\n"
Expand All @@ -837,25 +837,25 @@ _main() { # argc: $2, argv: $3
return
fi
poolsz=$((256 * 1024))
if _malloc __t1 $poolsz; [ $((!(_sym = __t1))) != 0 ] ; then
if _malloc _sym $poolsz; [ $((!_sym)) != 0 ] ; then
printf "could not malloc(%d) symbol area\n" $poolsz
: $(($1 = -1))
: $((__tmp = $1)) $((argc = $4)) $((argv_ = $5)) $((fd = $6)) $((bt = $7)) $((ty = $8)) $((poolsz = $9)) $((idmain = ${10})) $((pc = ${11})) $((sp = ${12})) $((bp = ${13})) $((a = ${14})) $((cycle = ${15})) $((i = ${16})) $((t = ${17})) $((__t1 = ${18})) $(($1 = __tmp))
return
fi
if _malloc __t1 $poolsz; [ $((!(_le = _e = __t1))) != 0 ] ; then
if _malloc _e $poolsz; [ $((!(_le = _e))) != 0 ] ; then
printf "could not malloc(%d) text area\n" $poolsz
: $(($1 = -1))
: $((__tmp = $1)) $((argc = $4)) $((argv_ = $5)) $((fd = $6)) $((bt = $7)) $((ty = $8)) $((poolsz = $9)) $((idmain = ${10})) $((pc = ${11})) $((sp = ${12})) $((bp = ${13})) $((a = ${14})) $((cycle = ${15})) $((i = ${16})) $((t = ${17})) $((__t1 = ${18})) $(($1 = __tmp))
return
fi
if _malloc __t1 $poolsz; [ $((!(_data = __t1))) != 0 ] ; then
if _malloc _data $poolsz; [ $((!_data)) != 0 ] ; then
printf "could not malloc(%d) data area\n" $poolsz
: $(($1 = -1))
: $((__tmp = $1)) $((argc = $4)) $((argv_ = $5)) $((fd = $6)) $((bt = $7)) $((ty = $8)) $((poolsz = $9)) $((idmain = ${10})) $((pc = ${11})) $((sp = ${12})) $((bp = ${13})) $((a = ${14})) $((cycle = ${15})) $((i = ${16})) $((t = ${17})) $((__t1 = ${18})) $(($1 = __tmp))
return
fi
if _malloc __t1 $poolsz; [ $((!(sp = __t1))) != 0 ] ; then
if _malloc sp $poolsz; [ $((!sp)) != 0 ] ; then
printf "could not malloc(%d) stack area\n" $poolsz
: $(($1 = -1))
: $((__tmp = $1)) $((argc = $4)) $((argv_ = $5)) $((fd = $6)) $((bt = $7)) $((ty = $8)) $((poolsz = $9)) $((idmain = ${10})) $((pc = ${11})) $((sp = ${12})) $((bp = ${13})) $((a = ${14})) $((cycle = ${15})) $((i = ${16})) $((t = ${17})) $((__t1 = ${18})) $(($1 = __tmp))
Expand All @@ -879,13 +879,13 @@ _main() { # argc: $2, argv: $3
: $((_$((_id + _Tk)) = _Char))
_next __
idmain=$_id
if _malloc __t1 $poolsz; [ $((!(_lp = _p = __t1))) != 0 ] ; then
if _malloc _p $poolsz; [ $((!(_lp = _p))) != 0 ] ; then
printf "could not malloc(%d) source area\n" $poolsz
: $(($1 = -1))
: $((__tmp = $1)) $((argc = $4)) $((argv_ = $5)) $((fd = $6)) $((bt = $7)) $((ty = $8)) $((poolsz = $9)) $((idmain = ${10})) $((pc = ${11})) $((sp = ${12})) $((bp = ${13})) $((a = ${14})) $((cycle = ${15})) $((i = ${16})) $((t = ${17})) $((__t1 = ${18})) $(($1 = __tmp))
return
fi
if _read __t1 $fd $_p $((poolsz - 1)); [ $((i = __t1)) -le 0 ] ; then
if _read i $fd $_p $((poolsz - 1)); [ $i -le 0 ] ; then
printf "read() returned %d\n" $i
: $(($1 = -1))
: $((__tmp = $1)) $((argc = $4)) $((argv_ = $5)) $((fd = $6)) $((bt = $7)) $((ty = $8)) $((poolsz = $9)) $((idmain = ${10})) $((pc = ${11})) $((sp = ${12})) $((bp = ${13})) $((a = ${14})) $((cycle = ${15})) $((i = ${16})) $((t = ${17})) $((__t1 = ${18})) $(($1 = __tmp))
Expand Down
10 changes: 5 additions & 5 deletions examples/compiled/cp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ _malloc() { # $2 = object size
defarr() { _malloc $1 $2; }

defarr _buffer 1024
: $((__t1 = len = c = dst = src = args = argc = 0))
: $((len = c = dst = src = args = argc = 0))
_main() { let argc $2; let args $3
let src; let dst; let c; let len; let __t1
let src; let dst; let c; let len
if [ $argc != 3 ] ; then
printf "Usage: cp <source> <destination>\n"
: $(($1 = 1))
endlet $1 __t1 len c dst src args argc
endlet $1 len c dst src args argc
return
fi
_open src $((_$((args + 1)))) 0
Expand All @@ -39,10 +39,10 @@ _main() { let argc $2; let args $3
if [ $dst = 0 ] ; then
_file_error __ $((_$((args + 2))))
fi
while _read __t1 $src $_buffer 1024; [ $((len = __t1)) != 0 ]; do
while _read len $src $_buffer 1024; [ $len != 0 ]; do
_write __ $dst $_buffer $len
done
endlet $1 __t1 len c dst src args argc
endlet $1 len c dst src args argc
}

# Runtime library
Expand Down
8 changes: 4 additions & 4 deletions examples/compiled/wc-stdin.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ _is_word_separator() { let c $2
endlet $1 c
}

: $((__t1 = last_sep = sep = c = chars = words = lines = 0))
: $((last_sep = sep = c = chars = words = lines = 0))
_main() {
let lines; let words; let chars; let c; let sep; let last_sep; let __t1
let lines; let words; let chars; let c; let sep; let last_sep
lines=0
words=0
chars=0
sep=0
last_sep=0
while _getchar __t1; [ $((c = __t1)) != -1 ]; do
while _getchar c; [ $c != -1 ]; do
: $((chars += 1))
if [ $c = $__NEWLINE__ ] ; then
: $((lines += 1))
Expand All @@ -28,7 +28,7 @@ _main() {
last_sep=$sep
done
printf "%d %d %d\n" $lines $words $chars
endlet $1 __t1 last_sep sep c chars words lines
endlet $1 last_sep sep c chars words lines
}

# Character constants
Expand Down
8 changes: 4 additions & 4 deletions examples/compiled/welcome.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
set -e -u -f
LC_ALL=C

: $((__t1 = i = name = 0))
: $((i = name = 0))
_main() {
let name; let i; let __t1
let name; let i
_malloc name 100
i=0
printf "What is your name?\n"
while { _getchar __t1; [ $((_$((name + i)) = __t1)) != -1 ]; } && [ $((_$((name + i)))) != $__NEWLINE__ ]; do
while { _getchar _$((name + i)); [ $((_$((name + i)))) != -1 ]; } && [ $((_$((name + i)))) != $__NEWLINE__ ]; do
: $((i += 1))
done
: $((_$((name + i)) = __NUL__))
printf "Hello, "
_put_pstr __ $name
printf "\n"
endlet $1 __t1 i name
endlet $1 i name
}

# Character constants
Expand Down
109 changes: 62 additions & 47 deletions sh.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ bool comp_body(ast node, STMT_CTX stmt_ctx);
bool comp_statement(ast node, STMT_CTX stmt_ctx);
void mark_mutable_variables_body(ast node);
void handle_enum_struct_union_type_decl(ast node);
ast handle_side_effects_go(ast node, int executes_conditionally);

// Because concatenating strings is very expensive and a common operation, we
// use a tree structure to represent the concatenated strings. That way, the
Expand Down Expand Up @@ -948,23 +949,69 @@ ast replaced_fun_calls_tail = 0;
ast conditional_fun_calls = 0;
ast conditional_fun_calls_tail = 0;
ast literals_inits = 0;
int executes_conditionally = 0;
int contains_side_effects = 0;
bool contains_side_effects = 0;

// Hoists a function call out of an expression so it can be emitted separately.
//
// The call `node` is paired with the location that receives its result and the
// pair is appended to one of two pending lists: conditional_fun_calls when
// executes_conditionally is set, replaced_fun_calls otherwise.
//
// Parameters:
//   node                    - AST node of the function call; child 1 holds the arguments.
//   assign_to               - node that receives the call's result, or 0 to have a
//                             fresh temporary identifier allocated for it.
//   executes_conditionally  - selects which pending list the call is appended to and is
//                             forwarded to handle_side_effects_go for the arguments.
//
// Returns the node standing for the call's result (assign_to, or the fresh identifier).
ast handle_fun_call_side_effect(ast node, ast assign_to, bool executes_conditionally) {
// When the caller supplies assign_to, no temporary is allocated and the gensym
// counter is simply restored to its value on entry once the arguments are processed.
int start_gensym_ix = gensym_ix;
ast new_tail;
ast sub2;

if (assign_to == 0) {
assign_to = fresh_ident(); // Unique identifier for the function call
// Remember the counter value *after* allocating the result temporary so that
// the restore below keeps that temporary reserved.
start_gensym_ix = gensym_ix;

// At this point, the temporary identifier of the variable is not live and
// can be used to evaluate the function arguments. This reduces the number
// of temporary variables.
gensym_ix -= 1;
}

// Traverse the arguments and replace them with the result of handle_side_effects_go
sub2 = get_child(node, 1);
if (sub2 != 0) { // Check if not an empty list
if (get_op(sub2) == ',') {
// Several arguments: walk the ','-linked cells and rewrite each element in place.
while (get_op(sub2) == ',') {
set_child(sub2, 0, handle_side_effects_go(get_child(sub2, 0), executes_conditionally));
sub2 = get_child(sub2, 1);
}
} else { // sub2 is the first argument, not wrapped in a cons cell
sub2 = handle_side_effects_go(sub2, executes_conditionally);
set_child(node, 1, sub2);
}
}

// All the temporary variables used for the function parameters can be
// reused after the function call, so resetting the gensym counter.
gensym_ix = start_gensym_ix;

// Build the (result location, call) pair, then wrap it in a list cell whose
// next pointer (child 1) is 0 until another call is appended after it.
new_tail = new_ast2(',', assign_to, node);
new_tail = new_ast2(',', new_tail, 0);
// Append the new cell to the selected list, initializing the head if the list
// is empty and always advancing the matching tail pointer.
if (executes_conditionally) {
if (conditional_fun_calls == 0) { conditional_fun_calls = new_tail; }
else { set_child(conditional_fun_calls_tail, 1, new_tail); }
conditional_fun_calls_tail = new_tail;
}
else {
if (replaced_fun_calls == 0) { replaced_fun_calls = new_tail; }
else { set_child(replaced_fun_calls_tail, 1, new_tail); }
replaced_fun_calls_tail = new_tail;
}

return assign_to;
}

// Function calls and other side effects can't appear inside $(( ... )), so we need to handle them separately.
// Unconditional function calls are replaced with unique identifiers and returned as a list with their new identifiers.
// Pre/post-increments/decrements are mapped to a pre-side-effect and replaced with the corresponding operation.
// Note that pre/post-increments/decrements of function calls are not supported.
ast handle_side_effects_go(ast node, int executes_conditionally) {
ast handle_side_effects_go(ast node, bool executes_conditionally) {
int op = get_op(node);
int nb_children = get_nb_children(node);
int start_gensym_ix;
ast sub1;
ast sub2;
ast previous_conditional_fun_calls;
ast left_conditional_fun_calls;
ast right_conditional_fun_calls;
ast new_tail;

if (nb_children == 0) {
if (op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_DOLLAR || op == INTEGER || op == CHARACTER) {
Expand Down Expand Up @@ -995,51 +1042,19 @@ ast handle_side_effects_go(ast node, int executes_conditionally) {
}
} else if (nb_children == 2) {
if (op == '(') { // Function call
sub1 = fresh_ident(); // Unique identifier for the function call

start_gensym_ix = gensym_ix;

// At this point, the temporary identifier of the variable is not live and
// can be used to evaluate the function arguments. This reduces the number
// of temporary variables.
gensym_ix -= 1;

// Traverse the arguments and replace them with the result of handle_side_effects_go
sub2 = get_child(node, 1);
if (sub2 != 0) { // Check if not an empty list
if (get_op(sub2) == ',') {
while (get_op(sub2) == ',') {
set_child(sub2, 0, handle_side_effects_go(get_child(sub2, 0), executes_conditionally));
sub2 = get_child(sub2, 1);
}
} else { // sub2 is the first argument, not wrapped in a cons cell
sub2 = handle_side_effects_go(sub2, executes_conditionally);
set_child(node, 1, sub2);
}
}

// All the temporary variables used for the function parameters can be
// reused after the function call, so resetting the gensym counter.
gensym_ix = start_gensym_ix;

new_tail = new_ast2(',', sub1, node);
new_tail = new_ast2(',', new_tail, 0);
if (executes_conditionally) {
if (conditional_fun_calls == 0) { conditional_fun_calls = new_tail; }
else { set_child(conditional_fun_calls_tail, 1, new_tail); }
conditional_fun_calls_tail = new_tail;
}
else {
if (replaced_fun_calls == 0) { replaced_fun_calls = new_tail; }
else { set_child(replaced_fun_calls_tail, 1, new_tail); }
replaced_fun_calls_tail = new_tail;
return handle_fun_call_side_effect(node, 0, executes_conditionally);
} else if (op == '=') {
if (get_op(get_child(node, 1)) == '(') { // Function call
// In that case, we reuse the left hand side of the assignment as the result location
return handle_fun_call_side_effect(get_child(node, 1), get_child(node, 0), executes_conditionally);
} else {
sub1 = handle_side_effects_go(get_child(node, 0), executes_conditionally);
sub2 = handle_side_effects_go(get_child(node, 1), executes_conditionally); // We could inline that one since the assignment to the global variable is done after the last handle_side_effects_go call
return new_ast2(op, sub1, sub2);
}

return sub1;
} else if (op == '&' || op == '|' || op == '<' || op == '>' || op == '+' || op == '-' || op == '*' || op == '/'
|| op == '%' || op == '^' || op == ',' || op == EQ_EQ || op == EXCL_EQ || op == LT_EQ || op == GT_EQ || op == LSHIFT || op == RSHIFT || op == '=' || op == '['
|| op == '%' || op == '^' || op == ',' || op == EQ_EQ || op == EXCL_EQ || op == LT_EQ || op == GT_EQ || op == LSHIFT || op == RSHIFT || op == '['
|| op == '.' || op == ARROW ) {
// We can't place handle_side_effects_go directly in new_ast2 call because six-cc creates a global variable that gets overwritten in the other handle_side_effects_go calls
sub1 = handle_side_effects_go(get_child(node, 0), executes_conditionally);
sub2 = handle_side_effects_go(get_child(node, 1), executes_conditionally); // We could inline that one since the assignment to the global variable is done after the last handle_side_effects_go call
return new_ast2(op, sub1, sub2);
Expand Down

0 comments on commit f30d6fd

Please sign in to comment.