Skip to content

Commit

Permalink
Use x86 JS/JNS for GE/LT(ADD/SUB(_,_), 0) to avoid comparison with zero
Browse files Browse the repository at this point in the history
  • Loading branch information
dstogov committed Dec 26, 2024
1 parent 3e67509 commit 82ee409
Showing 1 changed file with 74 additions and 30 deletions.
104 changes: 74 additions & 30 deletions ir_x86.dasc
Original file line number Diff line number Diff line change
Expand Up @@ -1945,7 +1945,8 @@ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
} else if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
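/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow (GT needs SF == OF);
 * LT/GE(ADD(_, _), 0) are fine: they only test the sign flag of the result (SETS/SETNS) */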
((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
(insn->op == IR_EQ || insn->op == IR_NE))) {
(insn->op == IR_EQ || insn->op == IR_NE ||
insn->op == IR_LT || insn->op == IR_GE))) {
/* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
Expand Down Expand Up @@ -2610,7 +2611,8 @@ store_int:
((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
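/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow (GT needs SF == OF);
 * LT/GE(ADD(_, _), 0) are fine: they only test the sign flag of the result (JS/JNS) */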
((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE)))) {
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE ||
op2_insn->op == IR_LT || op2_insn->op == IR_GE)))) {
/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
Expand Down Expand Up @@ -2752,7 +2754,8 @@ store_int:
if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
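/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow (GT needs SF == OF);
 * LT/GE(ADD(_, _), 0) are fine: they only test the sign flag of the result (JS/JNS) */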
((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) {
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE ||
op2_insn->op == IR_LT || op2_insn->op == IR_GE))) {
if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
Expand All @@ -2776,7 +2779,8 @@ store_int:
if ((op_insn->op == IR_OR || op_insn->op == IR_AND || op_insn->op == IR_XOR) ||
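/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow (GT needs SF == OF);
 * LT/GE(ADD(_, _), 0) are fine: they only test the sign flag of the result (JS/JNS) */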
((op_insn->op == IR_ADD || op_insn->op == IR_SUB) &&
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) {
(op2_insn->op == IR_EQ || op2_insn->op == IR_NE ||
op2_insn->op == IR_LT || op2_insn->op == IR_GE))) {
if (ctx->ir_base[op_insn->op1].op == IR_LOAD
&& ctx->ir_base[op_insn->op1].op2 == store_insn->op2) {
if (ir_in_same_block(ctx, op_insn->op1)
Expand Down Expand Up @@ -5607,7 +5611,7 @@ static void ir_emit_cmp_int_common2(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_ins
ir_emit_cmp_int_common(ctx, type, root, cmp_insn, op1_reg, op1, op2_reg, op2);
}

static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg)
static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg, bool after_op)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
Expand All @@ -5622,10 +5626,18 @@ static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg)
| setne Rb(def_reg)
break;
case IR_LT:
| setl Rb(def_reg)
if (after_op) {
| sets Rb(def_reg)
} else {
| setl Rb(def_reg)
}
break;
case IR_GE:
| setge Rb(def_reg)
if (after_op) {
| setns Rb(def_reg)
} else {
| setge Rb(def_reg)
}
break;
case IR_LE:
| setle Rb(def_reg)
Expand Down Expand Up @@ -5735,7 +5747,7 @@ static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}
ir_emit_cmp_int_common(ctx, type, def, insn, op1_reg, op1, op2_reg, op2);
_ir_emit_setcc_int(ctx, op, def_reg);
_ir_emit_setcc_int(ctx, op, def_reg, 0);
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
Expand Down Expand Up @@ -5832,7 +5844,7 @@ static void ir_emit_testcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)

IR_ASSERT(def_reg != IR_REG_NONE);
ir_emit_test_int_common(ctx, def, insn->op1, insn->op);
_ir_emit_setcc_int(ctx, insn->op, def_reg);
_ir_emit_setcc_int(ctx, insn->op, def_reg, 0);
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
Expand All @@ -5843,7 +5855,7 @@ static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);

IR_ASSERT(def_reg != IR_REG_NONE);
_ir_emit_setcc_int(ctx, insn->op, def_reg);
_ir_emit_setcc_int(ctx, insn->op, def_reg, 1);
if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, insn->type, def, def_reg);
}
Expand Down Expand Up @@ -5987,7 +5999,7 @@ static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
}
}

static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp)
static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp, bool after_op)
{
uint32_t true_block, false_block;
ir_backend_data *data = ctx->data;
Expand Down Expand Up @@ -6018,10 +6030,18 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint
| jne =>true_block
break;
case IR_LT:
| jl =>true_block
if (after_op) {
| js =>true_block
} else {
| jl =>true_block
}
break;
case IR_GE:
| jge =>true_block
if (after_op) {
| jns =>true_block
} else {
| jge =>true_block
}
break;
case IR_LE:
| jle =>true_block
Expand Down Expand Up @@ -6157,7 +6177,7 @@ static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_i
if (!same_comparison) {
ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2);
}
ir_emit_jcc(ctx, b, def, insn, next_block, op, 1);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 1, 0);
}

static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
Expand All @@ -6173,13 +6193,13 @@ static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_
}

ir_emit_test_int_common(ctx, def, op2, op);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 1);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 1, 0);
}

static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
{
ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 0);
ir_emit_jcc(ctx, b, def, insn, next_block, op, 0, 0);
}

static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
Expand Down Expand Up @@ -6227,7 +6247,7 @@ static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, u
}
| ASM_MEM_IMM_OP cmp, type, mem, 0
}
ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1);
ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1, 0);
}

static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
Expand Down Expand Up @@ -9102,7 +9122,7 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
}
}

static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block, uint8_t op, void *addr, bool int_cmp)
static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block, uint8_t op, void *addr, bool int_cmp, bool after_op)
{
ir_backend_data *data = ctx->data;
dasm_State **Dst = &data->dasm_state;
Expand Down Expand Up @@ -9136,10 +9156,18 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
| je =>target
break;
case IR_LT:
| jge =>target
if (after_op) {
| jns =>target
} else {
| jge =>target
}
break;
case IR_GE:
| jl =>target
if (after_op) {
| js =>target
} else {
| jl =>target
}
break;
case IR_LE:
| jg =>target
Expand Down Expand Up @@ -9207,10 +9235,18 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
| je &target_addr
break;
case IR_LT:
| jge &target_addr
if (after_op) {
| jns &target_addr
} else {
| jge &target_addr
}
break;
case IR_GE:
| jl &target_addr
if (after_op) {
| js &target_addr
} else {
| jl &target_addr
}
break;
case IR_LE:
| jg &target_addr
Expand Down Expand Up @@ -9275,10 +9311,18 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next
| jne &addr
break;
case IR_LT:
| jl &addr
if (after_op) {
| js &addr
} else {
| jl &addr
}
break;
case IR_GE:
| jge &addr
if (after_op) {
| jns &addr
} else {
| jge &addr
}
break;
case IR_LE:
| jle &addr
Expand Down Expand Up @@ -9393,7 +9437,7 @@ static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, ui
} else {
op = IR_NE;
}
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 0);
} else {
|.if X64
if (insn->op == IR_GUARD) {
Expand Down Expand Up @@ -9471,7 +9515,7 @@ static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
op ^= 1; // reverse
}

return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 0);
}

static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
Expand All @@ -9482,7 +9526,7 @@ static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *i
if (insn->op == IR_GUARD) {
op ^= 1; // reverse
}
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0, 0);
}

static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
Expand All @@ -9491,7 +9535,7 @@ static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn
ir_op op = (insn->op == IR_GUARD) ? IR_EQ : IR_NE;

ir_emit_test_int_common(ctx, def, insn->op2, op);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 0);
}

static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
Expand All @@ -9502,7 +9546,7 @@ static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *
if (insn->op == IR_GUARD) {
op ^= 1; // reverse
}
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1);
return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1, 1);
}

static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
Expand Down Expand Up @@ -10617,7 +10661,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
} else {
IR_ASSERT(op >= IR_EQ && op <= IR_UGT);
}
ir_emit_jcc(ctx, b, i, insn, _ir_next_block(ctx, _b), op, 1);
ir_emit_jcc(ctx, b, i, insn, _ir_next_block(ctx, _b), op, 1, 1);
}
break;
case IR_GUARD_CMP_INT:
Expand Down

0 comments on commit 82ee409

Please sign in to comment.