Skip to content

Commit

Permalink
cpu/drcbearm64.cpp: Optimise load/store and call generation.
Browse files Browse the repository at this point in the history
  • Loading branch information
cuavas committed Feb 1, 2025
1 parent 3b67f9a commit 7af44ce
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 47 deletions.
80 changes: 34 additions & 46 deletions src/devices/cpu/drcbearm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,21 +196,21 @@ class ThrowableErrorHandler : public ErrorHandler

// helper functions

a64::Vec select_register(a64::Vec const &reg, uint32_t regsize)
inline a64::Vec select_register(a64::Vec const &reg, uint32_t regsize)
{
if (regsize == 4)
return reg.s();
return reg.d();
}

a64::Gp select_register(a64::Gp const &reg, uint32_t regsize)
inline a64::Gp select_register(a64::Gp const &reg, uint32_t regsize)
{
if (regsize == 4)
return reg.w();
return reg.x();
}

bool is_valid_immediate_mask(uint64_t val, size_t bytes)
inline bool is_valid_immediate_mask(uint64_t val, size_t bytes)
{
// all zeros and all ones aren't allowed, and disallow any value with bits outside of the max bit range
if (val == 0 || val == make_bitmask<uint64_t>(bytes * 8))
Expand All @@ -231,18 +231,28 @@ bool is_valid_immediate_mask(uint64_t val, size_t bytes)
return population_count_64(val) == head - tail;
}

bool is_valid_immediate(uint64_t val, size_t bits)
inline bool is_valid_immediate(uint64_t val, size_t bits)
{
assert(bits < 64);
return val < (uint64_t(1) << bits);
}

bool is_valid_immediate_signed(int64_t val, size_t bits)
inline constexpr bool is_valid_immediate_signed(int64_t val, size_t bits)
{
return util::sext(val, bits) == val;
}

bool emit_add_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val)
inline constexpr bool is_valid_offset(int64_t diff, int max_shift)
{
if (is_valid_immediate_signed(diff, 9))
return true; // 9-bit signed offset
else if (diff >= 0 && (diff < (1 << (12 + max_shift))) && !(diff & make_bitmask<int64_t>(max_shift)))
return true; // 12-bit unsigned offset shifted by operand size
else
return false;
}

inline bool emit_add_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val)
{
// If the bottom 12 bits are 0s then an optimized form can be used if the remaining bits are <= 12
if (is_valid_immediate(val, 12) || ((val & 0xfff) == 0 && is_valid_immediate(val >> 12, 12)))
Expand All @@ -254,7 +264,7 @@ bool emit_add_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &sr
return false;
}

bool emit_sub_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val)
inline bool emit_sub_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val)
{
if (val < 0)
val = -val;
Expand Down Expand Up @@ -509,11 +519,11 @@ void drcbe_arm64::get_imm_relative(a64::Assembler &a, const a64::Gp &reg, const
a.mov(reg, val);
}

void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode, const a64::Reg &reg, const void *ptr) const
inline void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode, const a64::Reg &reg, int max_shift, const void *ptr) const
{
// If it can fit as a constant offset
// If it can fit as an immediate offset
const int64_t diff = (int64_t)ptr - (int64_t)m_baseptr;
if (is_valid_immediate_signed(diff, 9))
if (is_valid_offset(diff, max_shift))
{
a.emit(opcode, reg, arm::Mem(BASE_REG, diff));
return;
Expand Down Expand Up @@ -549,29 +559,7 @@ void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode,

if (diff >= 0)
{
int shift = 0;
int max_shift = 0;

if (opcode == a64::Inst::kIdLdrb || opcode == a64::Inst::kIdLdrsb)
max_shift = 0;
else if (opcode == a64::Inst::kIdLdrh || opcode == a64::Inst::kIdLdrsh)
max_shift = 1;
else if (opcode == a64::Inst::kIdLdrsw)
max_shift = 2;
else
max_shift = (reg.isGpW() || reg.isVecS()) ? 2 : 3;

for (int i = 0; i < 64 && max_shift > 0; i++)
{
if ((uint64_t)ptr & ((uint64_t)(1) << i))
{
shift = i;
break;
}
}

if (shift > max_shift)
shift = max_shift;
const int shift = (diff & make_bitmask<int64_t>(max_shift)) ? 0 : max_shift;

if (is_valid_immediate(diff >> shift, 32))
{
Expand All @@ -595,7 +583,7 @@ void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode,

a.adrp(MEM_SCRATCH_REG, targetpage);

if (is_valid_immediate_signed(pageoffs, 9))
if (is_valid_offset(pageoffs, max_shift))
{
a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG, pageoffs));
return;
Expand All @@ -612,18 +600,18 @@ void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode,
a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
}

void drcbe_arm64::emit_ldr_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr, reg, ptr); }
void drcbe_arm64::emit_ldrb_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrb, reg, ptr); }
void drcbe_arm64::emit_ldrh_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrh, reg, ptr); }
void drcbe_arm64::emit_ldrsb_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsb, reg, ptr); }
void drcbe_arm64::emit_ldrsh_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsh, reg, ptr); }
void drcbe_arm64::emit_ldrsw_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsw, reg, ptr); }
void drcbe_arm64::emit_str_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr, reg, ptr); }
void drcbe_arm64::emit_strb_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrb, reg, ptr); }
void drcbe_arm64::emit_strh_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrh, reg, ptr); }
void drcbe_arm64::emit_ldr_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr, reg, reg.isGpW() ? 2 : 3, ptr); }
void drcbe_arm64::emit_ldrb_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrb, reg, 0, ptr); }
void drcbe_arm64::emit_ldrh_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrh, reg, 1, ptr); }
void drcbe_arm64::emit_ldrsb_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsb, 0, reg, ptr); }
void drcbe_arm64::emit_ldrsh_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsh, 1, reg, ptr); }
void drcbe_arm64::emit_ldrsw_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsw, 2, reg, ptr); }
void drcbe_arm64::emit_str_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr, reg, reg.isGpW() ? 2 : 3, ptr); }
void drcbe_arm64::emit_strb_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrb, reg, 0, ptr); }
void drcbe_arm64::emit_strh_mem(a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrh, reg, 1, ptr); }

void drcbe_arm64::emit_float_ldr_mem(a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr_v, reg, ptr); }
void drcbe_arm64::emit_float_str_mem(a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr_v, reg, ptr); }
void drcbe_arm64::emit_float_ldr_mem(a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr_v, reg, reg.isVecS() ? 2 : 3, ptr); }
void drcbe_arm64::emit_float_str_mem(a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr_v, reg, reg.isVecS() ? 2 : 3, ptr); }

void drcbe_arm64::mov_reg_param(a64::Assembler &a, uint32_t regsize, const a64::Gp &dst, const be_parameter &src) const
{
Expand Down Expand Up @@ -803,7 +791,7 @@ void drcbe_arm64::call_arm_addr(a64::Assembler &a, const void *offs) const
{
const uint64_t codeoffs = a.code()->baseAddress() + a.offset();
const int64_t reloffs = (int64_t)offs - codeoffs;
if (is_valid_immediate_signed(reloffs, 26))
if (is_valid_immediate_signed(reloffs, 26 + 2))
{
a.bl(offs);
}
Expand Down
2 changes: 1 addition & 1 deletion src/devices/cpu/drcbearm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class drcbe_arm64 : public drcbe_interface
// helper functions
void get_imm_relative(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const uint64_t ptr) const;

void emit_ldr_str_base_mem(asmjit::a64::Assembler &a, asmjit::a64::Inst::Id opcode, const asmjit::a64::Reg &reg, const void *ptr) const;
void emit_ldr_str_base_mem(asmjit::a64::Assembler &a, asmjit::a64::Inst::Id opcode, const asmjit::a64::Reg &reg, int max_shift, const void *ptr) const;
void emit_ldr_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
void emit_ldrb_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
void emit_ldrh_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp &reg, const void *ptr) const;
Expand Down

0 comments on commit 7af44ce

Please sign in to comment.