diff --git a/src/cpu/x64/matmul/brgemm_matmul_copy_utils.cpp b/src/cpu/x64/matmul/brgemm_matmul_copy_utils.cpp index 37dffa12001..7397506cd60 100644 --- a/src/cpu/x64/matmul/brgemm_matmul_copy_utils.cpp +++ b/src/cpu/x64/matmul/brgemm_matmul_copy_utils.cpp @@ -2102,7 +2102,8 @@ struct jit_brgemm_matmul_copy_b_int8_t : public jit_brgemm_matmul_copy_b_t, constexpr static int reg_src_offs_ = 0; constexpr static int reg_tr_src_offs_ = 8; - constexpr static int stack_space_needed_ = 16; + constexpr static int reg_current_K_pad_offs_ = 16; + constexpr static int stack_space_needed_ = 24; const int comp_acc_idx_; @@ -2708,9 +2709,11 @@ void jit_brgemm_matmul_copy_b_int8_t::generate() { auto compute_K_loop = [&](bool is_N_tail) { int ncolumns = is_N_tail ? conf_->N_tail : conf_->N_blk; + mov(reg_K_iters, ptr[param1 + GET_OFF(current_K_pad)]); + mov(ptr[rsp + reg_current_K_pad_offs_], reg_K_iters); mov(reg_K_iters, ptr[param1 + GET_OFF(current_K_iters)]); compute_K_loop_body(reg_K_iters, ncolumns, is_N_tail, false); - mov(reg_K_iters, ptr[param1 + GET_OFF(current_K_pad)]); + mov(reg_K_iters, ptr[rsp + reg_current_K_pad_offs_]); compute_K_loop_body(reg_K_iters, ncolumns, is_N_tail, true); };