
Commit

xe: conv_v2: remove hw from kernel descriptor
echeresh committed Jan 7, 2025
1 parent dbc96ca commit f766ccd
Showing 9 changed files with 48 additions and 47 deletions.
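Note: the change below moves the runtime hardware handle (hw_t) out of the serializable kernel_desc_t and passes it explicitly wherever plans are built or descriptors are validated; the descriptor keeps only the lightweight hw_desc_t (an ngen::HW value). A minimal sketch of the before/after pattern follows — hw_t, kernel_desc_t, and the byte estimate here are simplified stand-ins for illustration, not the actual oneDNN definitions.

// Sketch only: simplified stand-ins showing the refactoring pattern
// (the descriptor stays a pure description, hardware is supplied by the caller).
struct hw_t {
    int grf_size() const { return grf_size_bytes; }
    int grf_size_bytes = 64; // illustrative value
};

struct kernel_desc_t {
    int regs = 128;
    // Before: bool is_supported() const;  // read a stored hw_t member
    // After: the caller passes the hardware it already owns.
    bool is_supported(const hw_t &hw) const {
        int estimated_bytes = 4096; // placeholder for estimate_grf_usage_bytes()
        return estimated_bytes <= hw.grf_size() * regs;
    }
};

int main() {
    hw_t hw;
    kernel_desc_t desc;
    // Real call sites follow the same shape, e.g.
    //   auto plan = create_conv_plan(desc, exec_cfg.hw());
    //   plan_builder_t builder(desc, hw);
    return desc.is_supported(hw) ? 0 : 1;
}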
4 changes: 2 additions & 2 deletions src/gpu/intel/jit/v2/conv/builder.cpp
@@ -544,7 +544,7 @@ class post_op_builder_t : public ir_builder_t {
emit(func.call({expr_t(rhs.elems()), rhs_buf}));
}
ir_assert(lhs.nblocks() > 0);
- int max_simd = (2 * desc_.hw.grf_size()) / sizeof(float);
+ int max_simd = (2 * desc_.hw_desc.grf_size()) / sizeof(float);
auto &lhs0 = lhs.blocks()[0];
int elems = math::gcd(max_simd, lhs0.int_size());
bool is_bcast = !rhs.dim_sizes().has(lhs0.dim);
@@ -966,7 +966,7 @@ class conv_builder_t : public ir_builder_t {

stmt_t build_ir(const exec_config_t &exec_cfg, const kernel_desc_t &desc,
var_manager_t &var_mgr) {
- auto plan = create_conv_plan(desc);
+ auto plan = create_conv_plan(desc, exec_cfg.hw());
if (!plan) ir_except_not_implemented("Cannot create plan.");

ir_info() << desc << std::endl;
15 changes: 8 additions & 7 deletions src/gpu/intel/jit/v2/conv/kernel_desc.cpp
@@ -192,26 +192,27 @@ int estimate_grf_usage_bytes(const kernel_desc_t &desc) {
return into<int>(abc_size);
}

- bool is_tg_size_ok(const kernel_desc_t &desc) {
-     int max_tg_size = desc.hw.max_tg_size(desc.regs, desc.simd);
+ bool is_tg_size_ok(const kernel_desc_t &desc, const hw_t &hw) {
+     int max_tg_size = hw.max_tg_size(desc.regs, desc.simd);
return desc.thread_group_tile.elems() <= max_tg_size;
}

bool is_grf_usage_ok(const kernel_desc_t &desc) {
int size = estimate_grf_usage_bytes(desc);
- if (size > desc.hw.grf_size() * desc.regs) { return false; }
+ if (size > desc.hw_desc.grf_size() * desc.regs) { return false; }
return true;
}

- bool kernel_desc_t::is_supported() const {
+ bool kernel_desc_t::is_supported(const hw_t &hw) const {
ir_check(prop != prop_kind::undef)
<< "Invalid prop: " << ir_utils::to_string(prop);
- ir_check(!hw.is_undef()) << "Invalid hw: " << jit::to_string(hw.to_ngen());
+ ir_check(hw_desc.hw != ngen::HW::Unknown)
+         << "Invalid hw: " << jit::to_string(hw_desc.hw);
ir_check(fma != fma_kind_t::undef)
<< "Invalid fma: " << jit::to_string(fma);
ir_check(simd != 0) << "Invalid simd: " << simd;
ir_check(regs != 0) << "Invalid regs: " << regs;
- ir_check(is_tg_size_ok(*this))
+ ir_check(is_tg_size_ok(*this, hw))
<< "Invalid thread_group_tile: " << thread_group_tile;
if (use_stream_k) {
ir_check(c_type() == accumulator_type(a_type(), b_type()))
@@ -417,7 +418,7 @@ std::string kernel_desc_t::str() const {
oss << "Source tag: " << src_tag << std::endl;
oss << "Weights tag: " << wei_tag << std::endl;
oss << "Destination tag: " << dst_tag << std::endl;
oss << "HW: " << jit::to_string(hw.to_ngen())
oss << "HW: " << jit::to_string(hw_desc.hw)
<< std::endl;
oss << "FMA kind: " << to_string(fma) << std::endl;
oss << "SIMD: " << simd << std::endl;
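A note on the bound used by is_grf_usage_ok above: the estimated register footprint is compared against grf_size() * regs, now read from hw_desc rather than the removed hw member. A tiny worked example with hypothetical numbers (64-byte GRFs, 128 registers; these values are illustrative, not taken from the patch):

#include <cassert>

int main() {
    // Hypothetical hardware: 64-byte GRFs, kernel built for 128 registers.
    const int grf_size = 64;
    const int regs = 128;
    const int grf_budget_bytes = grf_size * regs; // 8192 bytes

    // An estimate of 6 KiB passes the check; 9 KiB would be rejected.
    assert(6 * 1024 <= grf_budget_bytes);
    assert(9 * 1024 > grf_budget_bytes);
    return 0;
}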
4 changes: 2 additions & 2 deletions src/gpu/intel/jit/v2/conv/kernel_desc.hpp
@@ -49,6 +49,7 @@ namespace conv {
struct hw_desc_t {
ngen::HW hw = ngen::HW::Unknown;

+ int grf_size() const { return ngen::GRF::bytes(hw); }
void stringify(std::ostream &out) const { jit::stringify(out, hw); }
void parse(std::istream &in) { jit::parse(in, hw); }
#if __cplusplus >= 202002L
@@ -289,11 +290,10 @@ class kernel_desc_t : public kernel_desc_base_t {
extensions_t ext;
gpu_post_ops_t post_ops;

- hw_t hw;
bool is_finalized = false;

bool is_empty() const { return prop == prop_kind::undef; }
- bool is_supported() const;
+ bool is_supported(const hw_t &hw) const;
void set(const std::string &s);
void set_defaults();
void finalize(const prb_reqs_t &final_reqs);
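The design choice in this header: hw_desc_t carries only the serializable architecture enum and derives what descriptor-side checks need (the GRF width), while the full runtime hw_t — device handle, limits such as max_tg_size — is no longer stored in the descriptor and is passed to is_supported(), create_conv_plan(), and plan_builder_t instead. A trimmed sketch of that split (the nGEN include that provides ngen::HW and ngen::GRF::bytes is omitted; stringify/parse and C++20 comparison support are elided):

// Trimmed sketch: serializable description vs. runtime handle.
// ngen::HW and ngen::GRF::bytes come from the nGEN dependency used by oneDNN.
struct hw_desc_t {
    ngen::HW hw = ngen::HW::Unknown;
    // Enough for descriptor-side checks such as is_grf_usage_ok().
    int grf_size() const { return ngen::GRF::bytes(hw); }
};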
45 changes: 23 additions & 22 deletions src/gpu/intel/jit/v2/conv/plan.cpp
@@ -335,7 +335,8 @@ class multiply_info_t {
class plan_builder_t {
public:
plan_builder_t() = default;
- plan_builder_t(const kernel_desc_t &desc) : desc_(desc) {
+ plan_builder_t(const kernel_desc_t &desc, const hw_t &hw)
+     : desc_(desc), hw_(hw) {
reqs_ = desc_.reqs;
desc_.reqs = prb_reqs_t();
}
@@ -437,12 +438,12 @@ class plan_builder_t {
}

plan_t init_plan() {
- plan_t plan(desc_.hw);
+ plan_t plan(hw_);
if (!try_init_plan(plan, reqs_) || !check_plan(plan)) return plan_t();

// Re-create plan to ensure all collected requirements are cross-used
// between sub-plans.
- plan = plan_t(desc_.hw);
+ plan = plan_t(hw_);
if (!try_init_plan(plan, reqs_) || !check_plan(plan)) {
ir_error_not_expected();
return plan_t();
@@ -525,10 +526,10 @@ class plan_builder_t {
} else {
auto &src = load.reg_layout();
auto dst = mul_info_.to_compatible_layout(abc, load.reg_layout());
- reorder = reorder_plan_t(desc_.hw, src, dst);
+ reorder = reorder_plan_t(hw_, src, dst);
reg_layout = reorder.dst;
}
- plan = x2r_plan_t(desc_.hw);
+ plan = x2r_plan_t(hw_);
plan.tensor_kind = abc;
plan.load = std::move(load);
plan.reorder = std::move(reorder);
@@ -545,7 +546,7 @@
auto inst_tile = mul_info_.inst_tile();
auto acc_layout = mul_info_.acc_layout(a, b, c_layout_);
ir_check(!acc_layout.is_empty()) << "init_fma_plan: cannot vectorize.";
- plan = fma_plan_t(desc_.hw);
+ plan = fma_plan_t(hw_);
plan.simd = desc_.simd;
plan.fma = desc_.fma;
plan.a_layout = a;
@@ -648,7 +649,7 @@ class plan_builder_t {
if (bias_reg_layout != store.reg_layout()) {
auto store_layout = store.reg_layout();
if (bias_reg_layout != store_layout) {
- plan.bias_reorder = reorder_plan_t(desc_.hw);
+ plan.bias_reorder = reorder_plan_t(hw_);
plan.bias_reorder.src = std::move(bias_reg_layout);
plan.bias_reorder.dst = std::move(store_layout);
}
@@ -691,7 +692,7 @@ class plan_builder_t {
ir_assert(k_tg > 1);
ir_assert(desc_.thread_group_tile.elems() == k_tg)
<< "Local k-slicing assumes no split by M/N.";
- ir_check(c_layout.size() % desc_.hw.grf_size() == 0)
+ ir_check(c_layout.size() % hw_.grf_size() == 0)
<< "init_slm_reduce_plan: c_layout is not aligned to a "
"reigster boundary.";

@@ -740,11 +741,11 @@ class plan_builder_t {

auto &load_layout = load.reg_layout();
auto reduced_layout = load_layout.map(split_view.tile());
- auto reduce = reduce_plan_t(desc_.hw, load_layout, reduced_layout);
+ auto reduce = reduce_plan_t(hw_, load_layout, reduced_layout);
auto c_post_layout = std::move(reduced_layout);
c_post_layout.remove(k_dim);

- plan = slm_reduce_plan_t(desc_.hw);
+ plan = slm_reduce_plan_t(hw_);
plan.store = std::move(store);
plan.load = std::move(load);
plan.reduce = std::move(reduce);
@@ -770,7 +771,7 @@ class plan_builder_t {
auto c_reg_tile_layout = c_reg_layout.map(tile);
auto store_layout = store.reg_layout().map(tile);
if (c_reg_tile_layout != store_layout) {
- plan.reorder = reorder_plan_t(desc_.hw);
+ plan.reorder = reorder_plan_t(hw_);
plan.reorder.src = std::move(c_reg_tile_layout);
plan.reorder.dst = std::move(store_layout);
}
@@ -798,11 +799,11 @@ class plan_builder_t {
}

bool check_plan(const plan_t &plan) const {
- int grf_bound = desc_.hw.grf_size() * desc_.regs;
+ int grf_bound = hw_.grf_size() * desc_.regs;
int grf_bytes = plan.grf_usage_bytes();
ir_check(grf_bytes <= grf_bound) << "check_plan: out of registers";
int slm_bound = compute::device_info_t::max_slm_size_per_tg(
- convert_ngen_arch_to_dnnl(desc_.hw.to_ngen()),
+ convert_ngen_arch_to_dnnl(hw_.to_ngen()),
into<int>(desc_.thread_group_tile.elems()), desc_.regs > 128);
int slm_bytes = plan.slm_usage_bytes();
ir_check(slm_bytes <= slm_bound) << "check_plan: out of SLM";
@@ -818,7 +819,7 @@ class plan_builder_t {
if (type.is_f32()) op = send_op_t::atomic_fadd;
}
send_params_t params;
- params.hw = desc_.hw;
+ params.hw = hw_;
params.kind = (send_kind != send_kind_t::undef
? send_kind
: desc_.access_kind(op, abc));
@@ -837,6 +838,7 @@ class plan_builder_t {
}

kernel_desc_t desc_;
+ hw_t hw_;

dim_mapper_manager_t dim_mapper_manager_;
multiply_info_t mul_info_;
@@ -852,16 +854,16 @@ class plan_builder_t {
};

template <typename KernelDescT>
- plan_t create_conv_plan_impl(KernelDescT &desc, bool finalize) {
-     if (!desc.is_supported()) return plan_t();
+ plan_t create_conv_plan_impl(KernelDescT &desc, const hw_t &hw, bool finalize) {
+     if (!desc.is_supported(hw)) return plan_t();
ir_assert(!desc.has_spec_strategy())
<< "Kernel descriptor strategies are required to be specialized "
"before plan creation";
if (!finalize) {
ir_assert(desc.is_finalized)
<< "Kernel descriptor must be finalized before plan creation";
}
- plan_builder_t builder(desc);
+ plan_builder_t builder(desc, hw);
auto plan = builder.build();
if (plan) {
if (finalize) {
@@ -873,18 +875,17 @@ plan_t create_conv_plan_impl(KernelDescT &desc, bool finalize) {
return plan;
}

- plan_t create_conv_plan(const kernel_desc_t &desc) {
-     return create_conv_plan_impl(desc, /*finalize=*/false);
+ plan_t create_conv_plan(const kernel_desc_t &desc, const hw_t &hw) {
+     return create_conv_plan_impl(desc, hw, /*finalize=*/false);
}

bool finalize_conv_desc_impl(kernel_desc_t &desc, const hw_t &hw,
const problem_t *prb, plan_t *out_plan) {
if (desc.is_empty()) return false;
if (desc.hw_desc.hw != hw.to_ngen()) return false;
- desc.hw = hw;
- if (!desc.is_supported()) return false;
+ if (!desc.is_supported(hw)) return false;
if (desc.is_finalized) return true;
- auto plan = create_conv_plan_impl(desc, /*finalize=*/true);
+ auto plan = create_conv_plan_impl(desc, hw, /*finalize=*/true);
if (plan) {
if (out_plan) *out_plan = plan;
if (prb && !desc.matches(*prb)) return false;
2 changes: 1 addition & 1 deletion src/gpu/intel/jit/v2/conv/plan.hpp
@@ -452,7 +452,7 @@ struct plan_t : public base_plan_t {
IR_DEFINE_DUMP()
};

- plan_t create_conv_plan(const kernel_desc_t &desc);
+ plan_t create_conv_plan(const kernel_desc_t &desc, const hw_t &hw);
bool finalize_conv_desc(
kernel_desc_t &desc, const problem_t &prb, plan_t *plan = nullptr);
bool finalize_conv_desc(
5 changes: 3 additions & 2 deletions src/gpu/intel/jit/v2/conv/planner/bench.cpp
@@ -623,14 +623,15 @@ bench_data_t bench(const bench_manager_t &bench_mger,
const kernel_desc_t &_kernel_desc, int nprbs) {
auto kernel_desc = _kernel_desc;
if (!finalize_conv_desc(kernel_desc, bench_mger.hw())) return {};
- bench_runner_t runner(bench_mger, bench_input_params_t(kernel_desc, nprbs));
+ bench_runner_t runner(bench_mger,
+         bench_input_params_t(kernel_desc, bench_mger.hw(), nprbs));
return runner.bench(kernel_desc);
}

bool try_create(
const bench_manager_t &bench_mger, const kernel_desc_t &kernel_desc) {
clear_primitive_cache();
- bench_input_params_t params(kernel_desc, /*nprbs=*/1);
+ bench_input_params_t params(kernel_desc, bench_mger.hw(), /*nprbs=*/1);
bench_task_t task(generate_problems(params)[0]);
auto engine = bench_mger.get_engine();
auto guard = plan_preset_t::instance().make_guard(kernel_desc);
9 changes: 4 additions & 5 deletions src/gpu/intel/jit/v2/conv/planner/bench.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright 2023-2024 Intel Corporation
+ * Copyright 2023-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -67,11 +67,10 @@ struct bench_input_params_t {
int nprbs = 0;

bench_input_params_t() = default;
- bench_input_params_t(
-         const kernel_desc_t &kernel_desc, int nprbs = default_nprbs)
-     : nprbs(nprbs) {
+ bench_input_params_t(const kernel_desc_t &kernel_desc, const hw_t &hw,
+         int nprbs = default_nprbs)
+     : hw(hw), nprbs(nprbs) {
ir_assert(kernel_desc.is_finalized);
- hw = kernel_desc.hw;
prop = kernel_desc.prop;
src_tag = kernel_desc.src_tag;
wei_tag = kernel_desc.wei_tag;
9 changes: 4 additions & 5 deletions src/gpu/intel/jit/v2/conv/planner/search.cpp
@@ -340,11 +340,11 @@ class search_kernel_desc_group_t {
descs_.push_back(desc);
}

- bench_input_params_t bench_input_params(int nprbs) const {
+ bench_input_params_t bench_input_params(int nprbs, const hw_t &hw) const {
if (descs_.empty()) return bench_input_params_t();
auto &kd = descs_.front();
bench_input_params_t params;
- params.hw = kd.hw;
+ params.hw = hw;
params.prop = kd.prop;
params.src_tag = kd.src_tag;
params.wei_tag = kd.wei_tag;
@@ -575,7 +575,8 @@ bench_data_set_t bench_kernel_desc_group(const bench_manager_t &bench_mger,
const search_kernel_desc_group_t &desc_group, int nprbs,
int max_descs) {
auto eng = bench_mger.get_engine();
- bench_runner_t runner(bench_mger, desc_group.bench_input_params(nprbs));
+ bench_runner_t runner(
+         bench_mger, desc_group.bench_input_params(nprbs, bench_mger.hw()));
bench_data_set_t bd_set;
search_sequence_t seq(desc_group.descs(), max_descs);
while (seq) {
@@ -656,8 +657,6 @@ void auto_search(
kernel_desc_t desc;
parse_result_t parse_result;
iface.parse(line, desc, &parse_result);
- // TODO: Remove.
- desc.hw = hw_t(bench_mger.get_engine().get());
kernel_search_manager_t mger(
bench_mger, search_params_t(desc, parse_result));
mger.search();
2 changes: 1 addition & 1 deletion src/gpu/intel/utils.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright 2023-2024 Intel Corporation
+ * Copyright 2023-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
