function copy_float {
params {
uint n,
blob input float src,
blob output float dst
},
implementations {
naive
}
}
core/tools/functions_hpp_gen.py <definition-file> <implementations-dir> <headers-dir>
e.g.:
core/tools/functions_hpp_gen.py functions.fdef . .
The following files will be created:
copy_float.hpp - should not be modified:
// copy_float function autogenerated file
#pragma once
#include "functions_base.hpp"
namespace iclgpu { namespace functions {
struct copy_float
{
...
struct params
{
uint32_t n;
blob<float, input> src;
blob<float, output> dst;
};
struct score : public function_score<params_num>
{
float& n;
float& src;
float& dst;
...
};
using impl = function_impl_execute<copy_float>;
using selector = selector_accept<copy_float>;
};
namespace implementations
{
struct copy_float_naive : copy_float::impl
{
...
bool accept(const copy_float::params& params, copy_float::score& score) override;
event execute(const copy_float::params& params, const std::vector<event>& dep_events) override;
};
}
...
} } // namespace iclgpu::functions
// End of copy_float autogenerated
copy_float/copy_float_naive.cl:
__kernel void copy_float_naive(int n, __global float* src, __global float* dst)
{
// TODO Add implementation code here:
}
copy_float/copy_float_naive.cpp:
...
namespace iclgpu { namespace functions { namespace implementations {
bool copy_float_naive::accept(const copy_float::params& params, copy_float::score& score)
{
// TODO Add implementation code here:
return false;
}
event copy_float_naive::execute(const copy_float::params& params, const std::vector<event>& dep_events)
{
// TODO Modify implementation code here:
auto engine = context()->get_engine();
auto kernel = engine->get_kernel(kernel_name, module_name);
size_t buf_size = 1;
kernel->set_arg(0, params.n);
auto buf_src = engine->get_input_buffer(params.src, buf_size);
kernel->set_arg(1, buf_src);
auto buf_dst = engine->get_output_buffer(params.dst, buf_size);
kernel->set_arg(2, buf_dst);
auto gws = nd_range(1);
auto lws = null_range;
auto options = kernel_options(gws, lws);
kernel->set_options(options);
return kernel->submit(dep_events);
}
} } } // namespace iclgpu::functions::implementations
// Element-wise copy: each work-item copies exactly one float from src to dst.
// The host launches one work-item per element, so no bounds check or
// explicit element count argument is needed here.
__kernel void copy_float_naive(__global float* src, __global float* dst)
{
    // get_global_id() returns size_t; the index must be declared before use.
    const size_t gid = get_global_id(0);
    dst[gid] = src[gid];
}
NOTE: Kernel parameter n is removed.
- Write the accept() method so that it correctly validates the parameters.
- Modify the execute() method so that it properly calls the kernel written in the .cl file.
// Decides whether this implementation can handle the given parameters.
// The naive copy accepts every parameter set unconditionally and does not
// touch the score object.
bool copy_float_naive::accept(const copy_float::params& params, copy_float::score& score)
{
    // Neither argument is inspected; mark them as intentionally unused.
    static_cast<void>(params);
    static_cast<void>(score);
    return true;
}
// Binds the src/dst blobs to the copy_float_naive kernel and submits it.
//   params     - element count (n) plus the source and destination blobs.
//   dep_events - events forwarded to kernel->submit().
// Returns the event produced by kernel->submit().
event copy_float_naive::execute(const copy_float::params& params, const std::vector<event>& dep_events)
{
    // params.n is used both as the buffer size and as the global work size,
    // i.e. one work-item per element.
    size_t element_count = params.n;

    auto engine = context()->get_engine();
    auto kernel = engine->get_kernel(kernel_name, module_name);

    // Kernel argument 0: source buffer.
    auto src_buffer = engine->get_input_buffer(params.src, element_count);
    kernel->set_arg(0, src_buffer);

    // Kernel argument 1: destination buffer.
    auto dst_buffer = engine->get_output_buffer(params.dst, element_count);
    kernel->set_arg(1, dst_buffer);

    // Global range covers every element; null_range is passed as the local range.
    auto global_range = nd_range(element_count);
    auto local_range = null_range;
    auto launch_options = kernel_options(global_range, local_range);
    kernel->set_options(launch_options);

    return kernel->submit(dep_events);
}
NOTE: Kernel parameter n is removed and indexes for src and dst parameters are decremented.
#include <iclgpu/context.hpp>
#include <iclgpu/dispatcher.hpp>
#include "copy_float.hpp"
...
// Element count passed as params.n; both arrays below hold 5 floats.
uint n = 5;
float src[5] = { 1.f, 2.f, 3.f, 4.f, 5.f };
// Destination array, left uninitialized before the call.
float dst[5];
// Fields are initialized in the declaration order of copy_float::params: n, src, dst.
iclgpu::functions::copy_float::params params{ n, src, dst };
// Create a context, obtain its dispatcher, run copy_float with these params,
// then call wait() on the returned object
// (presumably blocks until the copy has finished - TODO confirm).
iclgpu::context::create()
->get_dispatcher()
->execute_function<iclgpu::functions::copy_float>(params)
->wait();
The function sources autogenerator does not modify existing .cpp and .cl files.
struct_definition | func_definition...
struct_definition => struct class_name '{' scalar_type field_name [',' scalar_type field_name...] '}'
func_definition => function class_name '{' params_def, impls_def '}'
scalar_type => simple_type | struct class_name
simple_type => char|uchar|short|ushort|int|uint|long|ulong|float|double
params_def => params '{' param_type field_name [',' param_type field_name...] '}'
param_type => blob_type|scalar_type
blob_type => blob [input|output|inout] (void | scalar_type)
impls_def => implementations '{' class_name[',' class_name...] '}'
class_name => word allowed for C++ class/structure name
field_name => word allowed for C++ class/structure data member name
Copyright © 2018, Intel® Corporation