iclgpu

Intel Compute Library for GPU Core library

How to define and implement a new function

Step 1: Create a function definition using the functions definition syntax described below. For example:

function copy_float {
    params {
        uint n,
        blob input float src,
        blob output float dst
    },
    implementations {
        naive
    }
}

Step 2: Run the autogenerator:

core/tools/functions_hpp_gen.py <definition-file> <implementations-dir> <headers-dir>

e.g., with functions.fdef as the definition file and the current directory as both the implementations and headers directory:

core/tools/functions_hpp_gen.py functions.fdef . .

The following files will be created:

The function header

copy_float.hpp - should not be modified:

// copy_float function autogenerated file
#pragma once
#include "functions_base.hpp"

namespace iclgpu { namespace functions {
struct copy_float
{
    ...
    struct params
    {
        uint32_t n;
        blob<float, input> src;
        blob<float, output> dst;
    };

    struct score : public function_score<params_num>
    {
        float& n;
        float& src;
        float& dst;
        ...
    };

    using impl = function_impl_execute<copy_float>;
    using selector = selector_accept<copy_float>;
};

namespace implementations
{
struct copy_float_naive : copy_float::impl
{
    ...
    bool accept(const copy_float::params& params, copy_float::score& score) override;
    event execute(const copy_float::params& params, const std::vector<event>& dep_events) override;
};
}
...
} } // namespace iclgpu::functions
// End of copy_float autogenerated

The OpenCL kernel module for the naive implementation of the function

copy_float/copy_float_naive.cl:

__kernel void copy_float_naive(int n, __global float* src, __global float* dst)
{
    // TODO Add implementation code here:
}

The C++ source code for the naive implementation of the function

copy_float/copy_float_naive.cpp:

...
namespace iclgpu { namespace functions { namespace implementations {

bool copy_float_naive::accept(const copy_float::params& params, copy_float::score& score)
{
    // TODO Add implementation code here:
    return false;
}

event copy_float_naive::execute(const copy_float::params& params, const std::vector<event>& dep_events)
{
    // TODO Modify implementation code here:
    auto engine = context()->get_engine();
    auto kernel = engine->get_kernel(kernel_name, module_name);
    size_t buf_size = 1;

    kernel->set_arg(0, params.n);
    auto buf_src = engine->get_input_buffer(params.src, buf_size);
    kernel->set_arg(1, buf_src);
    auto buf_dst = engine->get_output_buffer(params.dst, buf_size);
    kernel->set_arg(2, buf_dst);

    auto gws = nd_range(1);
    auto lws = null_range;
    auto options = kernel_options(gws, lws);
    kernel->set_options(options);

    return kernel->submit(dep_events);
}

} } } // namespace iclgpu::functions::implementations

Step 3: Write the OpenCL kernel for the naive implementation in copy_float/copy_float_naive.cl:

__kernel void copy_float_naive(__global float* src, __global float* dst)
{
    const size_t gid = get_global_id(0);
    dst[gid] = src[gid];
}

NOTE: The kernel parameter n is removed; the global work size set in execute() below already equals n, so no bound check is needed inside the kernel.

Step 4: Modify copy_float/copy_float_naive.cpp:

  1. Write the accept() method so that it correctly validates the parameters (a stricter variant is sketched after the code below).
  2. Modify the execute() method so that it properly calls the kernel written in the .cl file.

bool copy_float_naive::accept(const copy_float::params& params, copy_float::score& score)
{
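    // The naive implementation places no restrictions on its parameters, so it accepts every call.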
    return true;
}

event copy_float_naive::execute(const copy_float::params& params, const std::vector<event>& dep_events)
{
    auto engine = context()->get_engine();
    auto kernel = engine->get_kernel(kernel_name, module_name);
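    // Number of float elements to copy; buf_size is also used as the global work size below.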
    size_t buf_size = params.n;

    auto buf_src = engine->get_input_buffer(params.src, buf_size);
    kernel->set_arg(0, buf_src);
    auto buf_dst = engine->get_output_buffer(params.dst, buf_size);
    kernel->set_arg(1, buf_dst);

    auto gws = nd_range(buf_size);
    auto lws = null_range;
    auto options = kernel_options(gws, lws);
    kernel->set_options(options);

    return kernel->submit(dep_events);
}
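
If an implementation can handle only some inputs, accept() is the place to express that: returning false should tell the selector to skip this implementation. For illustration only (the n != 0 check is a made-up rule, not something the library requires), a stricter accept() might look like:

bool copy_float_naive::accept(const copy_float::params& params, copy_float::score& score)
{
    // Reject empty workloads; any other parameter combination is fine for this kernel.
    return params.n != 0;
}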

NOTE: The kernel parameter n is removed, so the argument indices for src and dst are decremented by one.

Step 5: Execute the function implementation:

#include <iclgpu/context.hpp>
#include <iclgpu/dispatcher.hpp>
#include "copy_float.hpp"
...
uint n = 5;
float src[5] = { 1.f, 2.f, 3.f, 4.f, 5.f };
float dst[5];

iclgpu::functions::copy_float::params params{ n, src, dst };
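// Create a context and let its dispatcher select and run an accepted copy_float implementation.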
iclgpu::context::create()
    ->get_dispatcher()
    ->execute_function<iclgpu::functions::copy_float>(params)
    ->wait();
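
Once wait() returns, the copy has completed and dst can be read on the host. A minimal check, purely illustrative and assuming the arrays from the snippet above:

#include <cassert>
...
for (uint i = 0; i < n; ++i)
{
    assert(dst[i] == src[i]); // every element was copied
}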

NOTE: The function sources autogenerator does not modify existing .cpp and .cl files.

Functions definition syntax

struct_definition | func_definition...

struct_definition => struct class_name '{' scalar_type field_name [',' scalar_type field_name...] '}'

func_definition => function class_name '{' params_def ',' impls_def '}'

scalar_type => simple_type | struct class_name

simple_type => char|uchar|short|ushort|int|uint|long|ulong|float|double

params_def => params '{' param_type field_name [',' param_type field_name...] '}'

param_type => blob_type|scalar_type

blob_type => blob [input|output|inout] (void | scalar_type)

impls_def => implementations '{' class_name[',' class_name...] '}'

class_name => word allowed for C++ class/structure name

field_name => word allowed for C++ class/structure data member name
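
For illustration, a definition that uses the struct and blob forms above could look like the following (point, scale_points, factor, origin and points are made-up example names, not part of the library):

struct point {
    float x,
    float y
}

function scale_points {
    params {
        uint n,
        float factor,
        struct point origin,
        blob inout float points
    },
    implementations {
        naive
    }
}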


Copyright © 2018, Intel® Corporation