Skip to content

Commit

Permalink
Add memcpy function.
Browse files Browse the repository at this point in the history
  • Loading branch information
tangjj11 committed Apr 9, 2024
1 parent a9c47e2 commit 8f386ae
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 36 deletions.
17 changes: 9 additions & 8 deletions clang/lib/DPCT/ASTTraversal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10373,11 +10373,16 @@ void MemoryMigrationRule::memcpyMigration(
}

if (ReplaceStr.empty()) {
ReplaceStr = MapNames::getDpctNamespace();
if (DpctGlobalInfo::useExtBindlessImages() &&
NameRef.find("cudaMemcpy3D") == 0) {
ReplaceStr += "experimental::";
}
if (IsAsync) {
ReplaceStr = MapNames::getDpctNamespace() + "async_dpct_memcpy";
ReplaceStr += "async_dpct_memcpy";
requestFeature(HelperFeatureEnum::device_ext);
} else {
ReplaceStr = MapNames::getDpctNamespace() + "dpct_memcpy";
ReplaceStr += "dpct_memcpy";
requestFeature(HelperFeatureEnum::device_ext);
}
}
Expand Down Expand Up @@ -11611,12 +11616,8 @@ void MemoryDataTypeRule::emplaceMemcpy3DDeclarations(const VarDecl *VD,
emplaceTransformation(ReplaceVarDecl::getVarDeclReplacement(
VD, "// These variables are defined for 3d matrix memory copy."));
}
emplaceParamDecl(VD,
DpctGlobalInfo::useExtBindlessImages()
? MapNames::getDpctNamespace() +
"experimental::image_mem_pitched_data"
: MapNames::getDpctNamespace() + "pitched_data",
false, "0", "from_data", "to_data");
emplaceParamDecl(VD, MapNames::getDpctNamespace() + "pitched_data", false,
"0", "from_data", "to_data");
requestFeature(HelperFeatureEnum::device_ext);
emplaceParamDecl(VD, getCtadType("id"), true, "0", "from_pos", "to_pos");
emplaceParamDecl(VD, getCtadType("range"), true, "1", "size");
Expand Down
87 changes: 62 additions & 25 deletions clang/runtime/dpct-rt/include/dpct/bindless_images.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,6 @@ namespace experimental {

#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES

class image_mem_pitched_data : public pitched_data {
public:
using pitched_data::pitched_data;
image_mem_pitched_data(
sycl::ext::oneapi::experimental::image_mem_handle handle, size_t pitch,
size_t x, size_t y)
: pitched_data(nullptr, pitch, x, y), _handle(handle) {}
image_mem_pitched_data(pitched_data &p)
: pitched_data(p.get_data_ptr(), p.get_pitch(), p.get_x(), p.get_y()) {}
image_mem_pitched_data &operator=(pitched_data &p) {
this->set_data_ptr(p.get_data_ptr());
this->set_pitch(p.get_pitch());
this->set_x(p.get_x());
this->set_y(p.get_y());
return *this;
}

private:
sycl::ext::oneapi::experimental::image_mem_handle _handle;
};

/// The wrapper class of bindless image memory handle.
class image_mem_wrapper {
public:
Expand Down Expand Up @@ -109,10 +88,9 @@ class image_mem_wrapper {
return _sub_wrappers + level;
}
/// Convert to image mem pitched data.
image_mem_pitched_data to_pitched_data() {
return image_mem_pitched_data(_handle,
_desc.width * _channel.get_total_size(),
_desc.width, _desc.height);
pitched_data to_pitched_data() {
return pitched_data(this, _desc.width * _channel.get_total_size(),
_desc.width, _desc.height);
}

private:
Expand Down Expand Up @@ -292,6 +270,46 @@ dpct_memcpy(void *src, sycl::ext::oneapi::experimental::image_mem_handle dest,
dest_offset, desc_dest, copy_extend));
return event_list;
}

static inline std::vector<sycl::event>
dpct_memcpy(pitched_data to, sycl::id<3> to_pos, pitched_data from,
sycl::id<3> from_pos, sycl::range<3> size,
memcpy_direction direction = automatic,
sycl::queue &q = get_default_queue()) {
auto to_offset = sycl::range<3>(to_pos[0], to_pos[1], to_pos[2]);
auto from_offset = sycl::range<3>(from_pos[0], from_pos[1], from_pos[2]);
auto to_img_mem = reinterpret_cast<image_mem_wrapper *>(to.get_data_ptr());
auto from_img_mem =
reinterpret_cast<image_mem_wrapper *>(from.get_data_ptr());
if (to_img_mem != nullptr && from_img_mem != nullptr) {
std::vector<sycl::event> event_list;
auto temp = (void *)sycl::malloc_device(size.size(), q);
// TODO: Need change logic when sycl support image_mem to image_mem copy.
const auto src_extend = sycl::range<3>(from.get_pitch(), from.get_y(), 1);
event_list.push_back(q.ext_oneapi_copy(
from_img_mem->get_handle(), from_offset, from_img_mem->get_desc(), temp,
sycl::range<3>(0, 0, 0), src_extend, size));
const auto dest_extend = sycl::range<3>(to.get_pitch(), to.get_y(), 1);
event_list.push_back(q.ext_oneapi_copy(
temp, sycl::range<3>(0, 0, 0), dest_extend, to_img_mem->get_handle(),
to_offset, to_img_mem->get_desc(), size));
sycl::free(temp, q);
return event_list;
} else if (to_img_mem != nullptr) {
const auto src_extend = sycl::range<3>(from.get_pitch(), from.get_y(), 1);
return {q.ext_oneapi_copy(from.get_data_ptr(), from_offset, src_extend,
to_img_mem->get_handle(), to_offset,
to_img_mem->get_desc(), size)};
} else if (from_img_mem != nullptr) {
const auto dest_extend = sycl::range<3>(to.get_pitch(), to.get_y(), 1);
return {q.ext_oneapi_copy(from_img_mem->get_handle(), from_offset,
from_img_mem->get_desc(), to.get_data_ptr(),
to_offset, dest_extend, size)};
} else {
return dpct::detail::dpct_memcpy(q, to, to_pos, from, from_pos, size,
direction);
}
}
} // namespace detail

/// Create bindless image according to image data and sampling info.
Expand Down Expand Up @@ -771,6 +789,25 @@ static inline void dpct_memcpy(image_mem_wrapper *src, size_t w_offset_src,
sycl::free(temp, q);
}

/// TODO:
static inline void async_dpct_memcpy(pitched_data to, sycl::id<3> to_pos,
pitched_data from, sycl::id<3> from_pos,
sycl::range<3> size,
memcpy_direction direction = automatic,
sycl::queue &q = get_default_queue()) {
detail::dpct_memcpy(to, to_pos, from, from_pos, size, direction, q);
}

/// TODO:
static inline void dpct_memcpy(pitched_data to, sycl::id<3> to_pos,
pitched_data from, sycl::id<3> from_pos,
sycl::range<3> size,
memcpy_direction direction = automatic,
sycl::queue &q = get_default_queue()) {
sycl::event::wait(
detail::dpct_memcpy(to, to_pos, from, from_pos, size, direction, q));
}

using image_mem_wrapper_ptr = image_mem_wrapper *;

#endif
Expand Down
6 changes: 3 additions & 3 deletions clang/test/dpct/texture/texture_object_bindless_image.cu
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ int main() {
cudaMemcpyToArrayAsync(pArr, w_offest_dest, h_offest_dest, input, w * h,
cudaMemcpyHostToDevice);

// CHECK: dpct::experimental::image_mem_pitched_data p3d_from_data_ct1, p3d_to_data_ct1;
// CHECK: dpct::pitched_data p3d_from_data_ct1, p3d_to_data_ct1;
// CHECK-NEXT: sycl::id<3> p3d_from_pos_ct1(0, 0, 0), p3d_to_pos_ct1(0, 0, 0);
// CHECK-NEXT: sycl::range<3> p3d_size_ct1(1, 1, 1);
// CHECK-NEXT: dpct::memcpy_direction p3d_direction_ct1;
Expand All @@ -151,9 +151,9 @@ int main() {
p3d.extent = e;
// CHECK: p3d_direction_ct1 = k;
p3d.kind = k;
// CHECK: dpct::dpct_memcpy(p3d_to_data_ct1, p3d_to_pos_ct1, p3d_from_data_ct1, p3d_from_pos_ct1, p3d_size_ct1, p3d_direction_ct1);
// CHECK: dpct::experimental::dpct_memcpy(p3d_to_data_ct1, p3d_to_pos_ct1, p3d_from_data_ct1, p3d_from_pos_ct1, p3d_size_ct1, p3d_direction_ct1);
cudaMemcpy3D(&p3d);
// CHECK: dpct::async_dpct_memcpy(p3d_to_data_ct1, p3d_to_pos_ct1, p3d_from_data_ct1, p3d_from_pos_ct1, p3d_size_ct1, p3d_direction_ct1);
// CHECK: dpct::experimental::async_dpct_memcpy(p3d_to_data_ct1, p3d_to_pos_ct1, p3d_from_data_ct1, p3d_from_pos_ct1, p3d_size_ct1, p3d_direction_ct1);
cudaMemcpy3DAsync(&p3d);

// CHECK: dpct::image_data resDesc0, resDesc1, resDesc2, resDesc3, resDesc4;
Expand Down

0 comments on commit 8f386ae

Please sign in to comment.