Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable DMABUF support for ROCm #309

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -247,14 +247,21 @@ AC_ARG_ENABLE([rocm],
[],
[enable_rocm=no])

AC_ARG_ENABLE([rocm_dmabuf],
[AS_HELP_STRING([--enable-rocm-dmabuf],
[Enable ROCm DMABUF feature])
],
[],
[enable_rocm_dmabuf=no])

AC_ARG_WITH([rocm],
[AS_HELP_STRING([--with-rocm=@<:@ROCm installation path@:>@],
[Provide path to ROCm installation])
],
[AS_CASE([$with_rocm],
[yes|no], [],
[CPPFLAGS="-I$with_rocm/include $CPPFLAGS"
LDFLAGS="-L$with_rocm/lib64 -Wl,-rpath=$with_rocm/lib64 -L$with_rocm/lib -Wl,-rpath=$with_rocm/lib -lamdhip64 $LDFLAGS"])
LDFLAGS="-L$with_rocm/lib64 -Wl,-rpath=$with_rocm/lib64 -L$with_rocm/lib -Wl,-rpath=$with_rocm/lib -lamdhip64 -lhsa-runtime64 $LDFLAGS"])
])

AS_IF([test "x$enable_rocm" = xyes], [
Expand All @@ -270,6 +277,11 @@ AS_IF([test "x$enable_rocm" = xyes], [

AM_CONDITIONAL([ROCM], [test x$enable_rocm = xyes])

AS_IF([test "x$enable_rocm_dmabuf" = xyes] && [test "x$HAVE_REG_DMABUF_MR" = "xyes"], [
AC_DEFINE([HAVE_ROCM_DMABUF], [1], [Enable ROCm DMABUF feature])
AC_CHECK_FUNCS([hsa_amd_portable_export_dmabuf])
])

AC_TRY_LINK([
#include <infiniband/verbs.h>],
[int x = IBV_ACCESS_ON_DEMAND;],[HAVE_EX_ODP=yes], [HAVE_EX_ODP=no])
Expand Down
21 changes: 20 additions & 1 deletion src/perftest_parameters.c
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,11 @@ static void usage(const char *argv0, VerbType verb, TestType tst, int connection
if (rocm_memory_supported()) {
printf(" --use_rocm=<rocm device id>");
printf(" Use selected ROCm device for GPUDirect RDMA testing\n");

if (rocm_memory_dmabuf_supported()) {
printf(" --use_rocm_dmabuf");
printf(" Use ROCm DMA-BUF for GPUDirect RDMA testing\n");
}
}

if (neuron_memory_supported()) {
Expand Down Expand Up @@ -875,6 +880,7 @@ static void init_perftest_params(struct perftest_parameters *user_param)
user_param->cuda_device_id = 0;
user_param->cuda_device_bus_id = NULL;
user_param->use_cuda_dmabuf = 0;
user_param->use_rocm_dmabuf = 0;
user_param->use_data_direct = 0;
user_param->rocm_device_id = 0;
user_param->neuron_core_id = 0;
Expand Down Expand Up @@ -2370,6 +2376,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
static int use_cuda_dmabuf_flag = 0;
static int use_data_direct_flag = 0;
static int use_rocm_flag = 0;
static int use_rocm_dmabuf_flag = 0;
static int use_neuron_flag = 0;
static int use_neuron_dmabuf_flag = 0;
static int use_hl_flag = 0;
Expand Down Expand Up @@ -2543,6 +2550,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
{ .name = "use_cuda_dmabuf", .has_arg = 0, .flag = &use_cuda_dmabuf_flag, .val = 1},
{ .name = "use_data_direct", .has_arg = 0, .flag = &use_data_direct_flag, .val = 1},
{ .name = "use_rocm", .has_arg = 1, .flag = &use_rocm_flag, .val = 1},
{ .name = "use_rocm_dmabuf", .has_arg = 0, .flag = &use_rocm_dmabuf_flag, .val = 1},
{ .name = "use_neuron", .has_arg = 1, .flag = &use_neuron_flag, .val = 1},
{ .name = "use_neuron_dmabuf", .has_arg = 0, .flag = &use_neuron_dmabuf_flag, .val = 1},
{ .name = "use_hl", .has_arg = 1, .flag = &use_hl_flag, .val = 1},
Expand Down Expand Up @@ -2982,6 +2990,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
if (((use_cuda_flag || use_cuda_bus_id_flag) && !cuda_memory_supported()) ||
(use_cuda_dmabuf_flag && !cuda_memory_dmabuf_supported()) ||
(use_rocm_flag && !rocm_memory_supported()) ||
(use_rocm_dmabuf_flag && !rocm_memory_dmabuf_supported()) ||
(use_neuron_flag && !neuron_memory_supported()) ||
(use_neuron_dmabuf_flag && !neuron_memory_dmabuf_supported()) ||
(use_hl_flag && !hl_memory_supported()) ||
Expand All @@ -2995,7 +3004,8 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
}
/* Memory types are mutually exclucive, make sure we were not already asked to use a different memory type. */
if (user_param->memory_type != MEMORY_HOST &&
(mmap_file_flag || use_mlu_flag || use_rocm_flag || use_neuron_flag || use_hl_flag ||
(mmap_file_flag || use_mlu_flag || use_neuron_flag || use_hl_flag ||
(use_rocm_flag && user_param->memory_type != MEMORY_ROCM) ||
((use_cuda_flag || use_cuda_bus_id_flag) && user_param->memory_type != MEMORY_CUDA))) {
fprintf(stderr, " Can't use multiple memory types\n");
return FAILURE;
Expand Down Expand Up @@ -3032,6 +3042,15 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
user_param->memory_create = rocm_memory_create;
use_rocm_flag = 0;
}
if (use_rocm_dmabuf_flag) {
user_param->use_rocm_dmabuf = 1;
if (user_param->memory_type != MEMORY_ROCM) {
fprintf(stderr, "ROCm DMA-BUF cannot be used without ROCm\n");
free(duplicates_checker);
return FAILURE;
}
use_rocm_dmabuf_flag = 0;
}
if (use_neuron_flag) {
user_param->neuron_core_id = strtol(optarg, NULL, 0);
if (user_param->neuron_core_id < 0) {
Expand Down
1 change: 1 addition & 0 deletions src/perftest_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,7 @@ struct perftest_parameters {
int use_cuda_dmabuf;
int use_data_direct;
int rocm_device_id;
int use_rocm_dmabuf;
int neuron_core_id;
int use_neuron_dmabuf;
char *hl_device_bus_id;
Expand Down
88 changes: 88 additions & 0 deletions src/rocm_memory.c
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
* Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
*/

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/utsname.h>
#include "rocm_memory.h"
#include <hip/hip_runtime_api.h>
#if defined HAVE_HIP_HIP_VERSION_H
#include <hip/hip_version.h>
#endif
#include "perftest_parameters.h"
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>

#define ROCM_CHECK(stmt) \
do { \
Expand All @@ -25,6 +29,7 @@
struct rocm_memory_ctx {
struct memory_ctx base;
int device_id;
bool use_dmabuf;
};


Expand Down Expand Up @@ -69,6 +74,49 @@ int rocm_memory_init(struct memory_ctx *ctx) {
fprintf(stderr, "Couldn't initialize ROCm device\n");
return FAILURE;
}

#ifdef HAVE_ROCM_DMABUF
if (rocm_ctx->use_dmabuf) {
int dmabuf_supported = 0;
const char kernel_opt1[] = "CONFIG_DMABUF_MOVE_NOTIFY=y";
const char kernel_opt2[] = "CONFIG_PCI_P2PDMA=y";
int found_opt1 = 0;
int found_opt2 = 0;
FILE *fp;
struct utsname utsname;
char kernel_conf_file[128];
char buf[256];

if (uname(&utsname) == -1) {
printf("could not get kernel name");
return FAILURE;
}

snprintf(kernel_conf_file, sizeof(kernel_conf_file),
"/boot/config-%s", utsname.release);
fp = fopen(kernel_conf_file, "r");
if (fp == NULL) {
printf("could not open kernel conf file %s error: %m",
kernel_conf_file);
return FAILURE;
}

while (fgets(buf, sizeof(buf), fp) != NULL) {
if (strstr(buf, kernel_opt1) != NULL) {
found_opt1 = 1;
}
if (strstr(buf, kernel_opt2) != NULL) {
found_opt2 = 1;
}
if (found_opt1 && found_opt2) {
dmabuf_supported = 1;
break;
}
}
fclose(fp);
}
#endif

return SUCCESS;
}

Expand All @@ -85,12 +133,43 @@ int rocm_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t
hipError_t error;
size_t buf_size = (size + ACCEL_PAGE_SIZE - 1) & ~(ACCEL_PAGE_SIZE - 1);

struct rocm_memory_ctx *rocm_ctx = container_of(ctx, struct rocm_memory_ctx, base);
error = hipMalloc(&d_A, buf_size);
if (error != hipSuccess) {
printf("hipMalloc error=%d\n", error);
return FAILURE;
}

#ifdef HAVE_ROCM_DMABUF
if (rocm_ctx->use_dmabuf) {
hipDeviceptr_t aligned_ptr;
const size_t host_page_size = sysconf(_SC_PAGESIZE);
uint64_t offset;
size_t aligned_size;
hsa_status_t status;

// Round down to host page size
aligned_ptr = (hipDeviceptr_t)((uintptr_t)d_A & ~(host_page_size - 1));
offset = d_A - aligned_ptr;
aligned_size = (size + offset + host_page_size - 1) & ~(host_page_size - 1);

printf("using DMA-BUF for GPU buffer address at %#llx aligned at %#llx with aligned size %zu\n", d_A, aligned_ptr, aligned_size);
*dmabuf_fd = 0;

status = hsa_amd_portable_export_dmabuf(d_A, aligned_size, dmabuf_fd, &offset);
if (status != HSA_STATUS_SUCCESS) {
printf("failed to export dmabuf handle for addr %p / %zu", d_A,
aligned_size);
return FAILURE;
}

printf("dmabuf export addr %p %lu to dmabuf fd %d offset %zu\n",
d_A, aligned_size, *dmabuf_fd, offset);

*dmabuf_offset = offset;
}
#endif

printf("allocated %lu bytes of GPU buffer at %p\n", (unsigned long)buf_size, d_A);
*addr = d_A;
*can_init = true;
Expand All @@ -107,6 +186,14 @@ bool rocm_memory_supported() {
return true;
}

bool rocm_memory_dmabuf_supported() {
#ifdef HAVE_ROCM_DMABUF
return true;
#else
return false;
#endif
}

struct memory_ctx *rocm_memory_create(struct perftest_parameters *params) {
struct rocm_memory_ctx *ctx;

Expand All @@ -119,6 +206,7 @@ struct memory_ctx *rocm_memory_create(struct perftest_parameters *params) {
ctx->base.copy_buffer_to_host = memcpy;
ctx->base.copy_buffer_to_buffer = memcpy;
ctx->device_id = params->rocm_device_id;
ctx->use_dmabuf = params->use_rocm_dmabuf;

return &ctx->base;
}
6 changes: 6 additions & 0 deletions src/rocm_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ struct perftest_parameters;

bool rocm_memory_supported();

bool rocm_memory_dmabuf_supported();

struct memory_ctx *rocm_memory_create(struct perftest_parameters *params);


Expand All @@ -25,6 +27,10 @@ inline bool rocm_memory_supported() {
return false;
}

inline bool rocm_memory_dmabuf_supported() {
return false;
}

inline struct memory_ctx *rocm_memory_create(struct perftest_parameters *params) {
return NULL;
}
Expand Down