From f5c303528fe7978df40ebbb0dc230786d4784a14 Mon Sep 17 00:00:00 2001 From: Pak Lui Date: Tue, 14 Jan 2025 16:18:29 -0800 Subject: [PATCH] Enable dmabuf to ROCm Enable DMABUF to ROCm using HIP/HSA as interface --- configure.ac | 14 ++++++- src/perftest_parameters.c | 21 +++++++++- src/perftest_parameters.h | 1 + src/rocm_memory.c | 88 +++++++++++++++++++++++++++++++++++++++ src/rocm_memory.h | 6 +++ 5 files changed, 128 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 3c609e79..6610abe7 100755 --- a/configure.ac +++ b/configure.ac @@ -247,6 +247,13 @@ AC_ARG_ENABLE([rocm], [], [enable_rocm=no]) +AC_ARG_ENABLE([rocm_dmabuf], + [AS_HELP_STRING([--enable-rocm-dmabuf], + [Enable ROCm DMABUF feature]) + ], + [], + [enable_rocm_dmabuf=no]) + AC_ARG_WITH([rocm], [AS_HELP_STRING([--with-rocm=@<:@ROCm installation path@:>@], [Provide path to ROCm installation]) @@ -254,7 +261,7 @@ AC_ARG_WITH([rocm], [AS_CASE([$with_rocm], [yes|no], [], [CPPFLAGS="-I$with_rocm/include $CPPFLAGS" - LDFLAGS="-L$with_rocm/lib64 -Wl,-rpath=$with_rocm/lib64 -L$with_rocm/lib -Wl,-rpath=$with_rocm/lib -lamdhip64 $LDFLAGS"]) + LDFLAGS="-L$with_rocm/lib64 -Wl,-rpath=$with_rocm/lib64 -L$with_rocm/lib -Wl,-rpath=$with_rocm/lib -lamdhip64 -lhsa-runtime64 $LDFLAGS"]) ]) AS_IF([test "x$enable_rocm" = xyes], [ @@ -270,6 +277,11 @@ AS_IF([test "x$enable_rocm" = xyes], [ AM_CONDITIONAL([ROCM], [test x$enable_rocm = xyes]) +AS_IF([test "x$enable_rocm_dmabuf" = xyes] && [test "x$HAVE_REG_DMABUF_MR" = "xyes"], [ + AC_DEFINE([HAVE_ROCM_DMABUF], [1], [Enable ROCm DMABUF feature]) + AC_CHECK_FUNCS([hsa_amd_portable_export_dmabuf]) + ]) + AC_TRY_LINK([ #include ], [int x = IBV_ACCESS_ON_DEMAND;],[HAVE_EX_ODP=yes], [HAVE_EX_ODP=no]) diff --git a/src/perftest_parameters.c b/src/perftest_parameters.c index f4fd12e3..a573eafa 100755 --- a/src/perftest_parameters.c +++ b/src/perftest_parameters.c @@ -630,6 +630,11 @@ static void usage(const char *argv0, VerbType verb, TestType tst, int connection if (rocm_memory_supported()) { printf(" --use_rocm="); printf(" Use selected ROCm device for GPUDirect RDMA testing\n"); + + if (rocm_memory_dmabuf_supported()) { + printf(" --use_rocm_dmabuf"); + printf(" Use ROCm DMA-BUF for GPUDirect RDMA testing\n"); + } } if (neuron_memory_supported()) { @@ -875,6 +880,7 @@ static void init_perftest_params(struct perftest_parameters *user_param) user_param->cuda_device_id = 0; user_param->cuda_device_bus_id = NULL; user_param->use_cuda_dmabuf = 0; + user_param->use_rocm_dmabuf = 0; user_param->use_data_direct = 0; user_param->rocm_device_id = 0; user_param->neuron_core_id = 0; @@ -2370,6 +2376,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc) static int use_cuda_dmabuf_flag = 0; static int use_data_direct_flag = 0; static int use_rocm_flag = 0; + static int use_rocm_dmabuf_flag = 0; static int use_neuron_flag = 0; static int use_neuron_dmabuf_flag = 0; static int use_hl_flag = 0; @@ -2543,6 +2550,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc) { .name = "use_cuda_dmabuf", .has_arg = 0, .flag = &use_cuda_dmabuf_flag, .val = 1}, { .name = "use_data_direct", .has_arg = 0, .flag = &use_data_direct_flag, .val = 1}, { .name = "use_rocm", .has_arg = 1, .flag = &use_rocm_flag, .val = 1}, + { .name = "use_rocm_dmabuf", .has_arg = 0, .flag = &use_rocm_dmabuf_flag, .val = 1}, { .name = "use_neuron", .has_arg = 1, .flag = &use_neuron_flag, .val = 1}, { .name = "use_neuron_dmabuf", .has_arg = 0, .flag = &use_neuron_dmabuf_flag, .val = 1}, { .name = "use_hl", .has_arg = 1, .flag = &use_hl_flag, .val = 1}, @@ -2982,6 +2990,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc) if (((use_cuda_flag || use_cuda_bus_id_flag) && !cuda_memory_supported()) || (use_cuda_dmabuf_flag && !cuda_memory_dmabuf_supported()) || (use_rocm_flag && !rocm_memory_supported()) || + (use_rocm_dmabuf_flag && !rocm_memory_dmabuf_supported()) || (use_neuron_flag && !neuron_memory_supported()) || (use_neuron_dmabuf_flag && !neuron_memory_dmabuf_supported()) || (use_hl_flag && !hl_memory_supported()) || @@ -2995,7 +3004,8 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc) } /* Memory types are mutually exclucive, make sure we were not already asked to use a different memory type. */ if (user_param->memory_type != MEMORY_HOST && - (mmap_file_flag || use_mlu_flag || use_rocm_flag || use_neuron_flag || use_hl_flag || + (mmap_file_flag || use_mlu_flag || use_neuron_flag || use_hl_flag || + (use_rocm_flag && user_param->memory_type != MEMORY_ROCM) || ((use_cuda_flag || use_cuda_bus_id_flag) && user_param->memory_type != MEMORY_CUDA))) { fprintf(stderr, " Can't use multiple memory types\n"); return FAILURE; @@ -3032,6 +3042,15 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc) user_param->memory_create = rocm_memory_create; use_rocm_flag = 0; } + if (use_rocm_dmabuf_flag) { + user_param->use_rocm_dmabuf = 1; + if (user_param->memory_type != MEMORY_ROCM) { + fprintf(stderr, "ROCm DMA-BUF cannot be used without ROCm\n"); + free(duplicates_checker); + return FAILURE; + } + use_rocm_dmabuf_flag = 0; + } if (use_neuron_flag) { user_param->neuron_core_id = strtol(optarg, NULL, 0); if (user_param->neuron_core_id < 0) { diff --git a/src/perftest_parameters.h b/src/perftest_parameters.h index 2ff43d9d..9d7b2956 100755 --- a/src/perftest_parameters.h +++ b/src/perftest_parameters.h @@ -575,6 +575,7 @@ struct perftest_parameters { int use_cuda_dmabuf; int use_data_direct; int rocm_device_id; + int use_rocm_dmabuf; int neuron_core_id; int use_neuron_dmabuf; char *hl_device_bus_id; diff --git a/src/rocm_memory.c b/src/rocm_memory.c index f361b627..72cecdd9 100644 --- a/src/rocm_memory.c +++ b/src/rocm_memory.c @@ -1,17 +1,21 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. */ #include #include #include +#include #include "rocm_memory.h" #include #if defined HAVE_HIP_HIP_VERSION_H #include #endif #include "perftest_parameters.h" +#include +#include #define ROCM_CHECK(stmt) \ do { \ @@ -25,6 +29,7 @@ struct rocm_memory_ctx { struct memory_ctx base; int device_id; + bool use_dmabuf; }; @@ -69,6 +74,49 @@ int rocm_memory_init(struct memory_ctx *ctx) { fprintf(stderr, "Couldn't initialize ROCm device\n"); return FAILURE; } + +#ifdef HAVE_ROCM_DMABUF + if (rocm_ctx->use_dmabuf) { + int dmabuf_supported = 0; + const char kernel_opt1[] = "CONFIG_DMABUF_MOVE_NOTIFY=y"; + const char kernel_opt2[] = "CONFIG_PCI_P2PDMA=y"; + int found_opt1 = 0; + int found_opt2 = 0; + FILE *fp; + struct utsname utsname; + char kernel_conf_file[128]; + char buf[256]; + + if (uname(&utsname) == -1) { + printf("could not get kernel name"); + return FAILURE; + } + + snprintf(kernel_conf_file, sizeof(kernel_conf_file), + "/boot/config-%s", utsname.release); + fp = fopen(kernel_conf_file, "r"); + if (fp == NULL) { + printf("could not open kernel conf file %s error: %m", + kernel_conf_file); + return FAILURE; + } + + while (fgets(buf, sizeof(buf), fp) != NULL) { + if (strstr(buf, kernel_opt1) != NULL) { + found_opt1 = 1; + } + if (strstr(buf, kernel_opt2) != NULL) { + found_opt2 = 1; + } + if (found_opt1 && found_opt2) { + dmabuf_supported = 1; + break; + } + } + fclose(fp); + } +#endif + return SUCCESS; } @@ -85,12 +133,43 @@ int rocm_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t hipError_t error; size_t buf_size = (size + ACCEL_PAGE_SIZE - 1) & ~(ACCEL_PAGE_SIZE - 1); + struct rocm_memory_ctx *rocm_ctx = container_of(ctx, struct rocm_memory_ctx, base); error = hipMalloc(&d_A, buf_size); if (error != hipSuccess) { printf("hipMalloc error=%d\n", error); return FAILURE; } +#ifdef HAVE_ROCM_DMABUF + if (rocm_ctx->use_dmabuf) { + hipDeviceptr_t aligned_ptr; + const size_t host_page_size = sysconf(_SC_PAGESIZE); + uint64_t offset; + size_t aligned_size; + hsa_status_t status; + + // Round down to host page size + aligned_ptr = (hipDeviceptr_t)((uintptr_t)d_A & ~(host_page_size - 1)); + offset = d_A - aligned_ptr; + aligned_size = (size + offset + host_page_size - 1) & ~(host_page_size - 1); + + printf("using DMA-BUF for GPU buffer address at %#llx aligned at %#llx with aligned size %zu\n", d_A, aligned_ptr, aligned_size); + *dmabuf_fd = 0; + + status = hsa_amd_portable_export_dmabuf(d_A, aligned_size, dmabuf_fd, &offset); + if (status != HSA_STATUS_SUCCESS) { + printf("failed to export dmabuf handle for addr %p / %zu", d_A, + aligned_size); + return FAILURE; + } + + printf("dmabuf export addr %p %lu to dmabuf fd %d offset %zu\n", + d_A, aligned_size, *dmabuf_fd, offset); + + *dmabuf_offset = offset; + } +#endif + printf("allocated %lu bytes of GPU buffer at %p\n", (unsigned long)buf_size, d_A); *addr = d_A; *can_init = true; @@ -107,6 +186,14 @@ bool rocm_memory_supported() { return true; } +bool rocm_memory_dmabuf_supported() { +#ifdef HAVE_ROCM_DMABUF + return true; +#else + return false; +#endif +} + struct memory_ctx *rocm_memory_create(struct perftest_parameters *params) { struct rocm_memory_ctx *ctx; @@ -119,6 +206,7 @@ struct memory_ctx *rocm_memory_create(struct perftest_parameters *params) { ctx->base.copy_buffer_to_host = memcpy; ctx->base.copy_buffer_to_buffer = memcpy; ctx->device_id = params->rocm_device_id; + ctx->use_dmabuf = params->use_rocm_dmabuf; return &ctx->base; } diff --git a/src/rocm_memory.h b/src/rocm_memory.h index 0c53efc1..cacb18f7 100644 --- a/src/rocm_memory.h +++ b/src/rocm_memory.h @@ -16,6 +16,8 @@ struct perftest_parameters; bool rocm_memory_supported(); +bool rocm_memory_dmabuf_supported(); + struct memory_ctx *rocm_memory_create(struct perftest_parameters *params); @@ -25,6 +27,10 @@ inline bool rocm_memory_supported() { return false; } +inline bool rocm_memory_dmabuf_supported() { + return false; +} + inline struct memory_ctx *rocm_memory_create(struct perftest_parameters *params) { return NULL; }