Skip to content

Commit

Permalink
able to malloc, but hangs by default
Browse files Browse the repository at this point in the history
  • Loading branch information
pvelesko committed Oct 19, 2024
1 parent 209add7 commit 52cf774
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 20 deletions.
12 changes: 6 additions & 6 deletions bitcode/malloc.cl
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
#define ALIGN_SIZE(size) (((size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1))
#define DEVICE_HEAP_SIZE (1024 * 1024) // 1MB heap

__global uchar* device_heap_test;
__global uchar* __chipspv_device_heap;


void init_device_heap(uchar* device_heap) {
device_heap_test = (__global uchar*)device_heap;
__chipspv_device_heap = (__global uchar*)device_heap;
}

// Structure for the header of each block in the heap
Expand Down Expand Up @@ -56,11 +56,11 @@ __global void* malloc(unsigned int size) {
size = ALIGN_SIZE(size);

// Pointers to the mutex and initialization flag
__global volatile atomic_int* mutex = (__global volatile atomic_int*)&device_heap_test[0];
__global int* initialized = (__global int*)&device_heap_test[sizeof(atomic_int)];
__global volatile atomic_int* mutex = (__global volatile atomic_int*)&__chipspv_device_heap[0];
__global int* initialized = (__global int*)&__chipspv_device_heap[sizeof(atomic_int)];

// Pointer to the start of the heap
__global uchar* heap = (__global uchar*)device_heap_test + sizeof(atomic_int) + sizeof(int);
__global uchar* heap = (__global uchar*)__chipspv_device_heap + sizeof(atomic_int) + sizeof(int);
int real_heap_size = DEVICE_HEAP_SIZE - sizeof(atomic_int) - sizeof(int);

lock(mutex);
Expand Down Expand Up @@ -123,7 +123,7 @@ __global void* malloc(unsigned int size) {
}

void free(void* ptr) {
uchar* device_heap = (__global uchar*)device_heap_test;
uchar* device_heap = (__global uchar*)__chipspv_device_heap;
if (ptr == NULL) return;

__global volatile atomic_int* mutex = (__global volatile atomic_int*)&device_heap[0];
Expand Down
1 change: 1 addition & 0 deletions include/hip/spirv_hip.hh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ extern "C" {
// A global flag included in all HIP device modules for signaling
// abort request.
__attribute__((weak)) __device__ int32_t __chipspv_abort_called;
__attribute__((weak)) __device__ void* __chipspv_device_heap;

__device__ void __chipspv_abort(int32_t *abort_flag);

Expand Down
22 changes: 12 additions & 10 deletions samples/0_MatrixMultiply/MatrixMultiply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ __global__ void gpuMatrixMul(const float *__restrict A,
uint M, uint N, uint K)

{
int* test = (int*)malloc(10 * sizeof(int));
// Thread identifiers
const uint globalRow =
hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; // Row ID of C (0..M)
Expand Down Expand Up @@ -263,16 +264,17 @@ int main() {
err = hipEventRecord(events[i * 2], NULL);
ERR_CHECK;
// Lauching kernel from host
hipLaunchKernelGGL(
gpuMatrixMul,
#ifndef MM_SHARED
dim3(WIDTH / THREADS_PER_BLOCK, WIDTH / THREADS_PER_BLOCK),
dim3(THREADS_PER_BLOCK, THREADS_PER_BLOCK),
#else
dim3((WIDTH / THREADS_PER_BLOCK), (WIDTH / THREADS_PER_BLOCK)),
dim3(THREADS_PER_BLOCK, 4),
#endif
0, 0, gpuMatrix1, gpuMatrix2, gpuMultiplyMatrix, WIDTH, WIDTH, WIDTH);
// hipLaunchKernelGGL(
// gpuMatrixMul,
// #ifndef MM_SHARED
// dim3(WIDTH / THREADS_PER_BLOCK, WIDTH / THREADS_PER_BLOCK),
// dim3(THREADS_PER_BLOCK, THREADS_PER_BLOCK),
// #else
// dim3((WIDTH / THREADS_PER_BLOCK), (WIDTH / THREADS_PER_BLOCK)),
// dim3(THREADS_PER_BLOCK, 4),
// #endif
// 0, 0, gpuMatrix1, gpuMatrix2, gpuMultiplyMatrix, WIDTH, WIDTH, WIDTH);
gpuMatrixMul<<<1, 1>>>(gpuMatrix1, gpuMatrix2, gpuMultiplyMatrix, WIDTH, WIDTH, WIDTH);
ERR_CHECK_2;
err = hipEventRecord(events[i * 2 + 1], NULL);
ERR_CHECK;
Expand Down
18 changes: 14 additions & 4 deletions src/CHIPBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,19 @@ static void queueVariableInitShadowKernel(chipstar::Queue *Q,
queueKernel(Q, K);
}

static void initDeviceHeap(chipstar::Queue &Q, chipstar::Module &M) {
static void initDeviceHeap(chipstar::Queue *Q, chipstar::Module *M) {

Check warning on line 87 in src/CHIPBackend.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBackend.cc:87:45 [readability-identifier-length]

parameter name 'Q' is too short, expected at least 3 characters

Check warning on line 87 in src/CHIPBackend.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBackend.cc:87:66 [readability-identifier-length]

parameter name 'M' is too short, expected at least 3 characters
logTrace("initDeviceHeap()");
chipstar::DeviceVar *Var = M.getGlobalVar(ChipDeviceHeapName);
chipstar::DeviceVar *Var = M->getGlobalVar(ChipDeviceHeapName);
if (!Var)

Check warning on line 90 in src/CHIPBackend.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBackend.cc:90:8 [readability-implicit-bool-conversion]

implicit conversion 'chipstar::DeviceVar *' -> bool

Check warning on line 90 in src/CHIPBackend.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBackend.cc:90:12 [readability-braces-around-statements]

statement should be inside braces
return;
void* device_heap = Var->getDevAddr();
// TODO: initialize device heap by enqueuing a kernel that calls init_device_heap
// prob call queueVariableInfoShadowKernel
auto *Ctx = Q->getContext();
void* init_device_heap_ptr = Ctx->allocate(sizeof(void*), 8, hipMemoryTypeDevice);

Check warning on line 94 in src/CHIPBackend.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBackend.cc:94:9 [readability-identifier-naming]

invalid case style for local variable 'init_device_heap_ptr'

Check warning on line 94 in src/CHIPBackend.cc

View workflow job for this annotation

GitHub Actions / cpp-linter

src/CHIPBackend.cc:94:61 [readability-magic-numbers]

8 is a magic number; consider replacing it with a named constant
Var->setDevAddr(init_device_heap_ptr);
logInfo("initDeviceHeap() device_heap: {}", (void*)device_heap);
logInfo("initDeviceHeap() init_device_heap_ptr: {}", (void*)init_device_heap_ptr);
queueVariableBindShadowKernel(Q, M, Var);
Q->finish();
}

chipstar::CallbackData::CallbackData(hipStreamCallback_t TheCallbackF,
Expand Down Expand Up @@ -382,6 +387,9 @@ void chipstar::Module::prepareDeviceVariablesNoLock(chipstar::Device *Device,
return;
}

// Initialize device heap
initDeviceHeap(Queue, this);

auto Err = allocateDeviceVariablesNoLock(Device, Queue);
(void)Err;

Expand Down Expand Up @@ -411,6 +419,8 @@ void chipstar::Module::prepareDeviceVariablesNoLock(chipstar::Device *Device,
if (QueuedKernels)
Queue->finish();



DeviceVariablesInitialized_ = true;
}

Expand Down

0 comments on commit 52cf774

Please sign in to comment.