Skip to content
This repository has been archived by the owner on Apr 8, 2024. It is now read-only.

Fixing vulkan lookup #95

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 51 additions & 11 deletions src/vulkan/driver-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,10 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS

cgpu->work_size = 64;

applog(LOG_NOTICE, "GPU %d: selecting lookup gap of 4", cgpu->driver_id);
cgpu->lookup_gap = 4;
applog(LOG_NOTICE, "GPU %d: selecting lookup gap of %d", cgpu->driver_id, cgpu->lookup_gap);

unsigned int bsize = 1024;
unsigned int bsize = 8192;
size_t ipt = (bsize / cgpu->lookup_gap + (bsize % cgpu->lookup_gap > 0));

if (!cgpu->buffer_size) {
Expand Down Expand Up @@ -168,26 +168,66 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS
state->sharedMemorySize = state->memConstantSize + state->memParamsSize + state->memInputSize + 2 * state->memOutputSize;

state->gpuLocalMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->bufSize, true, true);
if (state->gpuLocalMemory == NULL) {
applog(LOG_ERR, "GPU %d: Failed to allocate local memory", cgpu->driver_id);
return NULL;
}
state->gpuSharedMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->sharedMemorySize, false, true);
if (state->gpuSharedMemory == NULL) {
applog(LOG_ERR, "GPU %d: Failed to allocate shared memory", cgpu->driver_id);
return NULL;
}

state->padbuffer8 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuLocalMemory, state->bufSize, 0);
if (state->padbuffer8 == NULL) {
applog(LOG_ERR, "GPU %d: Failed to create padbuffer8", cgpu->driver_id);
return NULL;
}

uint64_t o = 0;
state->gpu_constants = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memConstantSize, o);
if (state->gpu_constants == NULL) {
applog(LOG_ERR, "GPU %d: Failed to create gpu constants buffer", cgpu->driver_id);
return NULL;
}

o += state->memConstantSize;
state->gpu_params = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memParamsSize, o);
if (state->gpu_params == NULL) {
applog(LOG_ERR, "GPU %d: Failed to create gpu params buffer", cgpu->driver_id);
return NULL;
}

o += state->memParamsSize;
state->CLbuffer0 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memInputSize, o);
if (state->CLbuffer0 == NULL) {
applog(LOG_ERR, "GPU %d: Failed to create CLbuffer0 buffer", cgpu->driver_id);
return NULL;
}

o += state->memInputSize;
state->outputBuffer[0] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o);
if (state->outputBuffer[0] == NULL) {
applog(LOG_ERR, "GPU %d: Failed to create output[0] buffer", cgpu->driver_id);
return NULL;
}

o += state->memOutputSize;
state->outputBuffer[1] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o);

if (state->outputBuffer[1] == NULL) {
applog(LOG_ERR, "GPU %d: Failed to create output[1] buffer", cgpu->driver_id);
return NULL;
}

gVulkan.vkGetDeviceQueue(state->vkDevice, computeQueueFamilyIndex, 0, &state->queue);

state->pipelineLayout = bindBuffers(state->vkDevice, &state->descriptorSet, &state->descriptorPool, &state->descriptorSetLayout,
state->padbuffer8, state->gpu_constants, state->gpu_params, state->CLbuffer0, state->outputBuffer[0], state->outputBuffer[1]
);
if (state->pipelineLayout == NULL) {
applog(LOG_ERR, "GPU %d: Failed to bind buffers and create pipeline layout", cgpu->driver_id);
return NULL;
}

void *ptr = NULL;
CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, 0, state->memConstantSize, 0, (void **)&ptr), "vkMapMemory", NULL);
Expand Down Expand Up @@ -443,16 +483,14 @@ static int vulkan_scrypt_positions(
// transfer input to GPU
char *ptr = NULL;
uint64_t tfxOrigin = state->memParamsSize + state->memConstantSize;
CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memInputSize, 0, (void **)&ptr), "vkMapMemory", 0);
CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memInputSize, 0, (void **)&ptr), "vkMapMemory", SPACEMESH_API_ERROR);
memcpy(ptr, (const void*)pdata, PREIMAGE_SIZE);
gVulkan.vkUnmapMemory(state->vkDevice, state->gpuSharedMemory);

params.N = N;
params.hash_len_bits = hash_len_bits;
params.concurrent_threads = cgpu->thread_concurrency;

const uint64_t delay = 5ULL * 1000ULL * 1000ULL * 1000ULL;

tfxOrigin = state->memParamsSize + state->memConstantSize + state->memInputSize;

do {
Expand All @@ -470,15 +508,16 @@ static int vulkan_scrypt_positions(
CHECK_RESULT(gVulkan.vkQueueSubmit(state->queue, 1, &submitInfo, VK_NULL_HANDLE), "vkQueueSubmit", 0);
CHECK_RESULT(gVulkan.vkQueueWaitIdle(state->queue), "vkQueueWaitIdle", 0);
#else
CHECK_RESULT(gVulkan.vkResetFences(state->vkDevice, 1, &state->fence), "vkResetFences", 0);
VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, 0, 0, 0, 0, 1, &state->commandBuffer, 0, 0 };
CHECK_RESULT(gVulkan.vkQueueSubmit(state->queue, 1, &submitInfo, state->fence), "vkQueueSubmit", 0);
CHECK_RESULT(gVulkan.vkResetFences(state->vkDevice, 1, &state->fence), "vkResetFences", SPACEMESH_API_ERROR);
VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO, 0, 0, 0, 0, 1, &state->commandBuffer, 0, 0};
CHECK_RESULT(gVulkan.vkQueueSubmit(state->queue, 1, &submitInfo, state->fence), "vkQueueSubmit", SPACEMESH_API_ERROR);
VkResult res;
do {
uint64_t delay = 5ULL * 1000ULL * 1000ULL * 1000ULL;
res = gVulkan.vkWaitForFences(state->vkDevice, 1, &state->fence, VK_TRUE, delay);
} while (res == VK_TIMEOUT);
gVulkan.vkResetFences(state->vkDevice, 1, &state->fence);
CHECK_RESULT(res, "vkWaitForFences", SPACEMESH_API_ERROR);
CHECK_RESULT(gVulkan.vkResetFences(state->vkDevice, 1, &state->fence), "vkResetFences", SPACEMESH_API_ERROR);
#endif

if (computePow) {
Expand All @@ -499,7 +538,8 @@ static int vulkan_scrypt_positions(
if (computeLeafs) {
uint32_t length = (uint32_t)min(chunkSize, outLength);

CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memOutputSize, 0, (void **)&ptr), "vkMapMemory", 0);
CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memOutputSize, 0, (void **)&ptr), "vkMapMemory", SPACEMESH_API_ERROR);

memcpy(out, ptr, length);
gVulkan.vkUnmapMemory(state->vkDevice, state->gpuSharedMemory);
out += length;
Expand Down
5 changes: 3 additions & 2 deletions src/vulkan/gen/scrypt-chacha.comp
Original file line number Diff line number Diff line change
Expand Up @@ -438,8 +438,9 @@ main()
Nfactor++;
}

const uint effective_concurrency = (concurrent_threads << 9) >> Nfactor;

// const uint effective_concurrency = (concurrent_threads << 12) >> Nfactor;
const uint effective_concurrency = concurrent_threads;

password[0] = buffer0[0];
password[1] = buffer0[1];
password[2] = buffer0[2];
Expand Down
5 changes: 4 additions & 1 deletion src/vulkan/vulkan-helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,10 @@ VkDeviceMemory allocateGPUMemory(int index, VkDevice vkDevice, const VkDeviceSi
VkBuffer createBuffer(VkDevice vkDevice, uint32_t computeQueueFamilyIndex, VkDeviceMemory memory, VkDeviceSize bufferSize, VkDeviceSize offset)
{
// 4 GiB limit on AMD and Nvidia
if (bufferSize >= 0x100000000) bufferSize = 0xffffffff;
if (bufferSize >= 0x100000000) {
applog(LOG_ERR, "Buffer size too big, setting to 4Gb\n");
bufferSize = 0xffffffff;
}

const VkBufferCreateInfo bufferCreateInfo = {
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
Expand Down