spacemeshos · poszu · Apr 11, 2023 · Apr 11, 2023 · Apr 11, 2023
diff --git a/src/vulkan/driver-vulkan.cpp b/src/vulkan/driver-vulkan.cpp
@@ -136,10 +136,10 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS
 
 	cgpu->work_size = 64;
 
-	applog(LOG_NOTICE, "GPU %d: selecting lookup gap of 4", cgpu->driver_id);
 	cgpu->lookup_gap = 4;
+	applog(LOG_NOTICE, "GPU %d: selecting lookup gap of %d", cgpu->driver_id, cgpu->lookup_gap);
 
-	unsigned int bsize = 1024;
+	unsigned int bsize = 8192;
 	size_t ipt = (bsize / cgpu->lookup_gap + (bsize % cgpu->lookup_gap > 0));
 
 	if (!cgpu->buffer_size) {
@@ -168,26 +168,66 @@ static _vulkanState *initVulkan(struct cgpu_info *cgpu, char *name, size_t nameS
 	state->sharedMemorySize = state->memConstantSize + state->memParamsSize + state->memInputSize + 2 * state->memOutputSize;
 
 	state->gpuLocalMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->bufSize, true, true);
+	if (state->gpuLocalMemory == NULL) {
+		applog(LOG_ERR, "GPU %d: Failed to allocate local memory", cgpu->driver_id);
+		return NULL;
+	}
 	state->gpuSharedMemory = allocateGPUMemory(state->deviceId, state->vkDevice, state->sharedMemorySize, false, true);
+	if (state->gpuSharedMemory == NULL) {
+		applog(LOG_ERR, "GPU %d: Failed to allocate shared memory", cgpu->driver_id);
+		return NULL;
+	}
 
 	state->padbuffer8 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuLocalMemory, state->bufSize, 0);
+	if (state->padbuffer8 == NULL) {
+		applog(LOG_ERR, "GPU %d: Failed to create padbuffer8", cgpu->driver_id);
+		return NULL;
+	}
 
 	uint64_t o = 0;
 	state->gpu_constants = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memConstantSize, o);
+	if (state->gpu_constants == NULL) {
+		applog(LOG_ERR, "GPU %d: Failed to create gpu constants buffer", cgpu->driver_id);
+		return NULL;
+	}
+
 	o += state->memConstantSize;
 	state->gpu_params = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memParamsSize, o);
+	if (state->gpu_params == NULL) {
+		applog(LOG_ERR, "GPU %d: Failed to create gpu params buffer", cgpu->driver_id);
+		return NULL;
+	}
+
 	o += state->memParamsSize;
 	state->CLbuffer0 = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memInputSize, o);
+	if (state->CLbuffer0 == NULL) {
+		applog(LOG_ERR, "GPU %d: Failed to create CLbuffer0 buffer", cgpu->driver_id);
+		return NULL;
+	}
+
 	o += state->memInputSize;
 	state->outputBuffer[0] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o);
+	if (state->outputBuffer[0] == NULL) {
+		applog(LOG_ERR, "GPU %d: Failed to create output[0] buffer", cgpu->driver_id);
+		return NULL;
+	}
+
 	o += state->memOutputSize;
 	state->outputBuffer[1] = createBuffer(state->vkDevice, computeQueueFamilyIndex, state->gpuSharedMemory, state->memOutputSize, o);
-
+	if (state->outputBuffer[1] == NULL) {
+		applog(LOG_ERR, "GPU %d: Failed to create output[1] buffer", cgpu->driver_id);
+		return NULL;
+	}
+
 	gVulkan.vkGetDeviceQueue(state->vkDevice, computeQueueFamilyIndex, 0, &state->queue);
 
 	state->pipelineLayout = bindBuffers(state->vkDevice, &state->descriptorSet, &state->descriptorPool, &state->descriptorSetLayout,
 		state->padbuffer8, state->gpu_constants, state->gpu_params, state->CLbuffer0, state->outputBuffer[0], state->outputBuffer[1]
 	);
+	if (state->pipelineLayout == NULL) {
+		applog(LOG_ERR, "GPU %d: Failed to bind buffers and create pipeline layout", cgpu->driver_id);
+		return NULL;
+	}
 
 	void *ptr = NULL;
 	CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, 0, state->memConstantSize, 0, (void **)&ptr), "vkMapMemory", NULL);
@@ -443,16 +483,14 @@ static int vulkan_scrypt_positions(
 		// transfer input to GPU
 		char *ptr = NULL;
 		uint64_t tfxOrigin = state->memParamsSize + state->memConstantSize;
-		CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memInputSize, 0, (void **)&ptr), "vkMapMemory", 0);
+		CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memInputSize, 0, (void **)&ptr), "vkMapMemory", SPACEMESH_API_ERROR);
 		memcpy(ptr, (const void*)pdata, PREIMAGE_SIZE);
 		gVulkan.vkUnmapMemory(state->vkDevice, state->gpuSharedMemory);
 
 		params.N = N;
 		params.hash_len_bits = hash_len_bits;
 		params.concurrent_threads = cgpu->thread_concurrency;
 
-		const uint64_t delay = 5ULL * 1000ULL * 1000ULL * 1000ULL;
-
 		tfxOrigin = state->memParamsSize + state->memConstantSize + state->memInputSize;
 
 		do {
@@ -470,15 +508,16 @@ static int vulkan_scrypt_positions(
 			CHECK_RESULT(gVulkan.vkQueueSubmit(state->queue, 1, &submitInfo, VK_NULL_HANDLE), "vkQueueSubmit", 0);
 			CHECK_RESULT(gVulkan.vkQueueWaitIdle(state->queue), "vkQueueWaitIdle", 0);
 #else
-			CHECK_RESULT(gVulkan.vkResetFences(state->vkDevice, 1, &state->fence), "vkResetFences", 0);
-			VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, 0, 0, 0, 0, 1, &state->commandBuffer, 0, 0 };
-			CHECK_RESULT(gVulkan.vkQueueSubmit(state->queue, 1, &submitInfo, state->fence), "vkQueueSubmit", 0);
+			CHECK_RESULT(gVulkan.vkResetFences(state->vkDevice, 1, &state->fence), "vkResetFences", SPACEMESH_API_ERROR);
+			VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO, 0, 0, 0, 0, 1, &state->commandBuffer, 0, 0};
+			CHECK_RESULT(gVulkan.vkQueueSubmit(state->queue, 1, &submitInfo, state->fence), "vkQueueSubmit", SPACEMESH_API_ERROR);
 			VkResult res;
 			do {
 				uint64_t delay = 5ULL * 1000ULL * 1000ULL * 1000ULL;
 				res = gVulkan.vkWaitForFences(state->vkDevice, 1, &state->fence, VK_TRUE, delay);
 			} while (res == VK_TIMEOUT);
-			gVulkan.vkResetFences(state->vkDevice, 1, &state->fence);
+			CHECK_RESULT(res, "vkWaitForFences", SPACEMESH_API_ERROR);
+			CHECK_RESULT(gVulkan.vkResetFences(state->vkDevice, 1, &state->fence), "vkResetFences", SPACEMESH_API_ERROR);
 #endif
 
 			if (computePow) {
@@ -499,7 +538,8 @@ static int vulkan_scrypt_positions(
 			if (computeLeafs) {
 				uint32_t length = (uint32_t)min(chunkSize, outLength);
 
-				CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memOutputSize, 0, (void **)&ptr), "vkMapMemory", 0);
+				CHECK_RESULT(gVulkan.vkMapMemory(state->vkDevice, state->gpuSharedMemory, tfxOrigin, state->memOutputSize, 0, (void **)&ptr), "vkMapMemory", SPACEMESH_API_ERROR);
+
 				memcpy(out, ptr, length);
 				gVulkan.vkUnmapMemory(state->vkDevice, state->gpuSharedMemory);
 				out += length;

diff --git a/src/vulkan/gen/scrypt-chacha.comp b/src/vulkan/gen/scrypt-chacha.comp
@@ -438,8 +438,9 @@ main()
 		Nfactor++;
 	}
 
-	const uint effective_concurrency = (concurrent_threads << 9) >> Nfactor;
-
+	// const uint effective_concurrency = (concurrent_threads << 12) >> Nfactor;
+	const uint effective_concurrency = concurrent_threads;
+
 	password[0] = buffer0[0];
 	password[1] = buffer0[1];
 	password[2] = buffer0[2];

diff --git a/src/vulkan/vulkan-helpers.c b/src/vulkan/vulkan-helpers.c
@@ -230,7 +230,10 @@ VkDeviceMemory allocateGPUMemory(int index,  VkDevice vkDevice, const VkDeviceSi
 VkBuffer createBuffer(VkDevice vkDevice, uint32_t computeQueueFamilyIndex, VkDeviceMemory memory, VkDeviceSize bufferSize, VkDeviceSize offset)
 {
 	// 4Gb limit on AMD and Nvidia
-	if (bufferSize >= 0x100000000) bufferSize = 0xffffffff;
+	if (bufferSize >= 0x100000000) {
+		applog(LOG_ERR, "Buffer size too big, setting to 4Gb\n");
+		bufferSize = 0xffffffff;
+	}
 
 	const VkBufferCreateInfo bufferCreateInfo = {
 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,