-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpu_processor.cu
35 lines (27 loc) · 1.1 KB
/
gpu_processor.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
// Compile-time constants. None of them is referenced by the kernel or the host
// wrapper visible in this part of the file.
// NOTE(review): presumably the total number of input floats — confirm against callers.
#define DATA_SIZE 10000000
// NOTE(review): presumably a CUDA grid size (blocks per launch) — confirm against callers.
#define BLOCKS_COUNT 1
// NOTE(review): presumably a CUDA block size (threads per block) — confirm against callers.
#define THREADS_COUNT 64
/**
 * Computes the arithmetic mean of one contiguous chunk of inputData per thread.
 *
 * The thread with global index t = blockIdx.x * blockDim.x + threadIdx.x sums
 * the dataForBlock consecutive floats starting at inputData[t * dataForBlock]
 * (in ascending order) and stores sum / dataForBlock in results[t].
 *
 * Launch layout: 1D grid of 1D blocks, no shared memory. The kernel has no
 * bounds guard, so the launch must contain exactly as many threads as there
 * are chunks: every launched thread reads dataForBlock floats and writes one
 * result.
 *
 * @param dataForBlock number of consecutive floats averaged by each thread;
 *                     when it is 0 nothing is read and the stored value is
 *                     0.0f / 0.0f (NaN)
 * @param inputData    pointer dereferenced on the device; must hold at least
 *                     (number of launched threads) * dataForBlock floats
 * @param results      pointer dereferenced on the device; must hold at least
 *                     one float per launched thread
 */
__global__ void findMean(unsigned int dataForBlock, float *inputData, float *results)
{
    // Index math is done in size_t: the original int * unsigned product was
    // evaluated in 32 bits and could wrap for large inputs.
    const size_t index = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const float *chunk = inputData + index * dataForBlock;

    float result = 0.0f;
    // Unsigned counter matches the type of the bound, so the loop is well
    // defined for every value of dataForBlock.
    for (unsigned int i = 0; i < dataForBlock; i++)
    {
        result += chunk[i];
    }
    result /= dataForBlock;
    results[index] = result;
}
/**
 * Computes the mean of each data block on GPU device 0.
 *
 * Copies blocksCount * blockSize floats from `blocks` to the device, runs
 * findMean with one GPU thread per data block, and copies blocksCount means
 * back into `results`. All calls use the default stream; the final blocking
 * cudaMemcpy also waits for the kernel to finish.
 *
 * If blocksCount is 0, or if any CUDA call fails, `results` is left untouched.
 * NOTE(review): the void return type gives callers no way to observe a
 * failure; consider reporting the cudaError_t if the interface may change.
 *
 * @param blocks      host pointer to blocksCount * blockSize input floats,
 *                    stored block after block
 * @param results     host pointer with room for blocksCount floats
 * @param blockSize   number of floats in each data block
 * @param blocksCount number of data blocks (and of means produced)
 */
void processWithGPU(float *blocks, float *results, unsigned int blockSize, unsigned int blocksCount)
{
    // A launch with zero threads is an invalid configuration; nothing to do.
    if (blocksCount == 0)
    {
        return;
    }

    // CUDA threads per CUDA block for the main launch (multiple of the warp size).
    const unsigned int threadsPerCudaBlock = 256;

    // Byte counts in size_t: blockSize * blocksCount can exceed 32 bits.
    const size_t inputBytes = static_cast<size_t>(blockSize) * blocksCount * sizeof(float);
    // Only one float per data block is ever written by the kernel.
    const size_t resultBytes = static_cast<size_t>(blocksCount) * sizeof(float);

    float *deviceInputData = 0;
    float *deviceResults = 0;

    cudaError_t status = cudaSetDevice(0);
    if (status == cudaSuccess)
    {
        status = cudaMalloc((void **)&deviceInputData, inputBytes);
    }
    if (status == cudaSuccess)
    {
        status = cudaMalloc((void **)&deviceResults, resultBytes);
    }
    if (status == cudaSuccess)
    {
        status = cudaMemcpy(deviceInputData, blocks, inputBytes, cudaMemcpyHostToDevice);
    }

    // findMean has no bounds guard, so every launch must contain exactly as
    // many threads as data blocks it should process. A single CUDA block is
    // limited in size, so the work is split into a grid of full CUDA blocks
    // plus one smaller launch for the remainder.
    const unsigned int fullCudaBlocks = blocksCount / threadsPerCudaBlock;
    const unsigned int tailThreads = blocksCount % threadsPerCudaBlock;

    if (status == cudaSuccess && fullCudaBlocks > 0)
    {
        findMean<<<fullCudaBlocks, threadsPerCudaBlock>>>(blockSize, deviceInputData, deviceResults);
        // Launch-configuration errors are only visible through cudaGetLastError().
        status = cudaGetLastError();
    }
    if (status == cudaSuccess && tailThreads > 0)
    {
        // Thread indices restart at 0 in this launch, so both pointers are
        // advanced past the data blocks handled by the first launch.
        const size_t processed = static_cast<size_t>(fullCudaBlocks) * threadsPerCudaBlock;
        findMean<<<1, tailThreads>>>(blockSize,
                                     deviceInputData + processed * blockSize,
                                     deviceResults + processed);
        status = cudaGetLastError();
    }

    if (status == cudaSuccess)
    {
        // Blocking copy: also surfaces any error raised while the kernels ran.
        status = cudaMemcpy(results, deviceResults, resultBytes, cudaMemcpyDeviceToHost);
    }

    // Always release device memory; cudaFree on a null pointer is a no-op.
    cudaFree(deviceInputData);
    cudaFree(deviceResults);
}