Skip to content

Commit

Permalink
11_matrix_add
Browse files Browse the repository at this point in the history
  • Loading branch information
Tony-Tan committed Apr 17, 2018
1 parent ce82be2 commit 4dd0ad1
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 15 deletions.
10 changes: 5 additions & 5 deletions 10_reduceInteger/reduceInteger.cu
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ int main(int argc,char** argv)
//cpu_sum = recursiveReduce(tmp, size);
for (int i = 0; i < size; i++)
cpu_sum += tmp[i];
printf("%d ", cpu_sum);
printf("cpu sum:%d \n", cpu_sum);
iElaps = cpuSecond() - iStart;
printf("cpu reduce elapsed %lf ms cpu_sum: %d\n", iElaps, cpu_sum);
printf("cpu reduce elapsed %lf ms cpu_sum: %d\n", iElaps, cpu_sum);


//kernel 1:reduceNeighbored
Expand All @@ -180,7 +180,7 @@ int main(int argc,char** argv)
gpu_sum = 0;
for (int i = 0; i < grid.x; i++)
gpu_sum += odata_host[i];
printf("gpu warmup elapsed %lf ms gpu_sum: %d<<<grid %d block %d>>>\n",
printf("gpu warmup elapsed %lf ms gpu_sum: %d<<<grid %d block %d>>>\n",
iElaps, gpu_sum, grid.x, block.x);

//kernel 1:reduceNeighbored
Expand All @@ -195,7 +195,7 @@ int main(int argc,char** argv)
gpu_sum = 0;
for (int i = 0; i < grid.x; i++)
gpu_sum += odata_host[i];
printf("gpu reduceNeighbored elapsed %lf ms gpu_sum: %d<<<grid %d block %d>>>\n",
printf("gpu reduceNeighbored elapsed %lf ms gpu_sum: %d<<<grid %d block %d>>>\n",
iElaps, gpu_sum, grid.x, block.x);

//kernel 2:reduceNeighboredLess
Expand Down Expand Up @@ -224,7 +224,7 @@ int main(int argc,char** argv)
gpu_sum = 0;
for (int i = 0; i < grid.x; i++)
gpu_sum += odata_host[i];
printf("gpu reduceInterleaved elapsed %lf ms gpu_sum: %d<<<grid %d block %d>>>\n",
printf("gpu reduceInterleaved elapsed %lf ms gpu_sum: %d<<<grid %d block %d>>>\n",
iElaps, gpu_sum, grid.x, block.x);
// free host memory

Expand Down
Empty file.
23 changes: 16 additions & 7 deletions 11_simple_sum_matrix2D/simple_sum_matrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ int main(int argc,char** argv)
{
//printf("strating...\n");
//initDevice(0);
int nx=1<<12;
int ny=1<<12;
int nx=1<<13;
int ny=1<<13;
int nxy=nx*ny;
int nBytes=nxy*sizeof(float);

Expand Down Expand Up @@ -62,10 +62,18 @@ int main(int argc,char** argv)

double iStart,iElaps;
// cpu compute
//double iStart=cpuSecond();
//sumMatrix2D_CPU(A_host,B_host,C_host,nx,ny);
//double iElaps=cpuSecond()-iStart;
//printf("CPU Execution Time elapsed %f sec\n",iElaps);
iStart=cpuSecond();
sumMatrix2D_CPU(A_host,B_host,C_host,nx,ny);
iElaps=cpuSecond()-iStart;
printf("CPU Execution Time elapsed %f sec\n",iElaps);
//warm up
// 2d block and 2d grid
dim3 block_0(32,32);
dim3 grid_0((nx-1)/block_0.x+1,(ny-1)/block_0.y+1);
iStart=cpuSecond();
sumMatrix<<<grid_0,block_0>>>(A_dev,B_dev,C_dev,nx,ny);
CHECK(cudaDeviceSynchronize());
printf("Warm Up \n");

// 2d block and 2d grid
dim3 block(dimx,dimy);
Expand All @@ -77,7 +85,8 @@ int main(int argc,char** argv)
printf("GPU Execution configuration<<<(%d,%d),(%d,%d)>>> Time elapsed %f sec\n",
grid.x,grid.y,block.x,block.y,iElaps);
CHECK(cudaMemcpy(C_from_gpu,C_dev,nBytes,cudaMemcpyDeviceToHost));
//checkResult(C_host,C_from_gpu,nxy);

checkResult(C_host,C_from_gpu,nxy);

cudaFree(A_dev);
cudaFree(B_dev);
Expand Down
6 changes: 3 additions & 3 deletions include/freshman.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ void checkResult(float * hostRef,float * gpuRef,const int N)
{
if(abs(hostRef[i]-gpuRef[i])>epsilon)
{
printf("Results don\'t match!");
printf("%f(hostRef[%d] )!= %f(gpuRef[%d])",hostRef[i],i,gpuRef[i],i);
break;
printf("Results don\'t match!\n");
printf("%f(hostRef[%d] )!= %f(gpuRef[%d])\n",hostRef[i],i,gpuRef[i],i);
return;
}
}
printf("Check result success!\n");
Expand Down

0 comments on commit 4dd0ad1

Please sign in to comment.