Skip to content

Commit

Permalink
1,2,3
Browse files Browse the repository at this point in the history
  • Loading branch information
Tony-Tan committed Jan 22, 2018
1 parent d824450 commit e50b2c8
Show file tree
Hide file tree
Showing 10 changed files with 153 additions and 2 deletions.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ __global__ void hello_world(void)
{
printf("GPU: Hello world!\n");
}
int main()
int main(int argc,char **argv)
{
printf("CPU: Hello world!\n");
hello_world<<<1,10>>>();
Expand Down
1 change: 1 addition & 0 deletions 1_check_dimension/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Build the check_dimension executable from its single CUDA source file.
add_executable(check_dimension check_dimension.cu)
20 changes: 20 additions & 0 deletions 1_check_dimension/check_dimension.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include <cuda_runtime.h>
#include <stdio.h>
/**
 * Debug kernel: each thread prints its thread index, its block index, and the
 * block and grid dimensions of the launch it is running in.
 *
 * Takes no arguments and writes no memory; its only effect is the
 * device-side printf (debugging aid, not meant for performance-sensitive use).
 */
__global__ void checkIndex(void)
{
    // threadIdx/blockIdx/blockDim/gridDim components are unsigned, so they are
    // formatted with %u rather than %d.
    printf("threadIdx:(%u,%u,%u) blockIdx:(%u,%u,%u) blockDim:(%u,%u,%u)"
           "gridDim(%u,%u,%u)\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}
/**
 * Prints the grid/block configuration chosen for a 6-element problem with
 * 3 threads per block, then launches checkIndex with that configuration.
 *
 * Returns 0 on success and 1 if the launch, the device synchronization or the
 * device reset reports an error.
 */
int main(int argc,char **argv)
{
    (void)argc;
    (void)argv;

    const int nElem = 6;
    dim3 block(3);
    // Ceiling division: enough blocks to cover every element.
    dim3 grid((nElem + block.x - 1) / block.x);

    // dim3 components are unsigned, hence %u.
    printf("grid.x %u grid.y %u grid.z %u\n", grid.x, grid.y, grid.z);
    printf("block.x %u block.y %u block.z %u\n", block.x, block.y, block.z);

    checkIndex<<<grid, block>>>();

    // A kernel launch returns no status itself: launch-configuration errors
    // are read with cudaGetLastError(), execution errors show up at the sync.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess)
    {
        fprintf(stderr, "checkIndex failed: %s\n", cudaGetErrorString(status));
        cudaDeviceReset();
        return 1;
    }

    status = cudaDeviceReset();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "cudaDeviceReset failed: %s\n", cudaGetErrorString(status));
        return 1;
    }
    return 0;
}
1 change: 1 addition & 0 deletions 2_grid_block/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Build the grid_block executable from its single CUDA source file.
add_executable(grid_block grid_block.cu)
24 changes: 24 additions & 0 deletions 2_grid_block/grid_block.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include <cuda_runtime.h>
#include <stdio.h>
/**
 * Shows how the number of blocks needed to cover 1024 elements changes as the
 * block size shrinks (1024, 512, 256, 128 threads per block).
 *
 * No kernel is launched; the program only prints one "grid.x / block.x" line
 * per block size and then resets the device.
 *
 * Returns 0 on success, 1 if the device reset reports an error.
 */
int main(int argc,char ** argv)
{
    (void)argc;
    (void)argv;

    const int nElem = 1024;
    const unsigned int blockSizes[] = {1024, 512, 256, 128};
    const int numSizes = (int)(sizeof(blockSizes) / sizeof(blockSizes[0]));

    dim3 block(blockSizes[0]);
    dim3 grid((nElem - 1) / block.x + 1);

    for (int s = 0; s < numSizes; s++)
    {
        block.x = blockSizes[s];
        // Ceiling division written as (n-1)/b+1.
        grid.x = (nElem - 1) / block.x + 1;
        // dim3 components are unsigned, hence %u.
        printf("grid.x %u block.x %u\n", grid.x, block.x);
    }

    const cudaError_t status = cudaDeviceReset();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "cudaDeviceReset failed: %s\n", cudaGetErrorString(status));
        return 1;
    }
    return 0;
}
1 change: 1 addition & 0 deletions 3_sum_arrays/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Build the sum_arrays executable from its single CUDA source file.
add_executable(sum_arrays sum_arrays.cu)
87 changes: 87 additions & 0 deletions 3_sum_arrays/sum_arrays.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include <cuda_runtime.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "freshman.h"
/**
 * Compares a host-computed reference array against the array copied back from
 * the GPU and prints the outcome.
 *
 * @param hostRef  reference values computed on the host
 * @param gpuRef   values produced by the device
 * @param N        number of elements to compare in both arrays
 *
 * Prints the first mismatching pair and stops at it; the success message is
 * printed only when all N elements agree within the tolerance.
 */
void checkResult(float * hostRef,float * gpuRef,const int N)
{
    const double epsilon = 1.0E-8;
    for (int i = 0; i < N; i++)
    {
        // fabsf keeps the comparison in floating point; a plain abs() may
        // resolve to the integer overload and truncate the difference.
        if (fabsf(hostRef[i] - gpuRef[i]) > epsilon)
        {
            printf("Results don\'t match!\n");
            printf("%f(hostRef[%d] )!= %f(gpuRef[%d])\n", hostRef[i], i, gpuRef[i], i);
            // Return instead of break so the success line below is not
            // printed after a failure.
            return;
        }
    }
    printf("Check result success!\n");
}
/**
 * Fills an array with pseudo-random values in the range [0.0, 25.5].
 *
 * @param ip    destination array, must hold at least `size` floats
 * @param size  number of elements to write
 *
 * The generator is seeded from the wall clock on the first call only.
 * Reseeding on every call would restart the same sequence whenever two calls
 * fall within the same second, giving both arrays identical contents.
 */
void initialData(float* ip,int size)
{
    static bool seeded = false;
    if (!seeded)
    {
        srand((unsigned)time(NULL));
        seeded = true;
    }
    for (int i = 0; i < size; i++)
    {
        // Low 8 bits of rand() scaled by 1/10 -> values 0.0 .. 25.5.
        ip[i] = (float)(rand() & 0xff) / 10.0f;
    }
}
/**
 * Host reference implementation: element-wise sum res[i] = a[i] + b[i].
 *
 * @param a     first input array, at least `size` elements
 * @param b     second input array, at least `size` elements
 * @param res   output array, at least `size` elements
 * @param size  number of elements to add; any value is accepted, including
 *              sizes that are not a multiple of 4 (nothing is written for
 *              size <= 0)
 */
void sumArrays(float * a,float * b,float * res,const int size)
{
    int i = 0;
    // Main loop: four elements per iteration, only while a full group of four
    // still fits inside the arrays.
    for (; i <= size - 4; i += 4)
    {
        res[i] = a[i] + b[i];
        res[i + 1] = a[i + 1] + b[i + 1];
        res[i + 2] = a[i + 2] + b[i + 2];
        res[i + 3] = a[i + 3] + b[i + 3];
    }
    // Tail: the remaining 0-3 elements, so nothing past `size` is touched.
    for (; i < size; i++)
    {
        res[i] = a[i] + b[i];
    }
}
/**
 * Element-wise vector addition on the device: res[i] = a[i] + b[i], one
 * element per thread.
 *
 * Launch layout: 1D grid of 1D blocks. The kernel receives no element count
 * and performs no bounds check, so the total number of launched threads
 * (gridDim.x * blockDim.x) must not exceed the length of the three arrays.
 *
 * @param a    first input array (device pointer)
 * @param b    second input array (device pointer)
 * @param res  output array (device pointer)
 */
__global__ void sumArraysGPU(float*a,float*b,float*res)
{
    // Global index: with a single block this equals threadIdx.x, with several
    // blocks each block handles its own slice instead of all blocks rewriting
    // the first blockDim.x elements.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    res[i] = a[i] + b[i];
}
/**
 * Adds two 32-element float vectors on the GPU and verifies the result against
 * a host computation.
 *
 * Steps: select device 0, allocate and fill the host inputs, copy them to the
 * device, launch sumArraysGPU with one block of nElem threads, copy the result
 * back, compute the same sum on the host and compare both.
 *
 * Returns 0 on success and 1 if a host allocation fails. Any failing CUDA
 * call terminates the program through the CHECK macro.
 */
int main(int argc,char **argv)
{
    (void)argc;
    (void)argv;

    int dev = 0;
    CHECK(cudaSetDevice(dev));

    int nElem = 32;
    printf("Vector size:%d\n", nElem);
    int nByte = sizeof(float) * nElem;

    // Host buffers: two inputs, the host reference and the GPU result.
    float *a_h = (float*)malloc(nByte);
    float *b_h = (float*)malloc(nByte);
    float *res_h = (float*)malloc(nByte);
    float *res_from_gpu_h = (float*)malloc(nByte);
    if (a_h == NULL || b_h == NULL || res_h == NULL || res_from_gpu_h == NULL)
    {
        fprintf(stderr, "Host memory allocation failed\n");
        // free(NULL) is a no-op, so releasing all four is safe.
        free(a_h);
        free(b_h);
        free(res_h);
        free(res_from_gpu_h);
        return 1;
    }
    memset(res_h, 0, nByte);
    memset(res_from_gpu_h, 0, nByte);

    float *a_d, *b_d, *res_d;
    CHECK(cudaMalloc((float**)&a_d, nByte));
    CHECK(cudaMalloc((float**)&b_d, nByte));
    CHECK(cudaMalloc((float**)&res_d, nByte));

    initialData(a_h, nElem);
    initialData(b_h, nElem);

    CHECK(cudaMemcpy(a_d, a_h, nByte, cudaMemcpyHostToDevice));
    CHECK(cudaMemcpy(b_d, b_h, nByte, cudaMemcpyHostToDevice));

    // One block holding all nElem threads, so the grid has exactly one block.
    dim3 block(nElem);
    dim3 grid(nElem / block.x);
    sumArraysGPU<<<grid, block>>>(a_d, b_d, res_d);
    // A launch returns no status itself; bad configurations surface here.
    CHECK(cudaGetLastError());
    // Printed in the same order as the launch syntax: <<<grid,block>>>.
    printf("Execution configuration<<<%u,%u>>>\n", grid.x, block.x);

    // The blocking copy also waits for the kernel to finish.
    CHECK(cudaMemcpy(res_from_gpu_h, res_d, nByte, cudaMemcpyDeviceToHost));
    sumArrays(a_h, b_h, res_h, nElem);

    checkResult(res_h, res_from_gpu_h, nElem);
    CHECK(cudaFree(a_d));
    CHECK(cudaFree(b_d));
    CHECK(cudaFree(res_d));

    free(a_h);
    free(b_h);
    free(res_h);
    free(res_from_gpu_h);

    return 0;
}
6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Top-level build script: declares the project and adds one subdirectory per sample.
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
# Enable the C++, C and CUDA languages for all targets.
Project(CUDA_Freshman CXX C CUDA)
# NOTE(review): this entry appears next to 0_hello_world below and the diff
# reports one deletion for this file -- presumably this is the removed line;
# confirm against the actual CMakeLists.txt.
add_subdirectory(1_0_hello_world)
# Make the headers under ./include visible to the targets added below.
include_directories(./include)
add_subdirectory(0_hello_world)
add_subdirectory(1_check_dimension)
add_subdirectory(2_grid_block)
add_subdirectory(3_sum_arrays)
13 changes: 13 additions & 0 deletions include/freshman.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef FRESHMAN_H
#define FRESHMAN_H
/* printf and exit are used by CHECK below. */
#include <stdio.h>
#include <stdlib.h>
/*
 * CHECK(call): evaluates a CUDA runtime call once and, if it does not return
 * cudaSuccess, prints the source location, the numeric error code and the
 * error string, then terminates the program with exit status 1.
 *
 * The body is wrapped in do { ... } while (0) so that `CHECK(x);` behaves as
 * a single statement (safe inside an unbraced if/else). The argument is
 * parenthesized and the local uses a reserved-looking name so it cannot
 * capture a caller variable called `error`.
 *
 * Usage: CHECK(cudaMalloc(&ptr, bytes));   -- the trailing semicolon is required.
 */
#define CHECK(call)\
do\
{\
    const cudaError_t check_error_=(call);\
    if(check_error_!=cudaSuccess)\
    {\
        printf("ERROR: %s:%d,",__FILE__,__LINE__);\
        printf("code:%d,reason:%s\n",(int)check_error_,cudaGetErrorString(check_error_));\
        exit(1);\
    }\
} while(0)
#endif//FRESHMAN_H

0 comments on commit e50b2c8

Please sign in to comment.