Commit
Fix all the compile problems for the cuda stuff.
abergeron committed Apr 6, 2017
1 parent e7a7094 commit ad9173e
Showing 17 changed files with 198 additions and 170 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -11,7 +11,7 @@ debug: install-debugc py
 
 .PHONY: install-debugc py debug install-relc rel config
 
-Debug/Makefile: Debug Makefile.conf
+Debug/Makefile: Makefile.conf
 	mkdir -p Debug
 ifndef INSTALL_PREFIX
 	(cd Debug && NUM_DEVS=${NUM_DEVS} DEV_NAMES=${DEV_NAMES} cmake .. -DCMAKE_BUILD_TYPE=Debug)
3 changes: 2 additions & 1 deletion src/cache.h
@@ -89,7 +89,8 @@ cache *cache_twoq(size_t hot_size, size_t warm_size,
 
 cache *cache_disk(const char *dirpath, cache *mem,
                   kwrite_fn kwrite, vwrite_fn vwrite,
-                  kread_fn kread, vread_fn vread);
+                  kread_fn kread, vread_fn vread,
+                  error *e);
 
 /* API functions */
 static inline int cache_add(cache *c, cache_key_t k, cache_value_t v) {
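
Note: the new trailing error *e parameter lets cache constructors report why they failed instead of only returning NULL. A minimal caller-side sketch, not part of this commit (my_kwrite/my_vwrite/my_kread/my_vread and err are hypothetical placeholders; err would typically be the context's error record):

    /* Sketch: the reason for a NULL return is now recorded in "err". */
    cache *c = cache_disk("/tmp/kcache", mem,
                          my_kwrite, my_vwrite,
                          my_kread, my_vread,
                          err);
    if (c == NULL)
      fprintf(stderr, "cache_disk: %s\n", err->msg);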
20 changes: 15 additions & 5 deletions src/cache/disk.c
@@ -397,7 +397,7 @@ static void disk_destroy(cache *_c) {
 
 cache *cache_disk(const char *dirpath, cache *mem,
                   kwrite_fn kwrite, vwrite_fn vwrite,
-                  kread_fn kread, vread_fn vread) {
+                  kread_fn kread, vread_fn vread, error *e) {
   struct stat st;
   disk_cache *res;
   char *dirp;
@@ -414,7 +414,10 @@ cache *cache_disk(const char *dirpath, cache *mem,
 
   dirp = malloc(dirl + 1); /* With the NUL */
 
-  if (dirp == NULL) return NULL;
+  if (dirp == NULL) {
+    error_sys(e, "malloc");
+    return NULL;
+  }
 
   strlcpy(dirp, dirpath, dirl + 1);
 
@@ -425,6 +428,7 @@
 
   if (ensurep(NULL, dirp) != 0) {
     free(dirp);
+    error_sys(e, "ensurep");
     return NULL;
   }
 
@@ -433,18 +437,24 @@
 
   mkdir(dirp, 0777); /* This may fail, but it's ok */
 
-  if (lstat(dirp, &st) != 0)
+  if (lstat(dirp, &st) != 0) {
+    error_sys(e, "lstat");
     return NULL;
+  }
 
   /* Restore the good path at the end */
   dirp[dirl - 1] = sep;
 
-  if (!(st.st_mode & S_IFDIR))
+  if (!(st.st_mode & S_IFDIR)) {
+    error_set(e, GA_SYS_ERROR, "Cache path exists but is not a directory");
     return NULL;
+  }
 
   res = calloc(sizeof(*res), 1);
-  if (res == NULL)
+  if (res == NULL) {
+    error_sys(e, "calloc");
     return NULL;
+  }
 
   res->dirp = dirp;
   res->mem = mem;
2 changes: 2 additions & 0 deletions src/cache/twoq.c
@@ -1,6 +1,8 @@
 #include <assert.h>
 #include <stdlib.h>
 
+#include <gpuarray/error.h>
+
 #include "cache.h"
 #include "private_config.h"
 
2 changes: 1 addition & 1 deletion src/gpuarray/buffer.h
@@ -500,7 +500,7 @@ GPUARRAY_PUBLIC int gpukernel_call(gpukernel *k, unsigned int n,
  *
  * This can be used to cache kernel binaries after compilation for a
  * specific device. The kernel can be recreated by calling
- * kernel_alloc with the binary and size and passing `GA_USE_BINARY`
+ * gpukernel_alloc with the binary and size and passing `GA_USE_BINARY`
  * as the use flags.
  *
  * The returned pointer is allocated and must be freed by the caller.
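
The comment fixed above documents the kernel-binary accessor. A hedged round-trip sketch, assuming gpukernel_binary(gpukernel *, size_t *, void **) as that accessor and the gpukernel_init signature shown later in this commit; "mykern", numargs and typecodes are placeholders that must match the original kernel:

    #include <stdlib.h>
    #include <gpuarray/buffer.h>

    /* Sketch: fetch a compiled kernel's binary, then rebuild the kernel
     * from it with GA_USE_BINARY instead of recompiling. */
    static gpukernel *rebuild_from_binary(gpucontext *ctx, gpukernel *k,
                                          unsigned int numargs,
                                          const int *typecodes) {
      size_t sz;
      void *bin;
      const char *src;
      gpukernel *k2;

      if (gpukernel_binary(k, &sz, &bin) != GA_NO_ERROR)
        return NULL;
      src = (const char *)bin;
      k2 = gpukernel_init(ctx, 1, &src, &sz, "mykern", numargs, typecodes,
                          GA_USE_BINARY, NULL, NULL);
      free(bin);  /* the returned binary is allocated; the caller frees it */
      return k2;
    }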
3 changes: 2 additions & 1 deletion src/gpuarray_array.c
@@ -71,7 +71,8 @@ static int ga_extcopy(GpuArray *dst, const GpuArray *src) {
   if (ctx->extcopy_cache == NULL)
     ctx->extcopy_cache = cache_twoq(4, 8, 8, 2, extcopy_eq, extcopy_hash,
                                     extcopy_free,
-                                    (cache_freev_fn)GpuElemwise_free);
+                                    (cache_freev_fn)GpuElemwise_free,
+                                    ctx->err);
   if (ctx->extcopy_cache == NULL)
     return GA_MISC_ERROR;
   if (cache_add(ctx->extcopy_cache, aa, k) != 0)
20 changes: 11 additions & 9 deletions src/gpuarray_blas_cuda_cublas.c
@@ -24,7 +24,7 @@ static inline cublasOperation_t convT(cb_transpose trans) {
   }
 }
 
-static const char *error(cublasStatus_t err) {
+static const char *estr(cublasStatus_t err) {
   switch (err) {
   case CUBLAS_STATUS_SUCCESS:
     return "(cublas) Operation completed successfully.";
@@ -53,12 +53,12 @@ static const char *error(cublasStatus_t err) {
 
 static inline int error_cublas(error *e, const char *msg, cublasStatus_t err) {
   return error_fmt(e, (err == CUBLAS_STATUS_ARCH_MISMATCH) ? GA_DEVSUP_ERROR : GA_BLAS_ERROR,
-                   "%s: %s", msg, error(err));
+                   "%s: %s", msg, estr(err));
 }
 
 #define CUBLAS_EXIT_ON_ERROR(ctx, cmd) do {        \
     cublasStatus_t err = (cmd);                    \
-    if (err != CUBLAS_SUCCESS) {                   \
+    if (err != CUBLAS_STATUS_SUCCESS) {            \
       cuda_exit(ctx);                              \
       return error_cublas((ctx)->err, #cmd, err);  \
     }                                              \
@@ -525,13 +525,14 @@ static int sgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
   const size_t threshold = 650;
   cb_transpose transT;
 
+  ASSERT_BUF(A[0]);
+  ctx = A[0]->ctx;
+
   if (LARGE_VAL(M) || LARGE_VAL(N) || LARGE_VAL(K) ||
       LARGE_VAL(lda) || LARGE_VAL(ldb) || LARGE_VAL(ldc) ||
       LARGE_VAL(M * N) || LARGE_VAL(M * K) || LARGE_VAL(K * N))
     return error_set(ctx->err, GA_XLARGE_ERROR, "Passed-in sizes would overflow the ints in the cublas interface");
 
-  ASSERT_BUF(A[0]);
-  ctx = A[0]->ctx;
   h = (blas_handle *)ctx->blas_handle;
   cuda_enter(ctx);
 
@@ -623,7 +624,7 @@ static int sgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
   gpudata_release(Ta);
   if (err != CUBLAS_STATUS_SUCCESS) {
     cuda_exit(ctx);
-    return error_cublas(ctx, "cublasSgemmBatched", err);
+    return error_cublas(ctx->err, "cublasSgemmBatched", err);
   }
 
   for (i = 0; i < batchCount; i++) {
@@ -651,13 +652,14 @@ static int dgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
   const size_t threshold = 650;
   cb_transpose transT;
 
+  ASSERT_BUF(A[0]);
+  ctx = A[0]->ctx;
+
   if (LARGE_VAL(M) || LARGE_VAL(N) || LARGE_VAL(K) ||
       LARGE_VAL(lda) || LARGE_VAL(ldb) || LARGE_VAL(ldc) ||
       LARGE_VAL(M * N) || LARGE_VAL(M * K) || LARGE_VAL(K * N))
     return error_set(ctx->err, GA_XLARGE_ERROR, "Passed-in sizes would overflow the ints in the cublas interface");
 
-  ASSERT_BUF(A[0]);
-  ctx = A[0]->ctx;
   h = (blas_handle *)ctx->blas_handle;
   cuda_enter(ctx);
 
@@ -697,7 +699,7 @@ static int dgemmBatch(cb_order order, cb_transpose transA, cb_transpose transB,
                            (double*)A[i]->ptr + offA[i], lda,
                            (double*)B[i]->ptr + offB[i], ldb,
                            &beta,
-                           (double*)C[i]->ptr + offC[i], ldc);
+                           (double*)C[i]->ptr + offC[i], ldc));
 
     GA_CUDA_EXIT_ON_ERROR(ctx, cuda_record(A[i], CUDA_WAIT_READ));
     GA_CUDA_EXIT_ON_ERROR(ctx, cuda_record(B[i], CUDA_WAIT_READ));
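
The last dgemmBatch hunk adds the parenthesis that closes a CUBLAS_EXIT_ON_ERROR(...) wrapper around cublasDgemmBatched. Hand-expanding the macro defined earlier in this file shows roughly what the wrapped call does (the arguments are elided here, and the trailing while (0) is the usual do/while idiom, cut off by the diff):

    do {
      cublasStatus_t err = (cublasDgemmBatched(/* ...arguments... */));
      if (err != CUBLAS_STATUS_SUCCESS) {
        cuda_exit(ctx);  /* leave the CUDA context before reporting */
        return error_cublas((ctx)->err, "cublasDgemmBatched(...)", err);
      }
    } while (0);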
8 changes: 6 additions & 2 deletions src/gpuarray_buffer.c
@@ -163,8 +163,12 @@ gpukernel *gpukernel_init(gpucontext *ctx, unsigned int count,
                           const char *fname, unsigned int numargs,
                           const int *typecodes, int flags, int *ret,
                           char **err_str) {
-  return ctx->ops->kernel_alloc(ctx, count, strings, lengths, fname, numargs,
-                                typecodes, flags, ret, err_str);
+  gpukernel *res;
+  res = ctx->ops->kernel_alloc(ctx, count, strings, lengths, fname, numargs,
+                               typecodes, flags, err_str);
+  if (res == NULL && ret)
+    *ret = ctx->err->code;
+  return res;
 }
 
 void gpukernel_retain(gpukernel *k) {
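
With this change the backend's kernel_alloc no longer takes the ret out-parameter; gpukernel_init fills it from the context's error record when allocation fails. A caller-side sketch, not from this commit (src, len and "kern" are placeholders):

    int ret = GA_NO_ERROR;
    gpukernel *k = gpukernel_init(ctx, 1, &src, &len, "kern", 0, NULL, 0,
                                  &ret, NULL);
    if (k == NULL)
      fprintf(stderr, "kernel_alloc failed with code %d\n", ret);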
5 changes: 1 addition & 4 deletions src/gpuarray_buffer_collectives.c
@@ -22,10 +22,7 @@ void gpucomm_free(gpucomm* comm) {
 }
 
 const char* gpucomm_error(gpucontext* ctx) {
-  if (ctx->comm_ops != NULL)
-    return ctx->error->msg;
-  return "No collective ops available, API error. Is a collectives library "
-         "installed?";
+  return ctx->err->msg;
 }
 
 gpucontext* gpucomm_context(gpucomm* comm) {
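
gpucomm_error now simply returns the context's unified error message, whether or not a collectives backend is loaded. A hedged usage sketch (the gpucomm_new arguments are elided placeholders, not from this commit):

    gpucomm *comm;
    if (gpucomm_new(&comm, ctx, /* ...clique id, ndev, rank... */) != GA_NO_ERROR)
      fprintf(stderr, "%s\n", gpucomm_error(ctx));  /* reads ctx->err->msg */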