Commit

Brodey/cudnn (#55)
brodeynewman authored Nov 22, 2024
1 parent 5c31f28 commit 62bbf9e
Showing 7 changed files with 519 additions and 6 deletions.
71 changes: 71 additions & 0 deletions codegen/annotations.h
@@ -1,5 +1,6 @@
#include <nvml.h>
#include <cuda.h>
#include <cudnn.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>

@@ -5545,6 +5546,10 @@ cudaError_t cudaGetExportTable(const void **ppExportTable, const cudaUUID_t *pEx
* @param symbolPtr SEND_RECV
*/
cudaError_t cudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr);
/**
* @param handle RECV_ONLY
*/
cudnnStatus_t cudnnCreate(cudnnHandle_t *handle);
/**
* @param handle RECV_ONLY
*/
@@ -5577,3 +5582,69 @@ cublasStatus_t cublasSgemm_v2(cublasHandle_t handle,
const float *B, int ldb,
const float *beta,
float *C, int ldc);
/**
* @param handle SEND_ONLY
* @param activationDesc SEND_ONLY
* @param alpha SEND_ONLY NULLABLE
* @param xDesc SEND_ONLY
* @param x SEND_ONLY
* @param beta SEND_ONLY NULLABLE
* @param yDesc SEND_ONLY
* @param y SEND_ONLY
*/
cudnnStatus_t cudnnActivationForward(
cudnnHandle_t handle,
cudnnActivationDescriptor_t activationDesc,
const void *alpha,
const cudnnTensorDescriptor_t xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t yDesc,
void *y);

/**
* @param tensorDesc SEND_ONLY
* @param format SEND_ONLY
* @param dataType SEND_ONLY
* @param n SEND_ONLY
* @param c SEND_ONLY
* @param h SEND_ONLY
* @param w SEND_ONLY
*/
cudnnStatus_t cudnnSetTensor4dDescriptor(
cudnnTensorDescriptor_t tensorDesc,
cudnnTensorFormat_t format,
cudnnDataType_t dataType,
int n,
int c,
int h,
int w);

/**
* @param tensorDesc SEND_RECV
*/
cudnnStatus_t cudnnCreateTensorDescriptor(
cudnnTensorDescriptor_t *tensorDesc);

/**
* @param activationDesc SEND_RECV
*/
cudnnStatus_t cudnnCreateActivationDescriptor(
cudnnActivationDescriptor_t *activationDesc);

/**
* @param activationDesc SEND_ONLY
* @param mode SEND_ONLY
* @param reluNanOpt SEND_ONLY
* @param coef SEND_ONLY
*/
cudnnStatus_t cudnnSetActivationDescriptor(
cudnnActivationDescriptor_t activationDesc,
cudnnActivationMode_t mode,
cudnnNanPropagation_t reluNanOpt,
double coef);

/**
* @param handle SEND_ONLY
*/
cudnnStatus_t cudnnDestroy(cudnnHandle_t handle);
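
For orientation (not part of the diff): the entry points annotated above make up the standard cuDNN activation workflow, so a client application driving the generated RPC stubs would issue roughly the sequence sketched below. Device buffers are assumed to come from the CUDA runtime calls the project already forwards; error checking and descriptor teardown are omitted.

#include <cudnn.h>
#include <cuda_runtime.h>

int main() {
    cudnnHandle_t handle;
    cudnnCreate(&handle);

    // Describe a 1x1x4x4 float tensor in NCHW layout.
    cudnnTensorDescriptor_t desc;
    cudnnCreateTensorDescriptor(&desc);
    cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 4, 4);

    // Configure a ReLU activation with NaN propagation disabled.
    cudnnActivationDescriptor_t act;
    cudnnCreateActivationDescriptor(&act);
    cudnnSetActivationDescriptor(act, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0);

    // y = 1.0 * relu(x) + 0.0 * y; the same descriptor is reused because x and y share a shape.
    void *x = nullptr, *y = nullptr;
    cudaMalloc(&x, 16 * sizeof(float));
    cudaMalloc(&y, 16 * sizeof(float));
    float alpha = 1.0f, beta = 0.0f;
    cudnnActivationForward(handle, act, &alpha, desc, x, &beta, desc, y);

    cudaFree(x);
    cudaFree(y);
    cudnnDestroy(handle);
    return 0;
}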
25 changes: 21 additions & 4 deletions codegen/codegen.py
@@ -93,19 +93,22 @@ def client_rpc_write(self, f):
server_type=self.ptr.format(),
)
)

f.write(
" ({param_name} != nullptr && rpc_write(0, {param_name}, sizeof({base_type})) < 0) ||\n".format(
param_name=self.parameter.name,
base_type=self.ptr.ptr_to.format(),
# void is treated differently from non void pointer types
base_type=(self.ptr.format() if self.ptr.ptr_to.format() == "const void" else self.ptr.ptr_to.format()),
)
)

@property
def server_declaration(self) -> str:
c = self.ptr.ptr_to.const
self.ptr.ptr_to.const = False
# void is treated differently from non void pointer types
s = f" {self.ptr.format()} {self.parameter.name}_null_check;\n" + \
f" {self.ptr.ptr_to.format()} {self.parameter.name};\n"
f" {self.ptr.format() if self.ptr.ptr_to.format() == "void" else self.ptr.ptr_to.format()} {self.parameter.name};\n"
self.ptr.ptr_to.const = c
return s

@@ -121,7 +124,8 @@ def server_rpc_read(self, f):
f.write(
" ({param_name}_null_check && rpc_read(conn, &{param_name}, sizeof({base_type})) < 0) ||\n".format(
param_name=self.parameter.name,
base_type=self.ptr.ptr_to.format(),
# void is treated differently from non void pointer types
base_type=(self.ptr.format() if self.ptr.ptr_to.format() == "const void" else self.ptr.ptr_to.format()),
)
)
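
Read together with the client-side template above, these two changes define the wire convention for nullable pointer parameters: the client first sends the raw pointer value (so the server can tell whether it was null) and then the pointee only when it is non-null, and a void pointee travels as a pointer-sized value because its true size is unknown to the generator. The generated server-side read for a parameter such as const void *alpha is not shown in this diff; the fragment below is a reconstruction from these templates, with the rpc_read declaration and the function name assumed purely for illustration.

#include <cstddef>

// Assumed declaration of the project's transport primitive, for illustration only.
int rpc_read(const void *conn, void *buf, size_t size);

// Sketch of the reads the templates above would emit for `const void *alpha`.
int read_alpha_sketch(const void *conn) {
    const void *alpha_null_check;  // raw pointer value from the client, used only as a null flag
    void *alpha;                   // payload slot; a void pointee is read as a pointer-sized value
    if (rpc_read(conn, &alpha_null_check, sizeof(const void *)) < 0 ||
        (alpha_null_check != nullptr &&
         rpc_read(conn, &alpha, sizeof(const void *)) < 0))
        return -1;
    (void)alpha;
    return 0;
}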

@@ -379,7 +383,11 @@ def client_rpc_write(self, f):
def server_declaration(self) -> str:
if isinstance(self.type_, Pointer) and self.recv:
return f" {self.type_.ptr_to.format()} {self.parameter.name};\n"
return f" {self.type_.format()} {self.parameter.name};\n"
# ensure we don't have a const struct, otherwise we can't initialise it properly; e.g. "const cudnnTensorDescriptor_t xDesc;" is invalid...
# but "const cudnnTensorDescriptor_t *xDesc" IS valid. This subtle change carries repercussions.
elif "const " in self.type_.format() and "void" not in self.type_.format() and "*" not in self.type_.format():
return f"    {self.type_.format().replace('const', '')} {self.parameter.name};\n"
else: return f"    {self.type_.format()} {self.parameter.name};\n"
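
The rule behind this branch is ordinary C++ initialisation: a const object must be initialised at its declaration, while a pointer to const may be declared first and assigned later. A minimal illustration (not from the commit; the function name is ours):

#include <cudnn.h>

void server_declaration_example() {
    // const cudnnTensorDescriptor_t bad;  // would not compile: uninitialised const object
    cudnnTensorDescriptor_t xDesc;         // what the generator emits instead: const stripped,
                                           // so the value can be filled in afterwards (e.g. by rpc_read)
    const cudnnTensorDescriptor_t *pDesc;  // a pointer to const is fine left uninitialised
    (void)xDesc;
    (void)pDesc;
}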

def server_rpc_read(self, f):
if not self.send:
@@ -605,6 +613,8 @@ def error_const(return_type: str) -> str:
return "cudaErrorDevicesUnavailable"
if return_type == "cublasStatus_t":
return "CUBLAS_STATUS_NOT_INITIALIZED"
if return_type == "cudnnStatus_t":
return "CUDNN_STATUS_NOT_INITIALIZED"
raise NotImplementedError("Unknown return type: %s" % return_type)


@@ -618,6 +628,8 @@ def main():
options = ParserOptions(preprocessor=make_gcc_preprocessor(defines=["CUBLASAPI="]))

nvml_ast: ParsedData = parse_file("/usr/include/nvml.h", options=options)
cudnn_graph_ast: ParsedData = parse_file("/usr/include/cudnn_graph.h", options=options)
cudnn_ops_ast: ParsedData = parse_file("/usr/include/cudnn_ops.h", options=options)
cuda_ast: ParsedData = parse_file("/usr/include/cuda.h", options=options)
cublas_ast: ParsedData = parse_file("/usr/include/cublas_api.h", options=options)
cudart_ast: ParsedData = parse_file(
@@ -632,6 +644,8 @@ def main():
+ cuda_ast.namespace.functions
+ cudart_ast.namespace.functions
+ cublas_ast.namespace.functions
+ cudnn_graph_ast.namespace.functions
+ cudnn_ops_ast.namespace.functions
)

functions_with_annotations: list[tuple[Function, Function, list[Operation]]] = []
@@ -675,6 +689,7 @@ def main():
f.write(
"#include <nvml.h>\n"
"#include <cuda.h>\n"
"#include <cudnn.h>\n"
"#include <cublas_v2.h>\n"
"#include <cuda_runtime_api.h>\n\n"
"#include <cstring>\n"
@@ -815,7 +830,9 @@ def main():
with open("gen_server.cpp", "w") as f:
f.write(
"#include <nvml.h>\n"
"#include <iostream>\n"
"#include <cuda.h>\n"
"#include <cudnn.h>\n"
"#include <cublas_v2.h>\n"
"#include <cuda_runtime_api.h>\n\n"
"#include <cstring>\n"
7 changes: 7 additions & 0 deletions codegen/gen_api.h
@@ -888,3 +888,10 @@
#define RPC_cublasCreate_v2 887
#define RPC_cublasDestroy_v2 888
#define RPC_cublasSgemm_v2 889
#define RPC_cudnnCreate 890
#define RPC_cudnnDestroy 891
#define RPC_cudnnCreateTensorDescriptor 892
#define RPC_cudnnSetTensor4dDescriptor 893
#define RPC_cudnnCreateActivationDescriptor 894
#define RPC_cudnnSetActivationDescriptor 895
#define RPC_cudnnActivationForward 896
105 changes: 105 additions & 0 deletions codegen/gen_client.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <nvml.h>
#include <cuda.h>
#include <cudnn.h>
#include <cublas_v2.h>
#include <cuda_runtime_api.h>

@@ -11160,6 +11161,103 @@ cublasStatus_t cublasSgemm_v2(cublasHandle_t handle, cublasOperation_t transa, c
return return_value;
}

cudnnStatus_t cudnnCreate(cudnnHandle_t* handle)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnCreate) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_read(0, handle, sizeof(cudnnHandle_t)) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnDestroy(cudnnHandle_t handle)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnDestroy) < 0 ||
rpc_write(0, &handle, sizeof(cudnnHandle_t)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t* tensorDesc)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnCreateTensorDescriptor) < 0 ||
rpc_write(0, tensorDesc, sizeof(cudnnTensorDescriptor_t)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_read(0, tensorDesc, sizeof(cudnnTensorDescriptor_t)) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int n, int c, int h, int w)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnSetTensor4dDescriptor) < 0 ||
rpc_write(0, &tensorDesc, sizeof(cudnnTensorDescriptor_t)) < 0 ||
rpc_write(0, &format, sizeof(cudnnTensorFormat_t)) < 0 ||
rpc_write(0, &dataType, sizeof(cudnnDataType_t)) < 0 ||
rpc_write(0, &n, sizeof(int)) < 0 ||
rpc_write(0, &c, sizeof(int)) < 0 ||
rpc_write(0, &h, sizeof(int)) < 0 ||
rpc_write(0, &w, sizeof(int)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t* activationDesc)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnCreateActivationDescriptor) < 0 ||
rpc_write(0, activationDesc, sizeof(cudnnActivationDescriptor_t)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_read(0, activationDesc, sizeof(cudnnActivationDescriptor_t)) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, cudnnNanPropagation_t reluNanOpt, double coef)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnSetActivationDescriptor) < 0 ||
rpc_write(0, &activationDesc, sizeof(cudnnActivationDescriptor_t)) < 0 ||
rpc_write(0, &mode, sizeof(cudnnActivationMode_t)) < 0 ||
rpc_write(0, &reluNanOpt, sizeof(cudnnNanPropagation_t)) < 0 ||
rpc_write(0, &coef, sizeof(double)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnActivationForward(cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnActivationForward) < 0 ||
rpc_write(0, &handle, sizeof(cudnnHandle_t)) < 0 ||
rpc_write(0, &activationDesc, sizeof(cudnnActivationDescriptor_t)) < 0 ||
rpc_write(0, &alpha, sizeof(const void*)) < 0 ||
(alpha != nullptr && rpc_write(0, alpha, sizeof(const void*)) < 0) ||
rpc_write(0, &xDesc, sizeof(const cudnnTensorDescriptor_t)) < 0 ||
rpc_write(0, &x, sizeof(const void*)) < 0 ||
rpc_write(0, &beta, sizeof(const void*)) < 0 ||
(beta != nullptr && rpc_write(0, beta, sizeof(const void*)) < 0) ||
rpc_write(0, &yDesc, sizeof(const cudnnTensorDescriptor_t)) < 0 ||
rpc_write(0, &y, sizeof(void*)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

std::unordered_map<std::string, void *> functionMap = {
{"__cudaRegisterVar", (void *)__cudaRegisterVar},
{"__cudaRegisterFunction", (void *)__cudaRegisterFunction},
@@ -12018,6 +12116,13 @@ std::unordered_map<std::string, void *> functionMap = {
{"cublasCreate_v2", (void *)cublasCreate_v2},
{"cublasDestroy_v2", (void *)cublasDestroy_v2},
{"cublasSgemm_v2", (void *)cublasSgemm_v2},
{"cudnnCreate", (void *)cudnnCreate},
{"cudnnDestroy", (void *)cudnnDestroy},
{"cudnnCreateTensorDescriptor", (void *)cudnnCreateTensorDescriptor},
{"cudnnSetTensor4dDescriptor", (void *)cudnnSetTensor4dDescriptor},
{"cudnnCreateActivationDescriptor", (void *)cudnnCreateActivationDescriptor},
{"cudnnSetActivationDescriptor", (void *)cudnnSetActivationDescriptor},
{"cudnnActivationForward", (void *)cudnnActivationForward},
{"cuMemcpy_ptds", (void *)cuMemcpy},
{"cuMemcpyAsync_ptsz", (void *)cuMemcpyAsync},
{"cuMemcpyPeer_ptds", (void *)cuMemcpyPeer},
(The remaining changed files are not shown.)
