Brodey/cudnn #55

Merged
merged 22 commits on Nov 22, 2024
Changes from all commits (22 commits)
350c7d8
chore: bm
brodeynewman Oct 9, 2024
1ad672a
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Oct 9, 2024
59150ee
chore: merge
brodeynewman Oct 9, 2024
c7d0b7d
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Oct 11, 2024
29a919e
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Oct 14, 2024
233b8e9
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Oct 14, 2024
fc00189
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Oct 17, 2024
79ccd26
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Oct 23, 2024
38a351c
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Oct 29, 2024
ab2e209
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Nov 6, 2024
ccd7c31
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Nov 8, 2024
25cad41
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Nov 9, 2024
11f8e43
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Nov 11, 2024
8e3d836
Merge branch 'main' of github.com:kevmo314/scuda
brodeynewman Nov 18, 2024
bb19397
chore: cudnn wip
brodeynewman Nov 19, 2024
937dddf
fix: cudnn example
brodeynewman Nov 21, 2024
bc73a2c
chore: more annotations
brodeynewman Nov 21, 2024
68597d9
chore: more annotations
brodeynewman Nov 21, 2024
7278ae9
fix: nullable pointer types
brodeynewman Nov 22, 2024
6668e54
chore: test + codegen fixes for const struct
brodeynewman Nov 22, 2024
23e8827
test: break test for ci validation
brodeynewman Nov 22, 2024
d5198e3
chore: make test pass
brodeynewman Nov 22, 2024
71 changes: 71 additions & 0 deletions codegen/annotations.h
@@ -1,5 +1,6 @@
#include <nvml.h>
#include <cuda.h>
#include <cudnn.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>

@@ -5545,6 +5546,10 @@ cudaError_t cudaGetExportTable(const void **ppExportTable, const cudaUUID_t *pEx
* @param symbolPtr SEND_RECV
*/
cudaError_t cudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr);
/**
* @param handle RECV_ONLY
*/
cudnnStatus_t cudnnCreate(cudnnHandle_t *handle);
/**
* @param handle RECV_ONLY
*/
@@ -5577,3 +5582,69 @@ cublasStatus_t cublasSgemm_v2(cublasHandle_t handle,
const float *B, int ldb,
const float *beta,
float *C, int ldc);
/**
* @param handle SEND_ONLY
* @param activationDesc SEND_ONLY
* @param alpha SEND_ONLY NULLABLE
* @param xDesc SEND_ONLY
* @param x SEND_ONLY
* @param beta SEND_ONLY NULLABLE
* @param yDesc SEND_ONLY
* @param y SEND_ONLY
*/
cudnnStatus_t cudnnActivationForward(
cudnnHandle_t handle,
cudnnActivationDescriptor_t activationDesc,
const void *alpha,
const cudnnTensorDescriptor_t *xDesc,
const void *x,
const void *beta,
const cudnnTensorDescriptor_t *yDesc,
void *y);

/**
* @param tensorDesc SEND_ONLY
* @param format SEND_ONLY
* @param dataType SEND_ONLY
* @param n SEND_ONLY
* @param c SEND_ONLY
* @param h SEND_ONLY
* @param w SEND_ONLY
*/
cudnnStatus_t cudnnSetTensor4dDescriptor(
cudnnTensorDescriptor_t tensorDesc,
cudnnTensorFormat_t format,
cudnnDataType_t dataType,
int n,
int c,
int h,
int w);

/**
* @param tensorDesc SEND_RECV
*/
cudnnStatus_t cudnnCreateTensorDescriptor(
cudnnTensorDescriptor_t *tensorDesc);

/**
* @param activationDesc SEND_RECV
*/
cudnnStatus_t cudnnCreateActivationDescriptor(
cudnnActivationDescriptor_t *activationDesc);

/**
* @param activationDesc SEND_ONLY
* @param mode SEND_ONLY
* @param reluNanOpt SEND_ONLY
* @param coef SEND_ONLY
*/
cudnnStatus_t cudnnSetActivationDescriptor(
cudnnActivationDescriptor_t activationDesc,
cudnnActivationMode_t mode,
cudnnNanPropagation_t reluNanOpt,
double coef);

/**
* @param handle SEND_ONLY
*/
cudnnStatus_t cudnnDestroy(cudnnHandle_t handle);
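
Note on the annotation tags (inferred from the generated code later in this diff, not stated in the header itself): SEND_ONLY parameters are serialized from client to server, RECV_ONLY parameters are read back into the caller, SEND_RECV does both, and NULLABLE pointers get a presence guard so the payload is only written when the pointer is non-null. For a NULLABLE const void* such as alpha, the emitted client-side pattern looks like:

// Sketch of the emitted pattern (see cudnnActivationForward in gen_client.cpp below):
// the pointer value itself is written first and doubles as a presence flag on the
// server, then the payload follows only when the pointer is non-null. The payload
// size falls back to the pointer type because void has no pointee size to take.
rpc_write(0, &alpha, sizeof(const void*)) < 0 ||
(alpha != nullptr && rpc_write(0, alpha, sizeof(const void*)) < 0) ||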
25 changes: 21 additions & 4 deletions codegen/codegen.py
@@ -93,19 +93,22 @@ def client_rpc_write(self, f):
server_type=self.ptr.format(),
)
)

f.write(
" ({param_name} != nullptr && rpc_write(0, {param_name}, sizeof({base_type})) < 0) ||\n".format(
param_name=self.parameter.name,
base_type=self.ptr.ptr_to.format(),
# void is treated differently from non void pointer types
base_type=(self.ptr.format() if self.ptr.ptr_to.format() == "const void" else self.ptr.ptr_to.format()),
)
)

@property
def server_declaration(self) -> str:
c = self.ptr.ptr_to.const
self.ptr.ptr_to.const = False
# void is treated differently from non void pointer types
s = f" {self.ptr.format()} {self.parameter.name}_null_check;\n" + \
f" {self.ptr.ptr_to.format()} {self.parameter.name};\n"
f" {self.ptr.format() if self.ptr.ptr_to.format() == "void" else self.ptr.ptr_to.format()} {self.parameter.name};\n"
self.ptr.ptr_to.const = c
return s

@@ -121,7 +124,8 @@ def server_rpc_read(self, f):
f.write(
" ({param_name}_null_check && rpc_read(conn, &{param_name}, sizeof({base_type})) < 0) ||\n".format(
param_name=self.parameter.name,
base_type=self.ptr.ptr_to.format(),
# void is treated differently from non void pointer types
base_type=(self.ptr.format() if self.ptr.ptr_to.format() == "const void" else self.ptr.ptr_to.format()),
)
)

@@ -379,7 +383,11 @@ def client_rpc_write(self, f):
def server_declaration(self) -> str:
if isinstance(self.type_, Pointer) and self.recv:
return f" {self.type_.ptr_to.format()} {self.parameter.name};\n"
return f" {self.type_.format()} {self.parameter.name};\n"
# ensure we don't have a const struct, otherwise we can't initialise it properly; ex: "const cudnnTensorDescriptor_t xDesc;" is invalid...
# but "const cudnnTensorDescriptor_t *xDesc" IS valid. This subtle change carries reprecussions.
elif "const " in self.type_.format() and not "void" in self.type_.format() and not "*" in self.type_.format():
return f" {self.type_.format().replace("const", "")} {self.parameter.name};\n"
else: return f" {self.type_.format()} {self.parameter.name};\n"

def server_rpc_read(self, f):
if not self.send:
@@ -605,6 +613,8 @@ def error_const(return_type: str) -> str:
return "cudaErrorDevicesUnavailable"
if return_type == "cublasStatus_t":
return "CUBLAS_STATUS_NOT_INITIALIZED"
if return_type == "cudnnStatus_t":
return "CUDNN_STATUS_NOT_INITIALIZED"
raise NotImplementedError("Unknown return type: %s" % return_type)


@@ -618,6 +628,8 @@ def main():
options = ParserOptions(preprocessor=make_gcc_preprocessor(defines=["CUBLASAPI="]))

nvml_ast: ParsedData = parse_file("/usr/include/nvml.h", options=options)
cudnn_graph_ast: ParsedData = parse_file("/usr/include/cudnn_graph.h", options=options)
cudnn_ops_ast: ParsedData = parse_file("/usr/include/cudnn_ops.h", options=options)
cuda_ast: ParsedData = parse_file("/usr/include/cuda.h", options=options)
cublas_ast: ParsedData = parse_file("/usr/include/cublas_api.h", options=options)
cudart_ast: ParsedData = parse_file(
@@ -632,6 +644,8 @@ def main():
+ cuda_ast.namespace.functions
+ cudart_ast.namespace.functions
+ cublas_ast.namespace.functions
+ cudnn_graph_ast.namespace.functions
+ cudnn_ops_ast.namespace.functions
)

functions_with_annotations: list[tuple[Function, Function, list[Operation]]] = []
@@ -675,6 +689,7 @@ def main():
f.write(
"#include <nvml.h>\n"
"#include <cuda.h>\n"
"#include <cudnn.h>\n"
"#include <cublas_v2.h>\n"
"#include <cuda_runtime_api.h>\n\n"
"#include <cstring>\n"
@@ -815,7 +830,9 @@ def main():
with open("gen_server.cpp", "w") as f:
f.write(
"#include <nvml.h>\n"
"#include <iostream>\n"
"#include <cuda.h>\n"
"#include <cudnn.h>\n"
"#include <cublas_v2.h>\n"
"#include <cuda_runtime_api.h>\n\n"
"#include <cstring>\n"
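A minimal C++ illustration of the const-struct note above (a standalone sketch, not part of the diff): an uninitialized const handle-typed local is ill-formed, while a pointer-to-const can be declared and filled in later, which is why the generated server declaration strips the const qualifier from non-pointer, non-void parameters.

// Sketch only: cudnnTensorDescriptor_t is an opaque handle (a pointer typedef).
const cudnnTensorDescriptor_t xDesc;   // error: const object left uninitialized
const cudnnTensorDescriptor_t *pDesc;  // fine: mutable pointer to const handle, assigned later
cudnnTensorDescriptor_t yDesc;         // what the generator emits after dropping "const"
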
7 changes: 7 additions & 0 deletions codegen/gen_api.h
@@ -888,3 +888,10 @@
#define RPC_cublasCreate_v2 887
#define RPC_cublasDestroy_v2 888
#define RPC_cublasSgemm_v2 889
#define RPC_cudnnCreate 890
#define RPC_cudnnDestroy 891
#define RPC_cudnnCreateTensorDescriptor 892
#define RPC_cudnnSetTensor4dDescriptor 893
#define RPC_cudnnCreateActivationDescriptor 894
#define RPC_cudnnSetActivationDescriptor 895
#define RPC_cudnnActivationForward 896
105 changes: 105 additions & 0 deletions codegen/gen_client.cpp
@@ -1,5 +1,6 @@
#include <nvml.h>
#include <cuda.h>
#include <cudnn.h>
#include <cublas_v2.h>
#include <cuda_runtime_api.h>

@@ -11160,6 +11161,103 @@ cublasStatus_t cublasSgemm_v2(cublasHandle_t handle, cublasOperation_t transa, c
return return_value;
}

cudnnStatus_t cudnnCreate(cudnnHandle_t* handle)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnCreate) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_read(0, handle, sizeof(cudnnHandle_t)) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnDestroy(cudnnHandle_t handle)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnDestroy) < 0 ||
rpc_write(0, &handle, sizeof(cudnnHandle_t)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t* tensorDesc)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnCreateTensorDescriptor) < 0 ||
rpc_write(0, tensorDesc, sizeof(cudnnTensorDescriptor_t)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_read(0, tensorDesc, sizeof(cudnnTensorDescriptor_t)) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int n, int c, int h, int w)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnSetTensor4dDescriptor) < 0 ||
rpc_write(0, &tensorDesc, sizeof(cudnnTensorDescriptor_t)) < 0 ||
rpc_write(0, &format, sizeof(cudnnTensorFormat_t)) < 0 ||
rpc_write(0, &dataType, sizeof(cudnnDataType_t)) < 0 ||
rpc_write(0, &n, sizeof(int)) < 0 ||
rpc_write(0, &c, sizeof(int)) < 0 ||
rpc_write(0, &h, sizeof(int)) < 0 ||
rpc_write(0, &w, sizeof(int)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t* activationDesc)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnCreateActivationDescriptor) < 0 ||
rpc_write(0, activationDesc, sizeof(cudnnActivationDescriptor_t)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_read(0, activationDesc, sizeof(cudnnActivationDescriptor_t)) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, cudnnNanPropagation_t reluNanOpt, double coef)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnSetActivationDescriptor) < 0 ||
rpc_write(0, &activationDesc, sizeof(cudnnActivationDescriptor_t)) < 0 ||
rpc_write(0, &mode, sizeof(cudnnActivationMode_t)) < 0 ||
rpc_write(0, &reluNanOpt, sizeof(cudnnNanPropagation_t)) < 0 ||
rpc_write(0, &coef, sizeof(double)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

cudnnStatus_t cudnnActivationForward(cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y)
{
cudnnStatus_t return_value;
if (rpc_start_request(0, RPC_cudnnActivationForward) < 0 ||
rpc_write(0, &handle, sizeof(cudnnHandle_t)) < 0 ||
rpc_write(0, &activationDesc, sizeof(cudnnActivationDescriptor_t)) < 0 ||
rpc_write(0, &alpha, sizeof(const void*)) < 0 ||
(alpha != nullptr && rpc_write(0, alpha, sizeof(const void*)) < 0) ||
rpc_write(0, &xDesc, sizeof(const cudnnTensorDescriptor_t)) < 0 ||
rpc_write(0, &x, sizeof(const void*)) < 0 ||
rpc_write(0, &beta, sizeof(const void*)) < 0 ||
(beta != nullptr && rpc_write(0, beta, sizeof(const void*)) < 0) ||
rpc_write(0, &yDesc, sizeof(const cudnnTensorDescriptor_t)) < 0 ||
rpc_write(0, &y, sizeof(void*)) < 0 ||
rpc_wait_for_response(0) < 0 ||
rpc_end_response(0, &return_value) < 0)
return CUDNN_STATUS_NOT_INITIALIZED;
return return_value;
}

std::unordered_map<std::string, void *> functionMap = {
{"__cudaRegisterVar", (void *)__cudaRegisterVar},
{"__cudaRegisterFunction", (void *)__cudaRegisterFunction},
@@ -12018,6 +12116,13 @@ std::unordered_map<std::string, void *> functionMap = {
{"cublasCreate_v2", (void *)cublasCreate_v2},
{"cublasDestroy_v2", (void *)cublasDestroy_v2},
{"cublasSgemm_v2", (void *)cublasSgemm_v2},
{"cudnnCreate", (void *)cudnnCreate},
{"cudnnDestroy", (void *)cudnnDestroy},
{"cudnnCreateTensorDescriptor", (void *)cudnnCreateTensorDescriptor},
{"cudnnSetTensor4dDescriptor", (void *)cudnnSetTensor4dDescriptor},
{"cudnnCreateActivationDescriptor", (void *)cudnnCreateActivationDescriptor},
{"cudnnSetActivationDescriptor", (void *)cudnnSetActivationDescriptor},
{"cudnnActivationForward", (void *)cudnnActivationForward},
{"cuMemcpy_ptds", (void *)cuMemcpy},
{"cuMemcpyAsync_ptsz", (void *)cuMemcpyAsync},
{"cuMemcpyPeer_ptds", (void *)cuMemcpyPeer},
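For reference, a minimal sketch (not taken from this PR) of how the cuDNN surface wired up here might be exercised end to end: ReLU forward on a small NCHW tensor. Error handling is omitted, the tensor shape and values are illustrative, and it assumes a working CUDA device with the scuda client library loaded in front of libcudnn. The two descriptor-destroy calls at the end are standard cuDNN cleanup but are not part of the surface added in this PR.

// Sketch: create handle and descriptors, run ReLU forward, read the result back.
#include <cudnn.h>
#include <cuda_runtime.h>
#include <vector>
#include <cstdio>

int main() {
    const int n = 1, c = 1, h = 2, w = 2;
    const size_t bytes = n * c * h * w * sizeof(float);

    cudnnHandle_t handle;
    cudnnCreate(&handle);

    cudnnTensorDescriptor_t desc;
    cudnnCreateTensorDescriptor(&desc);
    cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w);

    cudnnActivationDescriptor_t act;
    cudnnCreateActivationDescriptor(&act);
    cudnnSetActivationDescriptor(act, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0);

    std::vector<float> host = {-1.0f, 0.5f, -2.0f, 3.0f};
    float *d_x, *d_y;
    cudaMalloc(&d_x, bytes);
    cudaMalloc(&d_y, bytes);
    cudaMemcpy(d_x, host.data(), bytes, cudaMemcpyHostToDevice);

    const float alpha = 1.0f, beta = 0.0f;
    cudnnActivationForward(handle, act, &alpha, desc, d_x, &beta, desc, d_y);

    cudaMemcpy(host.data(), d_y, bytes, cudaMemcpyDeviceToHost);
    for (float v : host) printf("%f\n", v);   // negative inputs clamp to 0 under ReLU

    cudaFree(d_x);
    cudaFree(d_y);
    cudnnDestroyActivationDescriptor(act);    // standard cleanup, outside this PR's surface
    cudnnDestroyTensorDescriptor(desc);       // ditto
    cudnnDestroy(handle);
    return 0;
}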