Skip to content

Commit

Permalink
some updates added GPU
Browse files Browse the repository at this point in the history
  • Loading branch information
galabovaa committed Nov 8, 2024
1 parent 7aede2a commit ac36f78
Show file tree
Hide file tree
Showing 18 changed files with 1,010 additions and 16 deletions.
27 changes: 26 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,27 @@ set(DEBUG_MEMORY "Off" CACHE STRING "Sanitizers")
# emscripten
option(EMSCRIPTEN_HTML "Emscripten HTML output" OFF)

# option(CUPDLP_GPU "Build pdlp with CPU" ON)
# message(STATUS "Build pdlp with CPU: ${CUPDLP_CPU}")

option(CUPDLP_GPU "Build pdlp with GPU" OFF)
message(STATUS "Build pdlp with GPU: ${CUPDLP_GPU}")

if (NOT LINUX)
set (CUPDLP_GPU OFF)
message(STATUS "CUPLDP with Nvidia is only supported on Linux at the moment. Using CPU version.")
endif()

if (CUPDLP_GPU)
set (CUPDLP_CPU OFF)
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})
message(NOTICE "Set build cuPDLP with CUDA")
include(FindCUDAConf.cmake)
else()
set (CUPDLP_CPU ON)
set(CUDA_LIBRARY-NOTFOUND true)
endif()

if (BUILD_CXX)
# Default Build Type to be Release
get_property(isMultiConfig GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
Expand Down Expand Up @@ -248,7 +269,11 @@ if (BUILD_CXX)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
message(STATUS "IPO / LTO: enabled")
endif()
endif()
if (CUPDLP_GPU AND CMAKE_INTERPROCEDURAL_OPTIMIZATION)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION FALSE)
message(STATUS "IPO / LTO is not supported at the moment when PDLP is using GPU: LTO disabled.")
endif()


include(CheckCXXSourceCompiles)
check_cxx_source_compiles(
Expand Down
33 changes: 33 additions & 0 deletions FindCUDAConf.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

set(CUDA_LIBRARY-NOTFOUND, OFF)
message(NOTICE "Finding CUDA environment")
message(NOTICE " - CUDA Home detected at $ENV{CUDA_HOME}")
set(CMAKE_CUDA_ARCHITECTURES "all")
set(CMAKE_CUDA_PATH "$ENV{CUDA_HOME}")
set(CMAKE_CUDA_COMPILER "${CMAKE_CUDA_PATH}/bin/nvcc")

enable_language(CUDA)

find_library(CUDA_LIBRARY_ART
NAMES cudart
HINTS "${CMAKE_CUDA_PATH}/lib64/"
REQUIRED
)
find_library(CUDA_LIBRARY_SPS
NAMES cusparse
HINTS "${CMAKE_CUDA_PATH}/lib64/"
REQUIRED
)
find_library(CUDA_LIBRARY_BLS
NAMES cublas
HINTS "${CMAKE_CUDA_PATH}/lib64/"
REQUIRED
)
if (${CUDA_LIBRARY-NOTFOUND})
message(WARNING " - CUDA Libraries not detected at $ENV{CUDA_HOME}")
else ()
message(NOTICE " - CUDA Libraries detected at $ENV{CUDA_HOME}")
set(CUDA_LIBRARY ${CUDA_LIBRARY_ART} ${CUDA_LIBRARY_SPS} ${CUDA_LIBRARY_BLS})
message(NOTICE " - :${CUDA_LIBRARY}")
endif ()

13 changes: 13 additions & 0 deletions check/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,19 @@ if (FORTRAN)
${HIGHS_SOURCE_DIR}/check)
endif()

if (CUPLDP_GPU)
# add a test
add_executable(testcudalin ${HIGHS_SOURCE_DIR}/src/pdlp/cupdlp/cuda/test_cuda_linalg.c)
add_executable(testcublas ${HIGHS_SOURCE_DIR}/src/pdlp/cupdlp/cuda/test_cublas.c)

set_target_properties(testcudalin PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
#target_include_directories(cudalinalg PRIVATE ${CUPDLP_INCLUDE_DIR}/cuda)
target_link_libraries(testcudalin PRIVATE highs ${CUDA_LIBRARY})

set_target_properties(testcublas PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_link_libraries(testcublas PRIVATE highs ${CUDA_LIBRARY})
endif()

if (NOT FAST_BUILD OR ALL_TESTS)
# prepare Catch library
set(CATCH_INCLUDE_DIR ${HIGHS_SOURCE_DIR}/extern)
Expand Down
41 changes: 29 additions & 12 deletions cmake/cpp-highs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,35 @@ install(FILES ${PROJECT_BINARY_DIR}/highs_export.h

string (TOLOWER ${PROJECT_NAME} lower)

install(TARGETS highs
EXPORT ${lower}-targets
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/highs
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/highs)

# Add library targets to the build-tree export set
export(TARGETS highs
NAMESPACE ${PROJECT_NAMESPACE}::highs
FILE "${HIGHS_BINARY_DIR}/highs-targets.cmake")
# if (NOT CUPDLP_GPU)
install(TARGETS highs
EXPORT ${lower}-targets
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/highs
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/highs)

# Add library targets to the build-tree export set
export(TARGETS highs
NAMESPACE ${PROJECT_NAMESPACE}::highs
FILE "${HIGHS_BINARY_DIR}/highs-targets.cmake")
# else()

# install(TARGETS highs cudalin
# EXPORT ${lower}-targets
# INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/highs
# ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
# LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
# RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
# PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/highs)

# # Add library targets to the build-tree export set
# export(TARGETS highs cudalin
# NAMESPACE ${PROJECT_NAMESPACE}::highs
# FILE "${HIGHS_BINARY_DIR}/highs-targets.cmake")
# endif()


install(EXPORT ${lower}-targets
NAMESPACE ${PROJECT_NAMESPACE}::
Expand Down
9 changes: 9 additions & 0 deletions cmake/python-highs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ target_link_libraries(_core PRIVATE pybind11::headers)
# sources for python
target_sources(_core PUBLIC ${sources_python} ${headers_python})

if (CUPDLP_GPU)
enable_language(CXX CUDA)
target_sources(_core PRIVATE ${cuda_sources_python})

target_include_directories(_core PUBLIC "/usr/local/cuda/include")
set_target_properties(_core PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()


# include directories for python
target_include_directories(_core PUBLIC ${include_dirs_python})

Expand Down
6 changes: 6 additions & 0 deletions cmake/sources-python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ set(cupdlp_headers_python
src/pdlp/cupdlp/cupdlp_step.h
src/pdlp/cupdlp/cupdlp_utils.c)

set(cuda_sources_python
pdlp/cupdlp/cuda/cupdlp_cuda_kernels.cu
pdlp/cupdlp/cuda/cupdlp_cuda_kernels.cuh
pdlp/cupdlp/cuda/cupdlp_cudalinalg.cuh
pdlp/cupdlp/cuda/cupdlp_cudalinalg.cu)

set(basiclu_sources_python
src/ipm/basiclu/basiclu_factorize.c
src/ipm/basiclu/basiclu_get_factors.c
Expand Down
7 changes: 7 additions & 0 deletions cmake/sources.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ set(cupdlp_headers
pdlp/cupdlp/cupdlp_step.h
pdlp/cupdlp/cupdlp_utils.c)

set(cuda_sources
pdlp/cupdlp/cuda/cupdlp_cuda_kernels.cu
pdlp/cupdlp/cuda/cupdlp_cuda_kernels.cuh
pdlp/cupdlp/cuda/cupdlp_cudalinalg.cuh
pdlp/cupdlp/cuda/cupdlp_cudalinalg.cu)


set(basiclu_sources
ipm/basiclu/basiclu_factorize.c
ipm/basiclu/basiclu_get_factors.c
Expand Down
11 changes: 11 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,17 @@ else()
# $<BUILD_INTERFACE:${HIGHS_SOURCE_DIR}/extern/pdqsort>)

target_sources(highs PRIVATE ${sources} ${headers} ${win_version_file})

# Optional Cuda
if (CUPDLP_GPU)
enable_language(CXX CUDA)
target_sources(highs PRIVATE ${cuda_sources})

set_target_properties(highs PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

target_link_libraries(highs ${CUDA_LIBRARY} m)
endif()

target_include_directories(highs PRIVATE ${include_dirs})

if(MSVC)
Expand Down
1 change: 1 addition & 0 deletions src/HConfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#cmakedefine FAST_BUILD
#cmakedefine ZLIB_FOUND
#cmakedefine CUPDLP_CPU
#cmakedefine CUPDLP_GPU
#cmakedefine CMAKE_BUILD_TYPE "@CMAKE_BUILD_TYPE@"
#cmakedefine CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@"
#cmakedefine HIGHSINT64
Expand Down
121 changes: 121 additions & 0 deletions src/pdlp/cupdlp/cuda/cupdlp_cuda_kernels.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#include "cupdlp_cuda_kernels.cuh"

dim3 cuda_gridsize(cupdlp_int n) {
cupdlp_int k = (n - 1) / CUPDLP_BLOCK_SIZE + 1;
cupdlp_int x = k;
cupdlp_int y = 1;
if (x > 65535) {
x = ceil(sqrt(k));
y = (n - 1) / (x * CUPDLP_BLOCK_SIZE) + 1;
}
dim3 d = {x, y, 1};
return d;
}

__global__ void element_wise_dot_kernel(cupdlp_float *x, const cupdlp_float *y,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] *= y[i];
}

__global__ void element_wise_div_kernel(cupdlp_float *x, const cupdlp_float *y,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] /= y[i];
}

__global__ void element_wise_projlb_kernel(cupdlp_float *x,
const cupdlp_float *lb,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = x[i] < lb[i] ? lb[i] : x[i];
}

__global__ void element_wise_projub_kernel(cupdlp_float *x,
const cupdlp_float *ub,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = x[i] > ub[i] ? ub[i] : x[i];
}

__global__ void element_wise_projSamelb_kernel(cupdlp_float *x,
const cupdlp_float lb,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = x[i] <= lb ? lb : x[i];
}

__global__ void element_wise_projSameub_kernel(cupdlp_float *x,
const cupdlp_float ub,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = x[i] >= ub ? ub : x[i];
}

__global__ void element_wise_initHaslb_kernal(cupdlp_float *haslb,
const cupdlp_float *lb,
const cupdlp_float bound,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) haslb[i] = lb[i] > bound ? 1.0 : 0.0;
}

__global__ void element_wise_initHasub_kernal(cupdlp_float *hasub,
const cupdlp_float *ub,
const cupdlp_float bound,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) hasub[i] = ub[i] < bound ? 1.0 : 0.0;
}

__global__ void element_wise_filterlb_kernal(cupdlp_float *x,
const cupdlp_float *lb,
const cupdlp_float bound,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = lb[i] > bound ? lb[i] : 0.0;
}

__global__ void element_wise_filterub_kernal(cupdlp_float *x,
const cupdlp_float *ub,
const cupdlp_float bound,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = ub[i] < bound ? ub[i] : 0.0;
}

__global__ void init_cuda_vec_kernal(cupdlp_float *x, const cupdlp_float val,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = val;
}

//xUpdate = x - dPrimalStep * (cost - ATy)
__global__ void primal_grad_step_kernal(cupdlp_float *xUpdate,
const cupdlp_float *x,
const cupdlp_float *cost,
const cupdlp_float *ATy,
const cupdlp_float dPrimalStep,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) xUpdate[i] = x[i] - dPrimalStep * (cost[i] - ATy[i]);
}

//yUpdate = y + dDualStep * (b -2AxUpdate + Ax)
__global__ void dual_grad_step_kernal(cupdlp_float *yUpdate,
const cupdlp_float *y,
const cupdlp_float *b,
const cupdlp_float *Ax,
const cupdlp_float *AxUpdate,
const cupdlp_float dDualStep,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) yUpdate[i] = y[i] + dDualStep * (b[i] - 2 * AxUpdate[i] + Ax[i]);
}

// z = x - y
__global__ void naive_sub_kernal(cupdlp_float *z, const cupdlp_float *x,
const cupdlp_float *y, const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) z[i] = x[i] - y[i];
}
Loading

0 comments on commit ac36f78

Please sign in to comment.