Skip to content

Commit

Permalink
CUDA: compress mode option and default to size
Browse files Browse the repository at this point in the history
CUDA 12.8 added the option to specify stronger compression for binaries.
  • Loading branch information
Green-Sky committed Feb 28, 2025
1 parent 36c258e commit 6cdc5d3
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 0 deletions.
3 changes: 3 additions & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copie
# CUDA backend feature toggles (user-facing boolean cache options).
option(GGML_CUDA_NO_VMM
    "ggml: do not try to use CUDA VMM"
    OFF)
option(GGML_CUDA_FA_ALL_QUANTS
    "ggml: compile all quants for FlashAttention"
    OFF)
# The initial value of this toggle is taken from GGML_CUDA_GRAPHS_DEFAULT.
option(GGML_CUDA_GRAPHS
    "ggml: use CUDA graphs (llama.cpp only)"
    ${GGML_CUDA_GRAPHS_DEFAULT})
# Binary compression mode for CUDA builds; defaults to "size".
# The STRINGS property only lists the suggested choices for cmake-gui/ccmake
# drop-downs; CMake itself does not reject other values.
set(GGML_CUDA_COMPRESSION_MODE
    "size"
    CACHE STRING
    "ggml: cuda link binary compression mode; requires cuda 12.8+")
set_property(
    CACHE GGML_CUDA_COMPRESSION_MODE
    PROPERTY STRINGS
        none
        speed
        balance
        size)

# HIP backend feature toggles (both disabled unless the user opts in).
option(GGML_HIP
    "ggml: use HIP"
    OFF)
option(GGML_HIP_GRAPHS
    "ggml: use HIP graph, experimental, slow"
    OFF)
Expand Down
9 changes: 9 additions & 0 deletions ggml/src/ggml-cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ if (CUDAToolkit_FOUND)

# Start the CUDA flag list with fast-math; the blocks below append to it.
set(CUDA_FLAGS "-use_fast_math")

# The -compress-mode flag is only passed for CUDA toolkit 12.8 and newer.
# Forward the user's GGML_CUDA_COMPRESSION_MODE choice. Options are:
#   - none    (not recommended)
#   - speed   (nvcc's default)
#   - balance
#   - size
# An empty or undefined mode is skipped, so nvcc never receives a dangling
# "-compress-mode=" argument and keeps its own default instead.
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
    if (NOT "${GGML_CUDA_COMPRESSION_MODE}" STREQUAL "")
        list(APPEND CUDA_FLAGS "-compress-mode=${GGML_CUDA_COMPRESSION_MODE}")
    endif()
endif()

# When GGML_FATAL_WARNINGS is enabled, add "-Werror all-warnings" to the
# CUDA flags (two separate list elements: the switch and its argument).
if (GGML_FATAL_WARNINGS)
    list(APPEND CUDA_FLAGS
        "-Werror"
        "all-warnings")
endif()
Expand Down

0 comments on commit 6cdc5d3

Please sign in to comment.