Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NVIDIA: Adding cuPQC as a backend for ML-KEM. #2044

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .CMake/alg_support.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -338,18 +338,36 @@ if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCT
endif()
endif()

if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
if(OQS_USE_CUPQC)
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512_cuda "" ON "OQS_ENABLE_KEM_ml_kem_512" OFF)
endif()
endif()

if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_768" OFF)
endif()
endif()

if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
if(OQS_USE_CUPQC)
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_cuda "" ON "OQS_ENABLE_KEM_ml_kem_768" OFF)
endif()
endif()

if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_1024" OFF)
endif()
endif()

if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
if(OQS_USE_CUPQC)
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_cuda "" ON "OQS_ENABLE_KEM_ml_kem_1024" OFF)
endif()
endif()


if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux")
if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,19 @@ jobs:
--numprocesses=auto \
--ignore=tests/test_code_conventions.py ${{ matrix.PYTEST_ARGS }}"

cupqc-buildcheck:
name: Check that code builds with OQS_USE_CUPQC=ON
runs-on: ubuntu-latest
container: openquantumsafe/ci-ubuntu-latest:latest
steps:
- name: Checkout code
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # pin@v4
- name: Configure
run: mkdir build && cd build && cuPQC_DIR=/cupqc/cupqc/cupqc-pkg-0.2.0/cmake/ CUDACXX=/usr/local/cuda-12.6/bin/nvcc cmake -GNinja -DOQS_USE_CUPQC=ON -DCMAKE_CUDA_ARCHITECTURES=80 .. && cmake -LA -N ..
- name: Build code
run: ninja
working-directory: build

linux_cross_compile:
runs-on: ubuntu-latest
container: openquantumsafe/ci-ubuntu-latest:latest
Expand Down
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ option(OQS_LIBJADE_BUILD "Enable formally verified implementation of supported a
option(OQS_PERMIT_UNSUPPORTED_ARCHITECTURE "Permit compilation on an an unsupported architecture." OFF)
option(OQS_STRICT_WARNINGS "Enable all compiler warnings." OFF)
option(OQS_EMBEDDED_BUILD "Compile liboqs for an Embedded environment without a full standard library." OFF)
option(OQS_USE_CUPQC "Utilize cuPQC as the backend for supported PQC algorithms." OFF)

# Libfuzzer isn't supported on gcc
if('${CMAKE_C_COMPILER_ID}' STREQUAL 'Clang')
Expand Down Expand Up @@ -140,6 +141,16 @@ else()
message(FATAL_ERROR "Unknown or unsupported processor: " ${CMAKE_SYSTEM_PROCESSOR} ". Override by setting OQS_PERMIT_UNSUPPORTED_ARCHITECTURE=ON")
endif()

if(${OQS_USE_CUPQC})
# CMAKE's CUDA language requires CMAKE 3.18
cmake_minimum_required (VERSION 3.18)
enable_language(CUDA)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 80 90)
endif()
find_package(cuPQC 0.2.0 REQUIRED)
endif()

if (NOT ((CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") AND (ARCH_X86_64 STREQUAL "ON")) AND (OQS_LIBJADE_BUILD STREQUAL "ON"))
message(FATAL_ERROR "Building liboqs with libjade implementations from libjade is only supported on Linux and Darwin on x86_64.")
endif()
Expand Down
8 changes: 8 additions & 0 deletions CONFIGURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The following options can be passed to CMake before the build file generation pr
- [OQS_DIST_BUILD](#OQS_DIST_BUILD)
- [OQS_USE_CPUFEATURE_INSTRUCTIONS](#OQS_USE_CPUFEATURE_INSTRUCTIONS)
- [OQS_USE_OPENSSL](#OQS_USE_OPENSSL)
- [OQS_USE_CUPQC](#OQS_USE_CUPQC)
- [OQS_OPT_TARGET](#OQS_OPT_TARGET)
- [OQS_SPEED_USE_ARM_PMU](#OQS_SPEED_USE_ARM_PMU)
- [USE_SANITIZER](#USE_SANITIZER)
Expand Down Expand Up @@ -124,6 +125,13 @@ Dynamically load OpenSSL through `dlopen`. When using liboqs from other cryptogr

Only has an effect if the system supports `dlopen` and ELF binary format, such as Linux or BSD family.

### OQS_USE_CUPQC

Can be `ON` or `OFF`. When `ON`, use NVIDIA's cuPQC library where able (currently just ML-KEM). When this option is enabled, liboqs may not run correctly on machines that lack supported GPUs. To download cuPQC follow the instructions at (https://developer.nvidia.com/cupqc-download/). Detailed descriptions of the API, requirements, and installation guide are in the cuPQC documentation (https://docs.nvidia.com/cuda/cupqc/index.html). While the code shipped by liboqs required to use cuPQC is licensed under Apache 2.0 the cuPQC SDK comes with its own license agreement (https://docs.nvidia.com/cuda/cupqc/license.html).

**Default**: `OFF`


## Stateful Hash Based Signatures

XMSS and LMS are the two supported Hash-Based Signatures schemes.
Expand Down
1 change: 1 addition & 0 deletions PLATFORMS.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,4 @@ In this policy, the words "must" and "must not" specify absolute requirements th
- ppc641e for Ubuntu (Focal)
- s390x for Ubuntu (Focal)
- loongarch64 for Debian Linux (trixie)
- NVIDIA GPU architectures 70, 75, 80, 86, 89, and 90 with a x86_64 CPU for Linux
7 changes: 7 additions & 0 deletions docs/algorithms/kem/ml_kem.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
- **Primary Source**<a name="primary-source"></a>:
- **Source**: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd with copy_from_upstream patches
- **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0
- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd with copy_from_upstream patches
- **cupqc-cuda**:<a name="cupqc-cuda"></a>
- **Source**: https://github.com/praveksharma/cupqc-mlkem/commit/b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
- **Implementation license (SPDX-Identifier)**: Apache-2.0


## Parameter set summary
Expand All @@ -25,6 +29,7 @@
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------|
| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False |

Are implementations chosen based on runtime CPU feature detection? **Yes**.

Expand All @@ -36,6 +41,7 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False |

Are implementations chosen based on runtime CPU feature detection? **Yes**.

Expand All @@ -45,6 +51,7 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False |

Are implementations chosen based on runtime CPU feature detection? **Yes**.

Expand Down
34 changes: 34 additions & 0 deletions docs/algorithms/kem/ml_kem.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ primary-upstream:
source: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd
with copy_from_upstream patches
spdx-license-identifier: CC0-1.0 or Apache-2.0
optimized-upstreams:
cupqc-cuda:
source: https://github.com/praveksharma/cupqc-mlkem/commit/b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
spdx-license-identifier: Apache-2.0
parameter-sets:
- name: ML-KEM-512
claimed-nist-level: 1
Expand Down Expand Up @@ -54,6 +58,16 @@ parameter-sets:
no-secret-dependent-branching-claimed: true
no-secret-dependent-branching-checked-by-valgrind: true
large-stack-usage: false
- upstream: cupqc-cuda
upstream-id: cuda
supported-platforms:
- architecture: CUDA
operating_systems:
- Linux
- Darwin
no-secret-dependent-branching-claimed: false
no-secret-dependent-branching-checked-by-valgrind: false
large-stack-usage: false
- name: ML-KEM-768
claimed-nist-level: 3
claimed-security: IND-CCA2
Expand Down Expand Up @@ -87,6 +101,16 @@ parameter-sets:
no-secret-dependent-branching-claimed: true
no-secret-dependent-branching-checked-by-valgrind: true
large-stack-usage: false
- upstream: cupqc-cuda
upstream-id: cuda
supported-platforms:
- architecture: CUDA
operating_systems:
- Linux
- Darwin
no-secret-dependent-branching-claimed: false
no-secret-dependent-branching-checked-by-valgrind: false
large-stack-usage: false
- name: ML-KEM-1024
claimed-nist-level: 5
claimed-security: IND-CCA2
Expand Down Expand Up @@ -120,3 +144,13 @@ parameter-sets:
no-secret-dependent-branching-claimed: true
no-secret-dependent-branching-checked-by-valgrind: true
large-stack-usage: false
- upstream: cupqc-cuda
upstream-id: cuda
supported-platforms:
- architecture: CUDA
operating_systems:
- Linux
- Darwin
no-secret-dependent-branching-claimed: false
no-secret-dependent-branching-checked-by-valgrind: false
large-stack-usage: false
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,18 @@ if(OQS_DIST_X86_64_BUILD OR ({% for flag in platform['required_flags'] -%} OQS_U
{%- endif %}
endif()
{% if platform['operating_systems'] %}endif()
{% endif -%}
{%- endfor -%}
{%- for platform in impl['supported_platforms'] if platform['architecture'] == 'CUDA' %}
{% if platform['operating_systems'] %}if(CMAKE_SYSTEM_NAME MATCHES "{{ platform['operating_systems']|join('|') }}")
{% endif -%}
if(OQS_USE_CUPQC)
cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}_{{ impl['name'] }} "" ON "OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}" OFF)
{%- if 'alias_scheme' in scheme %}
cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }} "" ON "OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}" OFF)
{%- endif %}
endif()
{% if platform['operating_systems'] %}endif()
{% endif -%}
{%- endfor -%}
{%- for platform in impl['supported_platforms'] if platform['architecture'] == 'ARM64_V8' %}
Expand Down
34 changes: 18 additions & 16 deletions scripts/copy_from_upstream/copy_from_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,14 +495,15 @@ def handle_implementation(impl, family, scheme, dst_basedir):
else:
# determine list of files to copy:
if 'sources' in i:
srcs = i['sources'].split(" ")
for s in srcs:
# Copy recursively only in case of directories not with plain files to avoid copying over symbolic links
if os.path.isfile(os.path.join(origfolder, s)):
subprocess.run(['cp', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
else:
subprocess.run(
['cp', '-r', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
if i['sources']:
srcs = i['sources'].split(" ")
for s in srcs:
# Copy recursively only in case of directories not with plain files to avoid copying over symbolic links
if os.path.isfile(os.path.join(origfolder, s)):
subprocess.run(['cp', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
else:
subprocess.run(
['cp', '-r', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
else:
subprocess.run(['cp', '-pr', os.path.join(origfolder, '.'), srcfolder])
# raise Exception("Malformed YML file: No sources listed to copy. Check upstream YML file." )
Expand Down Expand Up @@ -598,14 +599,15 @@ def process_families(instructions, basedir, with_kat, with_generator, with_libja
# when provided to the compiler; OQS uses the term ARM_NEON
if req['architecture'] == 'arm_8':
req['architecture'] = 'ARM64_V8'
if req['architecture'] == 'ARM64_V8' and 'asimd' in req['required_flags']:
req['required_flags'].remove('asimd')
req['required_flags'].append('arm_neon')
if req['architecture'] == 'ARM64_V8' and 'sha3' in req['required_flags']:
req['required_flags'].remove('sha3')
req['required_flags'].append('arm_sha3')
impl['required_flags'] = req['required_flags']
family['all_required_flags'].update(req['required_flags'])
if 'required_flags' in req:
if req['architecture'] == 'ARM64_V8' and 'asimd' in req['required_flags']:
req['required_flags'].remove('asimd')
req['required_flags'].append('arm_neon')
if req['architecture'] == 'ARM64_V8' and 'sha3' in req['required_flags']:
req['required_flags'].remove('sha3')
req['required_flags'].append('arm_sha3')
impl['required_flags'] = req['required_flags']
family['all_required_flags'].update(req['required_flags'])
except KeyError as ke:
if (impl['name'] != family['default_implementation']):
print("No required flags found for %s (KeyError %s on impl %s)" % (
Expand Down
12 changes: 12 additions & 0 deletions scripts/copy_from_upstream/copy_from_upstream.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ upstreams:
kem_meta_path: '{pretty_name_full}_META.yml'
kem_scheme_path: '.'
patches: [pqcrystals-ml_kem.patch]
-
name: cupqc
git_url: https://github.com/praveksharma/cupqc-mlkem.git
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please eventually move to long-term maintained GH repo

git_branch: main
git_commit: b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
kem_meta_path: '{pretty_name_full}_META.yml'
kem_scheme_path: '.'
patches: []
-
name: pqcrystals-dilithium
git_url: https://github.com/pq-crystals/dilithium.git
Expand Down Expand Up @@ -166,6 +174,10 @@ kems:
-
name: ml_kem
default_implementation: ref
arch_specific_implementations:
cuda: cuda
arch_specific_upstream_locations:
cuda: cupqc
upstream_location: pqcrystals-kyber-standard
schemes:
-
Expand Down
9 changes: 9 additions & 0 deletions scripts/copy_from_upstream/src/kem/family/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,19 @@ if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}{%- if 'alias_scheme' in
target_compile_options({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PUBLIC {{ impl['compile_opts'] }})
{%- endif -%}

{%- elif impl['name'] == 'cuda' %}

if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}_{{ impl['name'] }}{%- if 'alias_scheme' in scheme %} OR OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}{%- endif %})
add_library({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} OBJECT {{ impl['upstream']['name'] }}_{{ scheme['pqclean_scheme'] }}_{{ impl['name'] }}/cupqc_ml-kem.cu)
target_link_libraries({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} cupqc)
set_property(TARGET {{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PROPERTY CUDA_ARCHITECTURES OFF)
target_compile_options({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE {{ impl['compile_opts'] }})
{%- else %}

if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}_{{ impl['name'] }}{%- if 'alias_scheme' in scheme %} OR OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}{%- endif %})
add_library({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} OBJECT {% for source_file in impl['sources']|sort -%}{{ impl['upstream']['name'] }}_{{ scheme['pqclean_scheme'] }}_{{ impl['name'] }}/{{ source_file }}{%- if not loop.last %} {% endif -%}{%- endfor -%})
{%- endif %}
{%- if impl['name'] != 'cuda' %}
target_include_directories({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/{{ impl['upstream']['name'] }}_{{ scheme['pqclean_scheme'] }}_{{ impl['name'] }})
target_include_directories({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
{%- if impl['name'] != scheme['default_implementation'] and impl['required_flags'] -%}
Expand All @@ -60,6 +68,7 @@ if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}_{{ impl['name'] }}{%- if
target_compile_definitions({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE old_gas_syntax)
endif()
{%- endif %}
{%- endif %}{# cupqc #}
set(_{{ family|upper }}_OBJS ${_{{ family|upper }}_OBJS} $<TARGET_OBJECTS:{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}>)
endif()
{%- endfor -%}
Expand Down
Loading
Loading