diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..cd99ecd6 --- /dev/null +++ b/.clang-format @@ -0,0 +1,8 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +BasedOnStyle: Google +--- +Language: Cpp +DerivePointerAlignment: false +PointerAlignment: Left diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..67f3a93b --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +.vscode/ +.vs/ +build*/ + + +.DS_Store + +# Generated files +docs/conf.py +cmake/IntelHEXLConfig.cmake +*.log diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..08caca31 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,155 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: ubuntu:18.04 + +variables: + http_proxy: http://10.7.211.16:911 + https_proxy: http://10.7.211.16:912 + DEFAULT_COMPILER_FLAGS: "-DCMAKE_CXX_COMPILER=clang++-10 + -DCMAKE_C_COMPILER=clang-10 + -DHEXL_BENCHMARK=ON + -DHEXL_TESTING=ON + -DHEXL_COVERAGE=OFF + -DHEXL_EXPORT=ON + -DCMAKE_INSTALL_PREFIX=../" + DEBUG_COMPILER_FLAGS: "-DCMAKE_BUILD_TYPE=RelWithDebInfo + -DCMAKE_CXX_COMPILER=g++-9 + -DCMAKE_C_COMPILER=gcc-9 + -DHEXL_DEBUG=ON + -DHEXL_BENCHMARK=ON + -DHEXL_TESTING=ON + -DHEXL_COVERAGE=ON + -DHEXL_EXPORT=OFF + -DHEXL_DOCS=ON + -DHEXL_ENABLE_ADDRESS_SANITIZER=ON + -DCMAKE_INSTALL_PREFIX=../" + + GCOV_BIN: "/usr/bin/gcov-7" + DEFAULT_EXAMPLE_FLAGS: "-DINTEL_HEXL_HINT_DIR=${CI_PROJECT_DIR}/lib/cmake/" + +stages: + - format + - build + - test + - example + - benchmark + +.only-default: + only: + refs: + - merge_requests + - master + +.build: + extends: .only-default + before_script: + - whoami + - echo "Testing from branch:" + - echo $CI_COMMIT_REF_NAME + - cmake --version + - pwd + - ls + +format: + extends: .only-default + stage: format + script: + # Run formatting + - pre-commit run 
--all-files + +default_build: + stage: build + extends: .build + script: + # Build library + - mkdir -p build + - cd build + - cmake .. ${DEFAULT_COMPILER_FLAGS} + - make -j + - make -j install + # Build example + - cd ../example + - mkdir -p build + - cd build + - cmake .. ${DEFAULT_COMPILER_FLAGS} ${DEFAULT_EXAMPLE_FLAGS} + - make -j + artifacts: + paths: + - build/ + - lib/ + - include/ + - example/build + expire_in: 1 day + +debug_build: + stage: build + extends: .build + script: + # Build repo + - mkdir -p build + - cd build + - cmake .. ${DEBUG_COMPILER_FLAGS} + - make -j + - make -j docs + - make -j install + - cd ../ + + # Avoid putting debug_test in separate stage, since uploading/downloading artifacts takes a long time + - pwd + - ls + - echo ${CI_PROJECT_DIR} + # Generate initial baseline + - /usr/local/bin/lcov --capture --initial --directory build/ --output-file cov_base.info + # - /usr/local/bin/lcov --list cov_base.info + - build/test/unit-test --v=3 + # Run lcov again + - /usr/local/bin/lcov --capture --directory build/ --output-file cov_test.info + # - /usr/local/bin/lcov --list cov_test.info + # Combine two trace files + - /usr/local/bin/lcov --add-tracefile cov_base.info --add-tracefile cov_test.info --output-file cov_total.info + # - /usr/local/bin/lcov --list cov_total.info + # Remove unwanted directories + - /usr/local/bin/lcov --remove cov_total.info '/usr/include/*' '/usr/lib/*' '*/test/*' '*/build/*' '*/benchmark/*' -o cov_filter.info + # report overall summary to be parsed by gitlab CI + - /usr/local/bin/lcov --list cov_filter.info + # Generate coverage html + - genhtml --branch-coverage cov_filter.info --output-directory coverage + + artifacts: + paths: + - coverage/ + expire_in: 1 day + +default_example: + stage: example + extends: .only-default + script: + - example/build/example + artifacts: + paths: + - example/build + expire_in: 1 day + dependencies: + - default_build + +test: + stage: test + extends: .only-default + script: + - 
build/test/unit-test + - example/build/example + dependencies: + - default_build + +benchmark: + stage: benchmark + extends: .only-default + script: + - build/benchmark/bench_hexl --benchmark_out="${CI_JOB_NAME}_${CI_COMMIT_SHA}" --benchmark_out_format=csv + dependencies: + - default_build + artifacts: + paths: + - "${CI_JOB_NAME}_${CI_COMMIT_SHA}" + expire_in: 5 yr diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..accab005 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-merge-conflict + - id: mixed-line-ending + - id: check-byte-order-marker + - id: check-yaml + - repo: local + hooks: + - id: clang-format-9 + name: clang-format-9 + entry: clang-format-9 + language: system + files: \.(c|cc|cxx|cpp|h|hpp|hxx|js|proto)$ + args: ["-i"] + - id: cpplint + name: cpplint + entry: cpplint + language: system + files: \.(c|cc|cxx|cpp|h|hpp|hxx)$ + args: + - --recursive diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..755474c3 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,274 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.5.1) +project(INTEL_HEXL VERSION 1.0.0 LANGUAGES C CXX) + +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) +include(CMakePackageConfigHelpers) + +# set directory where the custom finders live +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") +include(HEXLHelpers) + +if(CMAKE_BUILD_TYPE) + set(RELEASE_TYPES + Debug + Release + RelWithDebInfo + MinSizeRel) + list(FIND RELEASE_TYPES ${CMAKE_BUILD_TYPE} INDEX_FOUND) + if(${INDEX_FOUND} EQUAL -1) + message( + FATAL_ERROR + "CMAKE_BUILD_TYPE must be one of Debug, 
Release, RelWithDebInfo, or MinSizeRel" + ) + endif() +endif() + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type" FORCE) +endif() + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_INSTALL_MESSAGE LAZY) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +# Create compilation database compile_commands.json +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +set(CMAKE_INSTALL_RPATH "\$ORIGIN") + +#------------------------------------------------------------------------------ +# Compiler options... +#------------------------------------------------------------------------------ + +# Sanitizers +option(HEXL_ENABLE_ADDRESS_SANITIZER "Compiles and links with Address Sanitizer" OFF) +option(HEXL_ENABLE_THREAD_SANITIZER "Compiles and links with Thread Sanitizer" OFF) +option(HEXL_ENABLE_UB_SANITIZER "Compiles and links with Undefined Behavior Sanitizer" OFF) + +option(HEXL_BENCHMARK "Enable benchmarking" ON) +option(HEXL_COVERAGE "Enables coverage for unit tests" OFF) +option(HEXL_DEBUG "Enable runtime debug checking; will yield much slower code " OFF) +option(HEXL_DOCS "Enable documentation building" OFF) +option(HEXL_EXPORT "Enables export of intel_hexl target" OFF) +option(HEXL_SHARED_LIB "Generate a shared library" OFF) +option(HEXL_TESTING "Enables unit-tests" ON) + +message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") +message(STATUS "CMAKE_C_COMPILER: ${CMAKE_C_COMPILER}") +message(STATUS "CMAKE_CXX_COMPILER: ${CMAKE_CXX_COMPILER}") +message(STATUS "HEXL_ENABLE_ADDRESS_SANITIZER: ${HEXL_ENABLE_ADDRESS_SANITIZER}") +message(STATUS "HEXL_ENABLE_THREAD_SANITIZER: ${HEXL_ENABLE_THREAD_SANITIZER}") +message(STATUS "HEXL_ENABLE_UB_SANITIZER: ${HEXL_ENABLE_UB_SANITIZER}") +message(STATUS "HEXL_BENCHMARK: ${HEXL_BENCHMARK}") +message(STATUS "HEXL_COVERAGE: ${HEXL_COVERAGE}") +message(STATUS "HEXL_DEBUG: ${HEXL_DEBUG}") +message(STATUS "HEXL_DOCS: ${HEXL_DOCS}") +message(STATUS "HEXL_EXPORT: 
${HEXL_EXPORT}") +message(STATUS "HEXL_SHARED_LIB: ${HEXL_SHARED_LIB}") +message(STATUS "HEXL_TESTING: ${HEXL_TESTING}") + + +# Compiler flags +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # using Visual Studio C++ + set(HEXL_USE_MSVC ON) + add_definitions(-DHEXL_USE_MSVC) + message(STATUS "HEXL_USE_MSVC: ${HEXL_USE_MSVC}") +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + # using GCC + set(HEXL_USE_GNU ON) + add_definitions(-DHEXL_USE_GNU) + message(STATUS "HEXL_USE_GNU: ${HEXL_USE_GNU}") +elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + # using Clang + set(HEXL_USE_CLANG ON) + add_definitions(-DHEXL_USE_CLANG) + message(STATUS "HEXL_USE_CLANG: ${HEXL_USE_CLANG}") +else() + message(WARNING "Unsupported compiler ${CMAKE_CXX_COMPILER_ID}") +endif() + +if (HEXL_COVERAGE) + if (NOT HEXL_TESTING) + message(FATAL_ERROR "HEXL_COVERAGE enabled, but HEXL_TESTING not enabled. Enable HEXL_TESTING for best coverage.") + endif() + add_compile_options(--coverage -O0 -g) + add_compile_options(-fprofile-arcs -ftest-coverage) + add_compile_options(-fkeep-inline-functions) + add_link_options(--coverage) + # link_libraries(gcov) +endif() + +if (HEXL_DEBUG) + add_definitions(-DHEXL_DEBUG) +endif() + +set(HEXL_SRC_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/intel-hexl) +set(HEXL_INC_ROOT_DIR ${HEXL_SRC_ROOT_DIR}/include) # Public headers + +if(HEXL_ENABLE_ADDRESS_SANITIZER) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize=leak") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -fsanitize=leak") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fsanitize=address -fsanitize=leak") +endif() + +if(HEXL_ENABLE_THREAD_SANITIZER) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fsanitize=thread") +endif() + +if(HEXL_ENABLE_UB_SANITIZER) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-fsanitize=float-divide-by-zero -fsanitize=undefined -fsanitize=float-cast-overflow -fsanitize=implicit-conversion -fPIE") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=float-divide-by-zero -fsanitize=undefined -fsanitize=float-cast-overflow -fsanitize=implicit-conversion -fPIE -pie") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fsanitize=float-divide-by-zero -fsanitize=undefined -fsanitize=float-cast-overflow -fsanitize=implicit-conversion -fPIE -pie") +endif() + +if(HEXL_ENABLE_ADDRESS_SANITIZER OR HEXL_ENABLE_THREAD_SANITIZER OR HEXL_ENABLE_UB_SANITIZER) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fno-omit-frame-pointer") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -g -fno-omit-frame-pointer") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -g -fno-omit-frame-pointer") +endif() + +message(STATUS "CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}") +message(STATUS "CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}") +message(STATUS "CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}") +message(STATUS "CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") +message(STATUS "CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}") + +#------------------------------------------------------------------------------ +# Set AVX flags +#------------------------------------------------------------------------------ +check_compile_flag("${CMAKE_MODULE_PATH}/test-avx512dq.cpp" HEXL_HAS_AVX512DQ) +check_compile_flag("${CMAKE_MODULE_PATH}/test-avx512ifma.cpp" HEXL_HAS_AVX512IFMA) +# TODO:just for testing; remove +check_compile_flag("${CMAKE_MODULE_PATH}/test-avx256.cpp" HEXL_HAS_AVX256) + +# ------------------------------------------------------------------------------ +# Installation logic... 
+# ------------------------------------------------------------------------------ + +if(NOT CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}) +endif() + +if(LINUX) # NOTE(review): CMake only predefines LINUX from 3.25 on; confirm it is set elsewhere, otherwise the else() branch is always taken + include(GNUInstallDirs) +else() + set(CMAKE_INSTALL_BINDIR "bin") + set(CMAKE_INSTALL_INCLUDEDIR "include") + set(CMAKE_INSTALL_DOCDIR "doc") + set(CMAKE_INSTALL_LIBDIR "lib") +endif() +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_INSTALL_LIBDIR}) + +message(STATUS "CMAKE_INSTALL_LIBDIR: ${CMAKE_INSTALL_LIBDIR}") +message(STATUS "CMAKE_INSTALL_INCLUDEDIR: ${CMAKE_INSTALL_INCLUDEDIR}") +message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") + +#------------------------------------------------------------------------------ +# Third-party code... +#------------------------------------------------------------------------------ +add_subdirectory(cmake/cpu-features) + +if (HEXL_TESTING OR HEXL_BENCHMARK OR HEXL_DEBUG) + if(NOT TARGET Threads::Threads) + set(THREADS_PREFER_PTHREAD_FLAG ON) + endif() + find_package(Threads REQUIRED) +endif() + +if (HEXL_TESTING) + add_subdirectory(cmake/gtest) +endif() + +if (HEXL_BENCHMARK) + add_subdirectory(cmake/gbenchmark) +endif() + +if (HEXL_DEBUG) + add_subdirectory(cmake/easylogging) + add_subdirectory(cmake/gflags) +endif() + +#------------------------------------------------------------------------------ +# Subfolders... +#------------------------------------------------------------------------------ +add_subdirectory(intel-hexl) + +if (HEXL_BENCHMARK) + add_subdirectory(benchmark) + add_custom_target(bench COMMAND $<TARGET_FILE:bench_hexl> DEPENDS bench_hexl) # "bench" builds, then runs, the benchmark executable +endif() + +if (HEXL_TESTING) + add_subdirectory(test) + add_custom_target(unittest COMMAND $<TARGET_FILE:unit-test> DEPENDS unit-test) # "unittest" builds, then runs, the unit-test executable +endif() + +if (HEXL_DOCS) + add_subdirectory(docs) +endif() + + +#------------------------------------------------------------------------------ +# Config export...
+#------------------------------------------------------------------------------ + +if (HEXL_EXPORT) + if (HEXL_SHARED_LIB OR HEXL_DEBUG) + message(FATAL_ERROR "HEXL_EXPORT=ON only valid when + HEXL_SHARED_LIB=OFF and HEXL_DEBUG=OFF ") + endif() + + # Paths of the package files: exported targets, config template, generated config, generated version file + set(INTEL_HEXL_TARGET_FILENAME ${CMAKE_CURRENT_BINARY_DIR}/cmake/IntelHEXLTargets.cmake) + set(INTEL_HEXL_CONFIG_IN_FILENAME ${CMAKE_CURRENT_SOURCE_DIR}/cmake/IntelHEXLConfig.cmake.in) + set(INTEL_HEXL_CONFIG_FILENAME ${CMAKE_CURRENT_SOURCE_DIR}/cmake/IntelHEXLConfig.cmake) + set(INTEL_HEXL_CONFIG_VERSION_FILENAME ${CMAKE_CURRENT_BINARY_DIR}/cmake/IntelHEXLConfigVersion.cmake) + + set(INTEL_HEXL_CONFIG_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}/cmake/) + + # Install the exported targets file next to the package config + install( + EXPORT IntelHEXLTargets + DESTINATION ${INTEL_HEXL_CONFIG_INSTALL_DIR} + ) + + # Generate the version file; ExactVersion makes find_package accept only a matching version + write_basic_package_version_file( + ${INTEL_HEXL_CONFIG_VERSION_FILENAME} + VERSION ${INTEL_HEXL_VERSION} + COMPATIBILITY ExactVersion) + + include(CMakePackageConfigHelpers) + configure_package_config_file( + ${INTEL_HEXL_CONFIG_IN_FILENAME} ${INTEL_HEXL_CONFIG_FILENAME} + INSTALL_DESTINATION ${INTEL_HEXL_CONFIG_INSTALL_DIR} + ) + + install( + TARGETS intel_hexl + EXPORT IntelHEXLTargets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ) + + install(FILES ${INTEL_HEXL_CONFIG_FILENAME} + ${INTEL_HEXL_CONFIG_VERSION_FILENAME} + DESTINATION ${INTEL_HEXL_CONFIG_INSTALL_DIR}) + + export(EXPORT IntelHEXLTargets + FILE ${INTEL_HEXL_TARGET_FILENAME}) + +endif() + +add_custom_target(check COMMAND pre-commit install && pre-commit run --all-files) diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 00000000..48962757 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,5 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default codeowner for all files +* @fboemer @sejunkim @gseifu
diff --git a/CPPLINT.cfg b/CPPLINT.cfg new file mode 100644 index 00000000..9256a26b --- /dev/null +++ b/CPPLINT.cfg @@ -0,0 +1,4 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +filter=-build/c++11,-build/include_what_you_use diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..505bd8b9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 00000000..92d9da0d --- /dev/null +++ b/README.md @@ -0,0 +1,167 @@ +# Intel Homomorphic Encryption Acceleration Library (HEXL) +Intel:registered: HEXL is an open-source library which provides efficient implementations of integer arithmetic on Galois fields. Such arithmetic is prevalent in cryptography, particularly in homomorphic encryption (HE) schemes. Intel HEXL targets integer arithmetic with word-sized primes, typically 40-60 bits. Intel HEXL provides an API for 64-bit unsigned integers and targets Intel CPUs. 
+ +## Contents +- [Intel Homomorphic Encryption Acceleration Library (HEXL)](#intel-homomorphic-encryption-acceleration-library-hexl) + - [Contents](#contents) + - [Introduction](#introduction) + - [Building Intel HEXL](#building-intel-hexl) + - [Dependencies](#dependencies) + - [Compile-time options](#compile-time-options) + - [Compiling Intel HEXL](#compiling-intel-hexl) + - [Testing Intel HEXL](#testing-intel-hexl) + - [Benchmarking Intel HEXL](#benchmarking-intel-hexl) + - [Using Intel HEXL](#using-intel-hexl) + - [Debugging](#debugging) + - [Thread-safety](#thread-safety) +- [Documentation](#documentation) + - [Doxygen](#doxygen) + - [Sphinx](#sphinx) +- [Contributing](#contributing) + - [Repository layout](#repository-layout) + +## Introduction +Many cryptographic applications, particularly homomorphic encryption (HE), rely on integer polynomial arithmetic in a finite field. HE, which enables computation on encrypted data, typically uses polynomials with degree `N` a power of two roughly in the range `N=[2^{10}, 2^{17}]`. The coefficients of these polynomials are in a finite field with a word-sized prime, `p`, of up to ~62 bits. More precisely, the polynomials live in the ring `Z_p[X]/(X^N + 1)`. That is, when adding or multiplying two polynomials, each coefficient of the result is reduced by the prime modulus `p`. When multiplying two polynomials, the resulting polynomial of degree `2N` is additionally reduced by taking the remainder when dividing by `X^N+1`. + + The primary bottleneck in many HE applications is polynomial-polynomial multiplication in `Z_p[X]/(X^N + 1)`. For efficient implementation, Intel HEXL implements the negacyclic number-theoretic transform (NTT). To multiply two polynomials, `p_1(x)` and `p_2(x)`, using the NTT, we perform the FwdNTT on the two input polynomials, then perform an element-wise modular multiplication, and perform the InvNTT on the result.
+ +Intel HEXL implements the following functions: +- The forward and inverse negacyclic number-theoretic transform (NTT) +- Element-wise vector-vector modular multiplication +- Element-wise vector-scalar modular multiplication with optional addition +- Element-wise modular multiplication + +For each function, the library implements one or several Intel(R) AVX-512 implementations, as well as a less performant, more readable native C++ implementation. Intel HEXL will automatically choose the best implementation for the Intel(R) AVX-512 feature set of the given CPU. In particular, when the modulus `p` is less than `2^{50}`, the AVX512IFMA instruction set available on Intel IceLake server and IceLake client will provide a more efficient implementation. + +For additional functionality, see the public headers, located in `include/intel-hexl`. +## Building Intel HEXL + +### Dependencies +We have tested Intel HEXL on the following operating systems: +- Ubuntu 18.04 +- macOS 10.15 +- Microsoft Windows 10 + +Intel HEXL requires the following dependencies: + +| Dependency | Version | +|-------------|----------------------------------------------| +| CMake | >= 3.5.1 | +| Compiler | gcc >= 7.0, clang++ >= 5.0, MSVC >= 2019 | + +For best performance, we recommend using a processor with AVX512-IFMA52 support, and a recent compiler (gcc >= 8.0, clang++ >= 6.0). To determine if your processor supports AVX512-IFMA52, simply look for `HEXL_HAS_AVX512IFMA` during the configure step (see [Compiling Intel HEXL](#compiling-intel-hexl)). + + +### Compile-time options +In addition to the standard CMake build options, Intel HEXL supports several compile-time flags to configure the build.
+For convenience, they are listed below: + +| CMake option | Values | | +| ---------------------------------| ---------------------- | ------------------------------------------------------------------------ | +| HEXL_BENCHMARK | ON / OFF (default ON) | Set to ON to enable benchmark suite via Google benchmark | +| HEXL_COVERAGE | ON / OFF (default OFF) | Set to ON to enable coverage report of unit-tests | +| HEXL_DEBUG | ON / OFF (default OFF) | Set to ON to enable debugging at large runtime penalty | +| HEXL_DOCS | ON / OFF (default OFF) | Set to ON to enable building of documentation | +| HEXL_ENABLE_ADDRESS_SANITIZER | ON / OFF (default OFF) | Set to ON to enable building with address sanitizer (ASan) | +| HEXL_ENABLE_THREAD_SANITIZER | ON / OFF (default OFF) | Set to ON to enable building with thread sanitizer (TSan) | +| HEXL_ENABLE_UB_SANITIZER | ON / OFF (default OFF) | Set to ON to enable building with undefined behavior sanitizer (UBSan) | +| HEXL_EXPORT | ON / OFF (default OFF) | Set to ON to enable export of Intel HEXL for use in 3rd-party project | +| HEXL_SHARED_LIB | ON / OFF (default OFF) | Set to ON to enable building shared library | +| HEXL_TESTING | ON / OFF (default ON) | Set to ON to enable building of unit-tests | + +### Compiling Intel HEXL +The instructions to build Intel HEXL are common between Linux, MacOS, and Windows. + +To compile Intel HEXL from source code, first clone the repository into your current directory. Then, to configure the build, call +```bash +cmake -S . -B build +``` +adding the desired compile-time options with a `-D` flag. For instance, to build Intel HEXL with debugging capabilities, call +```bash +cmake -S . -B build -DHEXL_DEBUG=ON +``` + +Then, to build Intel HEXL, call +```bash +cmake --build build +``` +This will build the Intel HEXL library in the `build/intel-hexl/lib/` directory. 
+ +To install Intel HEXL to the installation directory, run +```bash +cmake --install build +``` +To use a non-standard installation directory, configure the build with +```bash +cmake -S . -B build -DCMAKE_INSTALL_PREFIX=/path/to/install +``` + +## Testing Intel HEXL +To run a set of unit tests via Googletest, configure and build Intel HEXL with `-DHEXL_TESTING=ON` (see [Compile-time options](#compile-time-options)). +Then, run +```bash +cmake --build build --target unittest +``` +The unit-test executable itself is located at `build/test/unit-test` +## Benchmarking Intel HEXL +To run a set of benchmarks via Google benchmark, configure and build Intel HEXL with `-DHEXL_BENCHMARK=ON` (see [Compile-time options](#compile-time-options)). +Then, run +```bash +cmake --build build --target bench +``` +The benchmark executable itself is located at `build/benchmark/bench_hexl` + +## Using Intel HEXL +The `example` folder has an example of using Intel HEXL in a third-party project. + +## Debugging +For optimal performance, Intel HEXL does not perform input validation. In many cases the time required for the validation would be longer than the execution of the function itself. To debug Intel HEXL, configure and build Intel HEXL with `-DHEXL_DEBUG=ON` (see [Compile-time options](#compile-time-options)). This will generate a debug version of the library, e.g. `libintel_hexl_debug.a`, that can be used to debug the execution. + +**Note**, enabling `HEXL_DEBUG=ON` will result in a significant runtime overhead. +## Thread-safety +Intel HEXL is single-threaded and thread-safe. + +# Documentation +See [https://intel.github.io/hexl](https://intel.github.io/hexl) for Doxygen documentation. + +Intel HEXL supports documentation via Doxygen and sphinx. +To build documentation, first install `doxygen` and `graphviz`, e.g. +```bash +sudo apt-get install doxygen graphviz +``` +Then, configure Intel HEXL with `-DHEXL_DOCS=ON` (see [Compile-time options](#compile-time-options)). 
+### Doxygen + To build Doxygen documentation, after configuring Intel HEXL with `-DHEXL_DOCS=ON`, run +``` +cmake --build build --target doxygen +``` +To view the generated Doxygen documentation, open the generated `build/docs/doxygen/html/index.html` file in a web browser. + +### Sphinx +To build the Sphinx documentation, install `sphinx` and the required dependencies `breathe` and `m2r2`, e.g. +```bash +sudo apt-get install python3-sphinx +pip3 install breathe m2r2 +``` + +Then, after configuring Intel HEXL with `-DHEXL_DOCS=ON`, run +```bash +cmake --build build --target docs +``` +To view the generated Sphinx documentation, open the generated `build/docs/sphinx/html/index.html` file in a web browser. + +# Contributing + +At this time, Intel HEXL does not accept external contributions. We encourage feedback and suggestions via issues. + +For Intel developers, use [pre-commit](https://pre-commit.com/) to validate the formatting of the code. + +Before contributing, please run +```bash +cmake --build build --target check unittest +``` +and make sure pre-commit checks and all unit tests pass. + +## Repository layout +Public headers reside in the `intel-hexl/include` folder. +Private headers, e.g. those containing Intel(R) AVX-512 code, should not be put in this folder. 
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt new file mode 100644 index 00000000..70ccb709 --- /dev/null +++ b/benchmark/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set(SRC main.cpp + bench-ntt.cpp + bench-eltwise-add-mod.cpp + bench-eltwise-cmp-add.cpp + bench-eltwise-cmp-sub-mod.cpp + bench-eltwise-fma-mod.cpp + bench-eltwise-mult-mod.cpp + ) + +add_executable(bench_hexl ${SRC}) + +target_include_directories(bench_hexl PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${HEXL_SRC_ROOT_DIR} # Private headers + ) + +target_link_libraries(bench_hexl PRIVATE intel_hexl benchmark::benchmark Threads::Threads) +if (HEXL_DEBUG) + target_link_libraries(bench_hexl PRIVATE easyloggingpp) +endif() + +if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + target_compile_options(bench_hexl PRIVATE -Wall -Wextra -march=native -O3) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(bench_hexl PRIVATE /Wall /W4 /Zc:preprocessor) +endif() diff --git a/benchmark/bench-eltwise-add-mod.cpp b/benchmark/bench-eltwise-add-mod.cpp new file mode 100644 index 00000000..7d691de8 --- /dev/null +++ b/benchmark/bench-eltwise-add-mod.cpp @@ -0,0 +1,117 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#include "eltwise/eltwise-add-mod-avx512.hpp" +#include "eltwise/eltwise-add-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-add-mod.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "util/aligned-allocator.hpp" + +namespace intel { +namespace hexl { + +// state[0] is the degree +static void BM_EltwiseAddModInPlace(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + uint64_t modulus = 0xffffffffffc0001ULL; + + AlignedVector64 input1(input_size, 1); + AlignedVector64 input2(input_size, 2); + AlignedVector64 output(input_size, 0); + + for (auto _ : state) { + 
EltwiseAddMod(input1.data(), input1.data(), input2.data(), input_size, + modulus); + } +} + +BENCHMARK(BM_EltwiseAddModInPlace) + ->Unit(benchmark::kMicrosecond) + ->MinTime(2.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); + +//================================================================= + +// state[0] is the degree +static void BM_EltwiseAddModCopy(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + uint64_t modulus = 0xffffffffffc0001ULL; + + AlignedVector64 input1(input_size, 1); + AlignedVector64 input2(input_size, 2); + AlignedVector64 output(input_size, 0); + + for (auto _ : state) { + EltwiseAddMod(output.data(), input1.data(), input2.data(), input_size, + modulus); + } +} + +BENCHMARK(BM_EltwiseAddModCopy) + ->Unit(benchmark::kMicrosecond) + ->MinTime(2.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); + +//================================================================= + +// state[0] is the degree +static void BM_EltwiseAddModNative(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + uint64_t modulus = 0xffffffffffc0001ULL; + + AlignedVector64 input1(input_size, 1); + AlignedVector64 input2(input_size, 2); + AlignedVector64 output(input_size, 0); + + for (auto _ : state) { + EltwiseAddModNative(output.data(), input1.data(), input2.data(), input_size, + modulus); + } +} + +BENCHMARK(BM_EltwiseAddModNative) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); + +//================================================================= + +#ifdef HEXL_HAS_AVX512DQ +// state[0] is the degree +static void BM_EltwiseAddModAVX512(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + size_t modulus = 1152921504606877697; + + AlignedVector64 input1(input_size, 1); + AlignedVector64 input2(input_size, 2); + AlignedVector64 output(input_size, 0); + + for (auto _ : state) { + EltwiseAddModAVX512(output.data(), input1.data(), 
input2.data(), input_size, + modulus); + } +} + +BENCHMARK(BM_EltwiseAddModAVX512) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); +#endif + +//================================================================= + +} // namespace hexl +} // namespace intel diff --git a/benchmark/bench-eltwise-cmp-add.cpp b/benchmark/bench-eltwise-cmp-add.cpp new file mode 100644 index 00000000..3ac5b30e --- /dev/null +++ b/benchmark/bench-eltwise-cmp-add.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include + +#include "eltwise/eltwise-cmp-add-avx512.hpp" +#include "eltwise/eltwise-cmp-add-internal.hpp" +#include "intel-hexl/eltwise/eltwise-cmp-add.hpp" +#include "logging/logging.hpp" +#include "util/aligned-allocator.hpp" + +namespace intel { +namespace hexl { + +//================================================================= + +// state[0] is the degree +static void BM_EltwiseCmpAddNative(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + + std::random_device rd; + std::mt19937 gen(rd()); + + std::uniform_int_distribution distrib(1, 100); + + uint64_t bound = distrib(gen); + uint64_t diff = distrib(gen); + AlignedVector64 input1(input_size); + for (size_t i = 0; i < input_size; ++i) { + input1[i] = distrib(gen); + } + + for (auto _ : state) { + EltwiseCmpAddNative(input1.data(), input1.data(), CMPINT::NLT, bound, diff, + input_size); + } +} + +BENCHMARK(BM_EltwiseCmpAddNative) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); + +//================================================================= + +#ifdef HEXL_HAS_AVX512DQ +// state[0] is the degree +static void BM_EltwiseCmpAddAVX512(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + + std::random_device rd; + std::mt19937 gen(rd()); + + std::uniform_int_distribution distrib(1, 
100); + + uint64_t bound = 50; + uint64_t diff = distrib(gen); + AlignedVector64 input1(input_size); + for (size_t i = 0; i < input_size; ++i) { + input1[i] = distrib(gen); + } + + for (auto _ : state) { + EltwiseCmpAddAVX512(input1.data(), input1.data(), CMPINT::NLT, bound, diff, + input_size); + } +} + +BENCHMARK(BM_EltwiseCmpAddAVX512) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); +#endif + +} // namespace hexl +} // namespace intel diff --git a/benchmark/bench-eltwise-cmp-sub-mod.cpp b/benchmark/bench-eltwise-cmp-sub-mod.cpp new file mode 100644 index 00000000..aea1a9e5 --- /dev/null +++ b/benchmark/bench-eltwise-cmp-sub-mod.cpp @@ -0,0 +1,87 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include + +#include "eltwise/eltwise-cmp-sub-mod-avx512.hpp" +#include "eltwise/eltwise-cmp-sub-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp" +#include "logging/logging.hpp" +#include "util/aligned-allocator.hpp" + +namespace intel { +namespace hexl { + +//================================================================= + +// state[0] is the degree +static void BM_EltwiseCmpSubModNative(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + + std::random_device rd; + std::mt19937 gen(rd()); + + uint64_t modulus = 100; + std::uniform_int_distribution distrib(1, modulus - 1); + + uint64_t bound = distrib(gen); + uint64_t diff = distrib(gen); + AlignedVector64 input1(input_size); + for (size_t i = 0; i < input_size; ++i) { + input1[i] = distrib(gen); + } + + for (auto _ : state) { + EltwiseCmpSubModNative(input1.data(), input1.data(), CMPINT::NLT, bound, + diff, modulus, input_size); + } +} + +BENCHMARK(BM_EltwiseCmpSubModNative) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); + +//================================================================= + 
+#ifdef HEXL_HAS_AVX512DQ +// state[0] is the degree +static void BM_EltwiseCmpSubModAVX512(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + size_t modulus = 100; + + std::random_device rd; + std::mt19937 gen(rd()); + + std::uniform_int_distribution distrib(1, modulus - 1); + + uint64_t bound = distrib(gen); + uint64_t diff = distrib(gen); + AlignedVector64 input1(input_size); + for (size_t i = 0; i < input_size; ++i) { + input1[i] = distrib(gen); + } + + for (auto _ : state) { + EltwiseCmpSubModAVX512(input1.data(), input1.data(), CMPINT::NLT, bound, + diff, modulus, input_size); + } +} + +BENCHMARK(BM_EltwiseCmpSubModAVX512) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); +#endif + +//================================================================= + +} // namespace hexl +} // namespace intel diff --git a/benchmark/bench-eltwise-fma-mod.cpp b/benchmark/bench-eltwise-fma-mod.cpp new file mode 100644 index 00000000..6d3cbbc7 --- /dev/null +++ b/benchmark/bench-eltwise-fma-mod.cpp @@ -0,0 +1,95 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#include "eltwise/eltwise-fma-mod-avx512.hpp" +#include "eltwise/eltwise-fma-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-fma-mod.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "util/aligned-allocator.hpp" + +namespace intel { +namespace hexl { + +//================================================================= + +// state[0] is the degree +static void BM_EltwiseFMANative(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + uint64_t modulus = 0xffffffffffc0001ULL; + + AlignedVector64 op1(input_size, 1); + uint64_t op2 = 1; + AlignedVector64 op3(input_size, 2); + + for (auto _ : state) { + EltwiseFMAMod(op1.data(), op1.data(), op2, op3.data(), op1.size(), modulus, + 1); + } +} + 
+BENCHMARK(BM_EltwiseFMANative) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); + +//================================================================= + +#ifdef HEXL_HAS_AVX512DQ +// state[0] is the degree +static void BM_EltwiseFMAAVX512DQ(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + size_t modulus = 100; + + AlignedVector64 input1(input_size, 1); + uint64_t input2 = 3; + AlignedVector64 input3(input_size, 2); + + for (auto _ : state) { + EltwiseFMAModAVX512<64, 1>(input1.data(), input1.data(), input2, + input3.data(), input_size, modulus); + } +} + +BENCHMARK(BM_EltwiseFMAAVX512DQ) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); +#endif + +//================================================================= + +#ifdef HEXL_HAS_AVX512IFMA +// state[0] is the degree +static void BM_EltwiseFMAAVX512IFMA(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + size_t modulus = 100; + + AlignedVector64 input1(input_size, 1); + uint64_t input2 = 3; + AlignedVector64 input3(input_size, 2); + + for (auto _ : state) { + EltwiseFMAModAVX512<52, 1>(input1.data(), input1.data(), input2, + input3.data(), input_size, modulus); + } +} + +BENCHMARK(BM_EltwiseFMAAVX512IFMA) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); +#endif + +} // namespace hexl +} // namespace intel diff --git a/benchmark/bench-eltwise-mult-mod.cpp b/benchmark/bench-eltwise-mult-mod.cpp new file mode 100644 index 00000000..b73d5d6f --- /dev/null +++ b/benchmark/bench-eltwise-mult-mod.cpp @@ -0,0 +1,163 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#include "eltwise/eltwise-mult-mod-avx512.hpp" +#include "eltwise/eltwise-mult-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-mult-mod.hpp" +#include 
"logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "util/aligned-allocator.hpp" + +namespace intel { +namespace hexl { + +// state[0] is the degree +// state[1] is the bit-width of the modulus +// state[2] is the input_mod_factor +static void BM_EltwiseMultMod(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + size_t bit_width = state.range(1); + size_t input_mod_factor = state.range(2); + uint64_t modulus = (1ULL << bit_width) + 7; + + AlignedVector64 input1(input_size, 1); + AlignedVector64 input2(input_size, 2); + AlignedVector64 output(input_size, 2); + + for (auto _ : state) { + EltwiseMultMod(output.data(), input1.data(), input2.data(), input_size, + modulus, input_mod_factor); + } +} + +BENCHMARK(BM_EltwiseMultMod) + ->Unit(benchmark::kMicrosecond) + ->MinTime(1.0) + ->ArgsProduct({{1024, 8192, 16384}, + {48, 49, 50, 51, 52, 58, 59, 60}, + {1, 2, 4}}); + +//================================================================= + +// state[0] is the degree +static void BM_EltwiseMultModNative(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + uint64_t modulus = 0xffffffffffc0001ULL; + + AlignedVector64 input1(input_size, 1); + AlignedVector64 input2(input_size, 2); + AlignedVector64 output(input_size, 2); + + for (auto _ : state) { + EltwiseMultModNative<1>(output.data(), input1.data(), input2.data(), + input_size, modulus); + } +} + +BENCHMARK(BM_EltwiseMultModNative) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({16384}); + +//================================================================= + +#ifdef HEXL_HAS_AVX512DQ +// state[0] is the degree +// state[1] is the input_mod_factor +static void BM_EltwiseMultModAVX512Float(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + size_t input_mod_factor = state.range(1); + size_t modulus = 100; + + AlignedVector64 input1(input_size, 1); + AlignedVector64 
input2(input_size, 2); + AlignedVector64 output(input_size, 2); + + for (auto _ : state) { + switch (input_mod_factor) { + case 1: + EltwiseMultModAVX512Float<1>(output.data(), input1.data(), + input2.data(), input_size, modulus); + break; + case 2: + EltwiseMultModAVX512Float<2>(output.data(), input1.data(), + input2.data(), input_size, modulus); + break; + case 4: + EltwiseMultModAVX512Float<4>(output.data(), input1.data(), + input2.data(), input_size, modulus); + break; + } + } +} + +BENCHMARK(BM_EltwiseMultModAVX512Float) + ->Unit(benchmark::kMicrosecond) + ->MinTime(1.0) + ->Args({4096, 1}) + ->Args({4096, 2}) + ->Args({4096, 4}) + ->Args({8192, 1}) + ->Args({8192, 2}) + ->Args({8192, 4}) + ->Args({16384, 1}) + ->Args({16384, 2}) + ->Args({16384, 4}); +#endif + +//================================================================= + +#ifdef HEXL_HAS_AVX512DQ +// state[0] is the degree +// state[1] is the input_mod_factor +static void BM_EltwiseMultModAVX512Int(benchmark::State& state) { // NOLINT + size_t input_size = state.range(0); + size_t input_mod_factor = state.range(1); + size_t modulus = 1152921504606877697; + + AlignedVector64 input1(input_size, 1); + AlignedVector64 input2(input_size, 2); + AlignedVector64 output(input_size, 3); + + for (auto _ : state) { + switch (input_mod_factor) { + case 1: + EltwiseMultModAVX512Int<1>(output.data(), input1.data(), input2.data(), + input_size, modulus); + break; + case 2: + EltwiseMultModAVX512Int<2>(output.data(), input1.data(), input2.data(), + input_size, modulus); + break; + case 4: + EltwiseMultModAVX512Int<4>(output.data(), input1.data(), input2.data(), + input_size, modulus); + break; + } + } +} + +BENCHMARK(BM_EltwiseMultModAVX512Int) + ->Unit(benchmark::kMicrosecond) + ->MinTime(1.0) + ->Args({4096, 1}) + ->Args({4096, 2}) + ->Args({4096, 4}) + ->Args({8192, 1}) + ->Args({8192, 2}) + ->Args({8192, 4}) + ->Args({16384, 1}) + ->Args({16384, 2}) + ->Args({16384, 4}); +#endif + 
+//================================================================= + +} // namespace hexl +} // namespace intel diff --git a/benchmark/bench-ntt.cpp b/benchmark/bench-ntt.cpp new file mode 100644 index 00000000..8a9ba4dc --- /dev/null +++ b/benchmark/bench-ntt.cpp @@ -0,0 +1,391 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#include "intel-hexl/ntt/ntt.hpp" +#include "logging/logging.hpp" +#include "ntt/fwd-ntt-avx512.hpp" +#include "ntt/inv-ntt-avx512.hpp" +#include "ntt/ntt-internal.hpp" +#include "number-theory/number-theory.hpp" +#include "util/aligned-allocator.hpp" + +namespace intel { +namespace hexl { + +// Forward transforms + +//================================================================= + +// state[0] is the degree +static void BM_FwdNTTNative(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + size_t prime = GeneratePrimes(1, 45, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + NTT::NTTImpl ntt_impl(ntt_size, prime); + + for (auto _ : state) { + ForwardTransformToBitReverse64( + input.data(), ntt_size, prime, ntt_impl.GetRootOfUnityPowers().data(), + ntt_impl.GetPrecon64RootOfUnityPowers().data(), 2, 1); + } +} + +BENCHMARK(BM_FwdNTTNative) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({4096}) + ->Args({8192}) + ->Args({16384}); +//================================================================= + +#ifdef HEXL_HAS_AVX512IFMA +// state[0] is the degree +static void BM_FwdNTT_AVX512IFMA(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + size_t prime_bits = 49; + size_t prime = GeneratePrimes(1, prime_bits, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + NTT::NTTImpl ntt_impl(ntt_size, prime); + + const AlignedVector64 root_of_unity = + ntt_impl.GetRootOfUnityPowers(); + const AlignedVector64 precon_root_of_unity = + ntt_impl.GetPrecon52RootOfUnityPowers(); + + for (auto _ : state) { + 
ForwardTransformToBitReverseAVX512( + input.data(), ntt_size, prime, root_of_unity.data(), + precon_root_of_unity.data(), 2, 1); + } +} + +BENCHMARK(BM_FwdNTT_AVX512IFMA) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({4096}) + ->Args({8192}) + ->Args({16384}); + +//================================================================= + +// state[0] is the degree +static void BM_FwdNTT_AVX512IFMALazy(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + size_t prime_bits = 49; + size_t prime = GeneratePrimes(1, prime_bits, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + NTT::NTTImpl ntt_impl(ntt_size, prime); + + const AlignedVector64 root_of_unity = + ntt_impl.GetRootOfUnityPowers(); + const AlignedVector64 precon_root_of_unity = + ntt_impl.GetPrecon52RootOfUnityPowers(); + + for (auto _ : state) { + ForwardTransformToBitReverseAVX512( + input.data(), ntt_size, prime, root_of_unity.data(), + precon_root_of_unity.data(), 2, 4); + } +} + +BENCHMARK(BM_FwdNTT_AVX512IFMALazy) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({4096}) + ->Args({8192}) + ->Args({16384}); + +//================================================================= + +static void BM_FwdNTT_AVX512IFMAButterfly(benchmark::State& state) { // NOLINT + size_t ntt_size = 4096; + size_t prime_bits = 49; + size_t prime = GeneratePrimes(1, prime_bits, ntt_size)[0]; + + NTT::NTTImpl ntt_impl(ntt_size, prime); + + __m512i X = _mm512_set1_epi64(prime - 3); + __m512i Y = _mm512_set1_epi64(prime / 2); + + const AlignedVector64 root_of_unity = + ntt_impl.GetRootOfUnityPowers(); + const AlignedVector64 precon_root_of_unity = + ntt_impl.GetPrecon52RootOfUnityPowers(); + + __m512i W = _mm512_set1_epi64(root_of_unity[1]); + __m512i Wprecon = _mm512_set1_epi64(precon_root_of_unity[1]); + __m512i neg_p = _mm512_set1_epi64(-static_cast(prime)); + __m512i twice_p = _mm512_set1_epi64(prime + prime); + + for (auto _ : state) { + for (size_t i = 0; i < 1000000; ++i) { + 
benchmark::DoNotOptimize(i); + FwdButterfly<52, false>(&X, &Y, W, Wprecon, neg_p, twice_p); + } + } +} + +BENCHMARK(BM_FwdNTT_AVX512IFMAButterfly) + ->Unit(benchmark::kMicrosecond) + ->MinTime(1.0) + ->Args({4096}) + ->Args({8192}) + ->Args({16384}); + +#endif + +//================================================================= + +#ifdef HEXL_HAS_AVX512DQ +// state[0] is the degree +// state[1] is the output_mod_factor +static void BM_FwdNTT_AVX512DQ(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + uint64_t output_mod_factor = state.range(1); + size_t prime_bits = 61; + size_t prime = GeneratePrimes(1, prime_bits, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + NTT::NTTImpl ntt_impl(ntt_size, prime); + + const AlignedVector64 root_of_unity = + ntt_impl.GetRootOfUnityPowers(); + const AlignedVector64 precon_root_of_unity = + ntt_impl.GetPrecon64RootOfUnityPowers(); + for (auto _ : state) { + ForwardTransformToBitReverseAVX512( + input.data(), ntt_size, prime, root_of_unity.data(), + precon_root_of_unity.data(), 4, output_mod_factor); + } +} + +BENCHMARK(BM_FwdNTT_AVX512DQ) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024, 1}) + ->Args({1024, 4}) + ->Args({4096, 1}) + ->Args({4096, 4}) + ->Args({8192, 1}) + ->Args({8192, 4}) + ->Args({16384, 1}) + ->Args({16384, 4}); + +#endif + +//================================================================= + +// state[0] is the degree +static void BM_FwdNTTInPlace(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + size_t prime = GeneratePrimes(1, 61, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + NTT ntt(ntt_size, prime); + + for (auto _ : state) { + ntt.ComputeForward(input.data(), input.data(), 1, 1); + } +} + +BENCHMARK(BM_FwdNTTInPlace) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({4096}) + ->Args({8192}) + ->Args({16384}); + 
+//================================================================= + +// state[0] is the degree +static void BM_FwdNTTCopy(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + size_t prime = GeneratePrimes(1, 61, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + AlignedVector64 output(ntt_size, 1); + NTT ntt(ntt_size, prime); + + for (auto _ : state) { + ntt.ComputeForward(input.data(), output.data(), 1, 1); + } +} + +BENCHMARK(BM_FwdNTTCopy) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({4096}) + ->Args({8192}) + ->Args({16384}); + +//================================================================= + +// Inverse transforms + +// state[0] is the degree +static void BM_InvNTTNative(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + size_t prime = GeneratePrimes(1, 45, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + NTT::NTTImpl ntt_impl(ntt_size, prime); + + const AlignedVector64 root_of_unity = + ntt_impl.GetInvRootOfUnityPowers(); + const AlignedVector64 precon_root_of_unity = + ntt_impl.GetPrecon64InvRootOfUnityPowers(); + for (auto _ : state) { + InverseTransformFromBitReverse64(input.data(), ntt_size, prime, + root_of_unity.data(), + precon_root_of_unity.data(), 1, 1); + } +} + +BENCHMARK(BM_InvNTTNative) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({4096}) + ->Args({8192}) + ->Args({16384}); + +//================================================================= + +#ifdef HEXL_HAS_AVX512IFMA +// state[0] is the degree +static void BM_InvNTT_AVX512IFMA(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + size_t prime = GeneratePrimes(1, 49, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + NTT::NTTImpl ntt_impl(ntt_size, prime); + + const AlignedVector64 root_of_unity = + ntt_impl.GetInvRootOfUnityPowers(); + const AlignedVector64 precon_root_of_unity = + ntt_impl.GetPrecon52InvRootOfUnityPowers(); + for (auto _ : state) { + 
InverseTransformFromBitReverseAVX512( + input.data(), ntt_size, prime, root_of_unity.data(), + precon_root_of_unity.data(), 1, 1); + } +} + +BENCHMARK(BM_InvNTT_AVX512IFMA) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024}) + ->Args({4096}) + ->Args({8192}) + ->Args({16384}); + +//================================================================= + +// state[0] is the degree +static void BM_InvNTT_AVX512IFMALazy(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + size_t prime = GeneratePrimes(1, 49, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + NTT::NTTImpl ntt_impl(ntt_size, prime); + + const AlignedVector64 root_of_unity = + ntt_impl.GetInvRootOfUnityPowers(); + const AlignedVector64 precon_root_of_unity = + ntt_impl.GetPrecon52InvRootOfUnityPowers(); + for (auto _ : state) { + InverseTransformFromBitReverseAVX512( + input.data(), ntt_size, prime, root_of_unity.data(), + precon_root_of_unity.data(), 1, 4); + } +} + +BENCHMARK(BM_InvNTT_AVX512IFMALazy) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({4096}) + ->Args({8192}) + ->Args({16384}); + +//================================================================= + +static void BM_InvNTT_AVX512IFMAButterfly(benchmark::State& state) { // NOLINT + size_t ntt_size = 4096; + size_t prime_bits = 49; + size_t prime = GeneratePrimes(1, prime_bits, ntt_size)[0]; + + NTT::NTTImpl ntt_impl(ntt_size, prime); + + __m512i X = _mm512_set1_epi64(prime - 3); + __m512i Y = _mm512_set1_epi64(prime / 2); + + const AlignedVector64 root_of_unity = + ntt_impl.GetRootOfUnityPowers(); + const AlignedVector64 precon_root_of_unity = + ntt_impl.GetPrecon52InvRootOfUnityPowers(); + + __m512i W = _mm512_set1_epi64(root_of_unity[1]); + __m512i Wprecon = _mm512_set1_epi64(precon_root_of_unity[1]); + __m512i neg_p = _mm512_set1_epi64(-static_cast(prime)); + __m512i twice_p = _mm512_set1_epi64(prime + prime); + + for (auto _ : state) { + for (size_t i = 0; i < 1000000; ++i) { + 
benchmark::DoNotOptimize(i); + InvButterfly<52, false>(&X, &Y, W, Wprecon, neg_p, twice_p); + } + } +} + +BENCHMARK(BM_InvNTT_AVX512IFMAButterfly) + ->Unit(benchmark::kMicrosecond) + ->MinTime(1.0); +#endif + +//================================================================= + +#ifdef HEXL_HAS_AVX512DQ +// state[0] is the degree +static void BM_InvNTT_AVX512DQ(benchmark::State& state) { // NOLINT + size_t ntt_size = state.range(0); + uint64_t output_mod_factor = state.range(1); + size_t prime = GeneratePrimes(1, 62, ntt_size)[0]; + + AlignedVector64 input(ntt_size, 1); + NTT::NTTImpl ntt_impl(ntt_size, prime); + + const AlignedVector64 root_of_unity = + ntt_impl.GetInvRootOfUnityPowers(); + const AlignedVector64 precon_root_of_unity = + ntt_impl.GetPrecon64InvRootOfUnityPowers(); + + for (auto _ : state) { + InverseTransformFromBitReverseAVX512( + input.data(), ntt_size, prime, root_of_unity.data(), + precon_root_of_unity.data(), output_mod_factor, output_mod_factor); + } +} + +BENCHMARK(BM_InvNTT_AVX512DQ) + ->Unit(benchmark::kMicrosecond) + ->MinTime(3.0) + ->Args({1024, 1}) + ->Args({1024, 2}) + ->Args({4096, 1}) + ->Args({4096, 2}) + ->Args({8192, 1}) + ->Args({8192, 2}) + ->Args({16384, 1}) + ->Args({16384, 2}); +#endif + +//================================================================= + +} // namespace hexl +} // namespace intel diff --git a/benchmark/main.cpp b/benchmark/main.cpp new file mode 100644 index 00000000..5d93d9d9 --- /dev/null +++ b/benchmark/main.cpp @@ -0,0 +1,13 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "logging/logging.hpp" + +int main(int argc, char** argv) { + START_EASYLOGGINGPP(argc, argv); + + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake new file mode 100644 index 00000000..f5545c47 --- /dev/null +++ b/cmake/FindSphinx.cmake @@ -0,0 +1,12 @@ +# Copyright (C) 2020-2021 
Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +find_program(SPHINX_EXECUTABLE + NAMES sphinx-build + DOC "Path to sphinx-build executable") + +include(FindPackageHandleStandardArgs) + +find_package_handle_standard_args(Sphinx + "Failed to find sphinx-build executable" + SPHINX_EXECUTABLE) diff --git a/cmake/HEXLHelpers.cmake b/cmake/HEXLHelpers.cmake new file mode 100644 index 00000000..7e3ef754 --- /dev/null +++ b/cmake/HEXLHelpers.cmake @@ -0,0 +1,26 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Checks if SOURCE_FILE can be compiled and returns 0 upon running +# If so, adds OUTPUT_FLAG to compile definitions +function(check_compile_flag SOURCE_FILE OUTPUT_FLAG) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + set(NATIVE_COMPILE_DEFINITIONS "/arch:AVX512") + else() + set(NATIVE_COMPILE_DEFINITIONS "-march=native") + endif() + + try_run(CAN_RUN CAN_COMPILE ${CMAKE_BINARY_DIR} + "${SOURCE_FILE}" + COMPILE_DEFINITIONS ${NATIVE_COMPILE_DEFINITIONS} + OUTPUT_VARIABLE TRY_COMPILE_OUTPUT + ) + # Uncomment below to debug + # message("TRY_COMPILE_OUTPUT ${TRY_COMPILE_OUTPUT}") + if (CAN_COMPILE AND CAN_RUN STREQUAL 0) + message(STATUS "Setting ${OUTPUT_FLAG}") + add_definitions(-D${OUTPUT_FLAG}) + else() + message(STATUS "Compile flag not found: ${OUTPUT_FLAG}") + endif() +endfunction() diff --git a/cmake/IntelHEXLConfig.cmake.in b/cmake/IntelHEXLConfig.cmake.in new file mode 100644 index 00000000..6872c8fa --- /dev/null +++ b/cmake/IntelHEXLConfig.cmake.in @@ -0,0 +1,27 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# This will define the following variables: +# +# INTEL_HEXL_FOUND - True if the system has the Intel HEXL library +# INTEL_HEXL_VERSION - The full major.minor.patch version number +# INTEL_HEXL_VERSION_MAJOR - The major version number +# INTEL_HEXL_VERSION_MINOR - The minor version number +# INTEL_HEXL_VERSION_PATCH - The patch version number + +@PACKAGE_INIT@ + 
+include(${CMAKE_CURRENT_LIST_DIR}/IntelHEXLTargets.cmake) + +# Defines INTEL_HEXL_FOUND: If Intel HEXL library was found +if(TARGET intel_hexl) + set(INTEL_HEXL_FOUND TRUE) + message(STATUS "Intel HEXL found") +else() + message(STATUS "Intel HEXL not found") +endif() + +set(INTEL_HEXL_VERSION "@INTEL_HEXL_VERSION@") +set(INTEL_HEXL_VERSION_MAJOR "@INTEL_HEXL_VERSION_MAJOR@") +set(INTEL_HEXL_VERSION_MINOR "@INTEL_HEXL_VERSION_MINOR@") +set(INTEL_HEXL_VERSION_PATCH "@INTEL_HEXL_VERSION_PATCH@") diff --git a/cmake/cpu-features/CMakeLists.txt b/cmake/cpu-features/CMakeLists.txt new file mode 100644 index 00000000..e361fefe --- /dev/null +++ b/cmake/cpu-features/CMakeLists.txt @@ -0,0 +1,31 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Download and unpack cpu-features at configure time +configure_file(CMakeLists.txt.in cpu-features-download/CMakeLists.txt) +execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/cpu-features-download ) +if(result) + message(FATAL_ERROR "CMake step for cpu-features failed: ${result}") +endif() +execute_process(COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/cpu-features-download ) +if(result) + message(FATAL_ERROR "Build step for cpu-features failed: ${result}") +endif() + +# Add cpu-features directly to our build. This defines the cpu_features target. 
+# Add option to override option of same name in cpu-features +option(BUILD_PIC "" OFF) +# Turn ON said option +set(BUILD_PIC ON) + +option(BUILD_SHARED_LIBS "" OFF) +# Turn OFF said option +set(BUILD_SHARED_LIBS OFF) + +add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/cpu-features-src + ${CMAKE_CURRENT_BINARY_DIR}/cpu-features-build + EXCLUDE_FROM_ALL) diff --git a/cmake/cpu-features/CMakeLists.txt.in b/cmake/cpu-features/CMakeLists.txt.in new file mode 100644 index 00000000..fdcbcf99 --- /dev/null +++ b/cmake/cpu-features/CMakeLists.txt.in @@ -0,0 +1,18 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.5.1) + +project(cpu-features-download NONE) + +include(ExternalProject) +ExternalProject_Add(cpu_features + GIT_REPOSITORY https://github.com/google/cpu_features.git + GIT_TAG master + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/cpu-features-src" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/cpu-features-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/cmake/easylogging/CMakeLists.txt b/cmake/easylogging/CMakeLists.txt new file mode 100644 index 00000000..58536163 --- /dev/null +++ b/cmake/easylogging/CMakeLists.txt @@ -0,0 +1,31 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Download and unpack easylogging at configure time +configure_file(CMakeLists.txt.in easylogging-download/CMakeLists.txt) +set(CONFIGURE_COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .) +execute_process(COMMAND ${CONFIGURE_COMMAND} + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/easylogging-download ) +if(result) + message(FATAL_ERROR "CMake step for easylogging failed: ${result}") +endif() +execute_process(COMMAND ${CMAKE_COMMAND} --build . 
+ RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/easylogging-download ) +if(result) + message(FATAL_ERROR "Build step for easylogging failed: ${result}") +endif() + +# Add easylogging directly to our build. This defines the easylogging target. +# Add option to override option of same name in easylogging +option(build_static_lib "" OFF) +# Turn on said option +set(build_static_lib ON) + +add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/easylogging-src + ${CMAKE_CURRENT_BINARY_DIR}/easylogging-build + EXCLUDE_FROM_ALL) + +set_target_properties(easyloggingpp PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + ${CMAKE_CURRENT_BINARY_DIR}/easylogging-src/src) diff --git a/cmake/easylogging/CMakeLists.txt.in b/cmake/easylogging/CMakeLists.txt.in new file mode 100644 index 00000000..a7186797 --- /dev/null +++ b/cmake/easylogging/CMakeLists.txt.in @@ -0,0 +1,18 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.5.1) + +project(easylogging-download NONE) + +include(ExternalProject) +ExternalProject_Add(easylogging + GIT_REPOSITORY https://github.com/amrayn/easyloggingpp.git + GIT_TAG master + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/easylogging-src" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/easylogging-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/cmake/gbenchmark/CMakeLists.txt b/cmake/gbenchmark/CMakeLists.txt new file mode 100644 index 00000000..c6a29b81 --- /dev/null +++ b/cmake/gbenchmark/CMakeLists.txt @@ -0,0 +1,27 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Download and unpack gbenchmark at configure time +configure_file(CMakeLists.txt.in gbenchmark-download/CMakeLists.txt) +set(CONFIGURE_COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .) 
+execute_process(COMMAND ${CONFIGURE_COMMAND} + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/gbenchmark-download ) +if(result) + message(FATAL_ERROR "CMake step for gbenchmark failed: ${result}") +endif() +execute_process(COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/gbenchmark-download ) +if(result) + message(FATAL_ERROR "Build step for gbenchmark failed: ${result}") +endif() + +# Add gbenchmark directly to our build. This defines the gbenchmark target. +# Add option to override option of same name in gbenchmark +option(BENCHMARK_ENABLE_TESTING "" ON) +# Turn off said option +set(BENCHMARK_ENABLE_TESTING OFF) +add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/gbenchmark-src + ${CMAKE_CURRENT_BINARY_DIR}/gbenchmark-build + EXCLUDE_FROM_ALL) diff --git a/cmake/gbenchmark/CMakeLists.txt.in b/cmake/gbenchmark/CMakeLists.txt.in new file mode 100644 index 00000000..d0603cb6 --- /dev/null +++ b/cmake/gbenchmark/CMakeLists.txt.in @@ -0,0 +1,18 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.5.1) + +project(google-benchmark-download NONE) + +include(ExternalProject) +ExternalProject_Add(gbenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG master + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark-src" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/gbenchmark-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/cmake/gflags/CMakeLists.txt b/cmake/gflags/CMakeLists.txt new file mode 100644 index 00000000..a4b78509 --- /dev/null +++ b/cmake/gflags/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Download and unpack gflags at configure time +configure_file(CMakeLists.txt.in gflags-download/CMakeLists.txt) +set(CONFIGURE_COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .) 
+execute_process(COMMAND ${CONFIGURE_COMMAND} + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/gflags-download ) +if(result) + message(FATAL_ERROR "CMake step for gflags failed: ${result}") +endif() +execute_process(COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/gflags-download ) +if(result) + message(FATAL_ERROR "Build step for gflags failed: ${result}") +endif() + +# Add gflags directly to our build. This defines the gflags target. +add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/gflags-src + ${CMAKE_CURRENT_BINARY_DIR}/gflags-build + EXCLUDE_FROM_ALL) diff --git a/cmake/gflags/CMakeLists.txt.in b/cmake/gflags/CMakeLists.txt.in new file mode 100644 index 00000000..6639e319 --- /dev/null +++ b/cmake/gflags/CMakeLists.txt.in @@ -0,0 +1,18 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.5.1) + +project(gflags-download NONE) + +include(ExternalProject) +ExternalProject_Add(gflags + GIT_REPOSITORY https://github.com/gflags/gflags.git + GIT_TAG v2.2.2 + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gflags-src" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/gflags-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/cmake/gtest/CMakeLists.txt b/cmake/gtest/CMakeLists.txt new file mode 100644 index 00000000..6bf78a96 --- /dev/null +++ b/cmake/gtest/CMakeLists.txt @@ -0,0 +1,27 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Download and unpack gtest at configure time +configure_file(CMakeLists.txt.in gtest-download/CMakeLists.txt) +set(CONFIGURE_COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .) 
+execute_process(COMMAND ${CONFIGURE_COMMAND} + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/gtest-download ) +if(result) + message(FATAL_ERROR "CMake step for gtest failed: ${result}") +endif() +execute_process(COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/gtest-download ) +if(result) + message(FATAL_ERROR "Build step for gtest failed: ${result}") +endif() + +# Add gtest directly to our build. This defines the gtest target. +# Add option to override option of same name in gtest +option (gtest_force_shared_crt "" OFF) +# Turn on said option +set(gtest_force_shared_crt ON) +add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/gtest-src + ${CMAKE_CURRENT_BINARY_DIR}/gtest-build + EXCLUDE_FROM_ALL) diff --git a/cmake/gtest/CMakeLists.txt.in b/cmake/gtest/CMakeLists.txt.in new file mode 100644 index 00000000..b5c4b88f --- /dev/null +++ b/cmake/gtest/CMakeLists.txt.in @@ -0,0 +1,18 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.5.1) + +project(google-test-download NONE) + +include(ExternalProject) +ExternalProject_Add(gtest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.10.0 + SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gtest-src" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/gtest-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/cmake/test-avx256.cpp b/cmake/test-avx256.cpp new file mode 100644 index 00000000..92d9df94 --- /dev/null +++ b/cmake/test-avx256.cpp @@ -0,0 +1,13 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include <immintrin.h> + +int main() { + __m256i one = _mm256_set1_epi64x(1); + __m256i two = _mm256_set1_epi64x(2); + __m256i sum = _mm256_add_epi64(one, two); + int result = _mm256_extract_epi64(sum, 0); + int expected = 3; + return (result == expected) ? 
0 : 1; +} diff --git a/cmake/test-avx512dq.cpp b/cmake/test-avx512dq.cpp new file mode 100644 index 00000000..774c2f32 --- /dev/null +++ b/cmake/test-avx512dq.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include <immintrin.h> + +int main() { + __m512i one = _mm512_set1_epi64(1); + __m512i two = _mm512_set1_epi64(2); + __m512i sum = _mm512_add_epi64(one, two); + __m256i sum0 = _mm512_extracti64x4_epi64(sum, 0); + int result = _mm256_extract_epi64(sum0, 0); + int expected = 3; + return (result == expected) ? 0 : 1; +} diff --git a/cmake/test-avx512ifma.cpp b/cmake/test-avx512ifma.cpp new file mode 100644 index 00000000..f11e68ba --- /dev/null +++ b/cmake/test-avx512ifma.cpp @@ -0,0 +1,15 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include <immintrin.h> + +int main() { + __m512i zero = _mm512_set1_epi64(0); + __m512i one = _mm512_set1_epi64(1); + __m512i two = _mm512_set1_epi64(2); + __m512i out = _mm512_madd52lo_epu64(zero, one, two); + __m256i out0 = _mm512_extracti64x4_epi64(out, 0); + int result = _mm256_extract_epi64(out0, 0); + int expected = 2; + return (result == expected) ? 
0 : 1; +} diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt new file mode 100644 index 00000000..ae9ef6e8 --- /dev/null +++ b/docs/CMakeLists.txt @@ -0,0 +1,47 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Build Doxygen documentation +find_package(Doxygen REQUIRED) + +set(DOXYGEN_INDEX_FILE ${CMAKE_CURRENT_SOURCE_DIR}/doxygen/xml/indexl.html) +set(DOXYGEN_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/doxygen) +set(DOXYFILE_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in) +set(DOXYFILE_OUT ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile) + +# Create Doxyfile +configure_file(${DOXYFILE_IN} ${DOXYFILE_OUT} @ONLY) + +file(MAKE_DIRECTORY ${DOXYGEN_OUTPUT_DIR}) # Doxygen won't create this for us + +add_custom_command(OUTPUT ${DOXYGEN_INDEX_FILE} + DEPENDS ${HEXL_INC_ROOT_DIR} + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYFILE_OUT} + MAIN_DEPENDENCY ${DOXYFILE_OUT} ${DOXYFILE_IN} + COMMENT "Generating Doxygen documentation") + +add_custom_target(doxygen DEPENDS ${DOXYGEN_INDEX_FILE}) + +# Build Sphinx documentation + +find_package(Sphinx REQUIRED) + +set(SPHINX_CONF_IN ${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in) +set(SPHINX_CONF_OUT ${CMAKE_CURRENT_SOURCE_DIR}/conf.py) +set(SPHINX_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}) +set(SPHINX_BUILD ${CMAKE_CURRENT_SOURCE_DIR}/sphinx) + +# Create Sphinx config +configure_file(${SPHINX_CONF_IN} ${SPHINX_CONF_OUT} @ONLY) +add_custom_target(sphinx + COMMAND ${SPHINX_EXECUTABLE} -b html + # Tell Breathe where to find the Doxygen output + -Dbreathe_projects.IntelHEXL=${DOXYGEN_OUTPUT_DIR}/xml + ${SPHINX_SOURCE} ${SPHINX_BUILD} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS doxygen + ${CMAKE_CURRENT_SOURCE_DIR}/index.rst + ${DOXYGEN_INDEX_FILE} + COMMENT "Generating documentation with Sphinx") + +add_custom_target(docs DEPENDS sphinx) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in new file mode 100644 index 00000000..7173638d --- /dev/null +++ b/docs/Doxyfile.in @@ -0,0 +1,29 @@ +# Copyright (C) 
2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +PROJECT_NAME = "Intel HEXL" +PROJECT_BRIEF = "Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption." + +OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@/doxygen +INPUT = @CMAKE_SOURCE_DIR@/intel-hexl/include \ + @CMAKE_SOURCE_DIR@/README.md +RECURSIVE = YES +USE_MDFILE_AS_MAINPAGE = README.md +USE_MATHJAX = YES + +GENERATE_XML = YES +EXTRACT_ALL = YES +EXTRACT_PRIVATE = NO +SHOW_NAMESPACES = YES +GENERATE_LATEX = YES + +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = YES +WARN_AS_ERROR = YES + +QUIET = NO + +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 00000000..bae94bad --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,35 @@ +.. Copyright (C) 2020-2021 Intel Corporation +.. SPDX-License-Identifier: Apache-2.0 + + +Intel HEXL API Reference +============================== + +NTT +---------------- +.. doxygenfile:: ntt.hpp + +EltwiseAddMod +---------------- +.. doxygenfile:: eltwise-add-mod.hpp + +EltwiseCmpAdd +---------------- +.. doxygenfile:: eltwise-cmp-add.hpp + + +EltwiseCmpSubMod +---------------- +.. doxygenfile:: eltwise-cmp-sub-mod.hpp + +EltwiseFMA +---------------- +.. doxygenfile:: eltwise-fma-mod.hpp + +EltwiseMultMod +---------------- +.. doxygenfile:: eltwise-mult-mod.hpp + +EltwiseReduceMod +---------------- +.. doxygenfile:: eltwise-reduce-mod.hpp diff --git a/docs/conf.py.in b/docs/conf.py.in new file mode 100644 index 00000000..cdb62196 --- /dev/null +++ b/docs/conf.py.in @@ -0,0 +1,33 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import sphinx_rtd_theme + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.mathjax', + 'sphinx.ext.ifconfig', + 'sphinx.ext.viewcode', + 'sphinx_rtd_theme', + 'breathe', + 'm2r2' + ] + +# General information about the project. +project = u'Intel HEXL' +copyright = '2020-2021, Intel Corporation' +author = 'Intel Corporation' + +breathe_projects = {"IntelHEXL": "@CMAKE_CURRENT_BINARY_DIR@/xml/"} + +# Breathe Configuration +breathe_default_project = "IntelHEXL" + +breathe_default_members = ('members', 'undoc-members') + +html_theme = 'sphinx_rtd_theme' +source_suffix = ['.rst', '.md'] diff --git a/docs/docs/doxygen/html/_r_e_a_d_m_e_8md.html b/docs/docs/doxygen/html/_r_e_a_d_m_e_8md.html new file mode 100644 index 00000000..850bef3f --- /dev/null +++ b/docs/docs/doxygen/html/_r_e_a_d_m_e_8md.html @@ -0,0 +1,84 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/README.md File Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ +
+
+
+
/Users/fboemer/repos/DBIO/intel-hexl/README.md File Reference
+
+
+
+ + + + diff --git a/docs/docs/doxygen/html/annotated.html b/docs/docs/doxygen/html/annotated.html new file mode 100644 index 00000000..7d8d28c7 --- /dev/null +++ b/docs/docs/doxygen/html/annotated.html @@ -0,0 +1,91 @@ + + + + + + + +Intel HEXL: Class List + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Class List
+
+
+
Here are the classes, structs, unions and interfaces with brief descriptions:
+
[detail level 123]
+ + + +
 Nintel
 Nhexl
 CNTTPerforms negacyclic forward and inverse number-theoretic transform (NTT), commonly used in RLWE cryptography
+
+
+ + + + diff --git a/docs/docs/doxygen/html/bc_s.png b/docs/docs/doxygen/html/bc_s.png new file mode 100644 index 00000000..224b29aa Binary files /dev/null and b/docs/docs/doxygen/html/bc_s.png differ diff --git a/docs/docs/doxygen/html/bdwn.png b/docs/docs/doxygen/html/bdwn.png new file mode 100644 index 00000000..940a0b95 Binary files /dev/null and b/docs/docs/doxygen/html/bdwn.png differ diff --git a/docs/docs/doxygen/html/classes.html b/docs/docs/doxygen/html/classes.html new file mode 100644 index 00000000..f28c12f5 --- /dev/null +++ b/docs/docs/doxygen/html/classes.html @@ -0,0 +1,95 @@ + + + + + + + +Intel HEXL: Class Index + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Class Index
+
+
+ + + + + + + + +
  n  
+
NTT (intel::hexl)   
+ +
+ + + + diff --git a/docs/docs/doxygen/html/classintel_1_1hexl_1_1_n_t_t-members.html b/docs/docs/doxygen/html/classintel_1_1hexl_1_1_n_t_t-members.html new file mode 100644 index 00000000..aa6c44e4 --- /dev/null +++ b/docs/docs/doxygen/html/classintel_1_1hexl_1_1_n_t_t-members.html @@ -0,0 +1,97 @@ + + + + + + + +Intel HEXL: Member List + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
intel::hexl::NTT Member List
+
+
+ +

This is the complete list of members for intel::hexl::NTT, including all inherited members.

+ + + + + + + +
ComputeForward(uint64_t *result, const uint64_t *operand, uint64_t input_mod_factor, uint64_t output_mod_factor)intel::hexl::NTT
ComputeInverse(uint64_t *result, const uint64_t *operand, uint64_t input_mod_factor, uint64_t output_mod_factor)intel::hexl::NTT
NTT()intel::hexl::NTT
NTT(uint64_t degree, uint64_t p)intel::hexl::NTT
NTT(uint64_t degree, uint64_t p, uint64_t root_of_unity)intel::hexl::NTT
~NTT()intel::hexl::NTT
+ + + + diff --git a/docs/docs/doxygen/html/classintel_1_1hexl_1_1_n_t_t.html b/docs/docs/doxygen/html/classintel_1_1hexl_1_1_n_t_t.html new file mode 100644 index 00000000..e3a1c8dd --- /dev/null +++ b/docs/docs/doxygen/html/classintel_1_1hexl_1_1_n_t_t.html @@ -0,0 +1,345 @@ + + + + + + + +Intel HEXL: intel::hexl::NTT Class Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
intel::hexl::NTT Class Reference
+
+
+ +

Performs negacyclic forward and inverse number-theoretic transform (NTT), commonly used in RLWE cryptography. + More...

+ +

#include <ntt.hpp>

+ + + + + + + + + + + + + + + + + + + +

+Public Member Functions

 NTT ()
 Initializes an empty NTT object. More...
 
 ~NTT ()
 Destructs the NTT object. More...
 
 NTT (uint64_t degree, uint64_t p)
 Performs pre-computation necessary for forward and inverse transforms. More...
 
 NTT (uint64_t degree, uint64_t p, uint64_t root_of_unity)
 Initializes an NTT object with degree degree and modulus p. More...
 
void ComputeForward (uint64_t *result, const uint64_t *operand, uint64_t input_mod_factor, uint64_t output_mod_factor)
 Compute forward NTT. Results are bit-reversed. More...
 
void ComputeInverse (uint64_t *result, const uint64_t *operand, uint64_t input_mod_factor, uint64_t output_mod_factor)
 
+

Detailed Description

+

Performs negacyclic forward and inverse number-theoretic transform (NTT), commonly used in RLWE cryptography.

+

The number-theoretic transform (NTT) specializes the discrete Fourier transform (DFT) to the finite field \( \mathbb{Z}_p / (X^N + 1) \).

+

Constructor & Destructor Documentation

+ +

◆ NTT() [1/3]

+ +
+
+ + + + + + + +
intel::hexl::NTT::NTT ()
+
+ +

Initializes an empty NTT object.

+ +
+
+ +

◆ ~NTT()

+ +
+
+ + + + + + + +
intel::hexl::NTT::~NTT ()
+
+ +

Destructs the NTT object.

+ +
+
+ +

◆ NTT() [2/3]

+ +
+
+ + + + + + + + + + + + + + + + + + +
intel::hexl::NTT::NTT (uint64_t degree,
uint64_t p 
)
+
+ +

Performs pre-computation necessary for forward and inverse transforms.

+

Initializes an NTT object with degree degree and modulus p.

Parameters
+ + + +
[in]degreea.k.a. N. Size of the NTT transform. Must be a power of 2
[in]pPrime modulus. Must satisfy \( p == 1 \mod 2N \)
+
+
+ +
+
+ +

◆ NTT() [3/3]

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
intel::hexl::NTT::NTT (uint64_t degree,
uint64_t p,
uint64_t root_of_unity 
)
+
+ +

Initializes an NTT object with degree degree and modulus p.

+
Parameters
+ + + + +
[in]degreea.k.a. N. Size of the NTT transform. Must be a power of 2
[in]pPrime modulus. Must satisfy \( p == 1 \mod 2N \)
[in]root_of_unity2N'th root of unity in \( \mathbb{Z_p} \).
+
+
+

Performs pre-computation necessary for forward and inverse transforms

+ +
+
+

Member Function Documentation

+ +

◆ ComputeForward()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void intel::hexl::NTT::ComputeForward (uint64_t * result,
const uint64_t * operand,
uint64_t input_mod_factor,
uint64_t output_mod_factor 
)
+
+ +

Compute forward NTT. Results are bit-reversed.

+
Parameters
+ + + + + +
[out]resultStores the result
[in]operandData on which to compute the NTT
[in]input_mod_factorAssume input operand are in [0, input_mod_factor * p). Must be 1, 2 or 4.
[in]output_mod_factorReturns output operand in [0, output_mod_factor * p). Must be 1 or 4.
+
+
+ +
+
+ +

◆ ComputeInverse()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void intel::hexl::NTT::ComputeInverse (uint64_t * result,
const uint64_t * operand,
uint64_t input_mod_factor,
uint64_t output_mod_factor 
)
+
+

Compute inverse NTT. Results are bit-reversed.

Parameters
+ + + + + +
[out]resultStores the result
[in]operandData on which to compute the NTT
[in]input_mod_factorAssume input operand are in [0, input_mod_factor * p). Must be 1 or 2.
[in]output_mod_factorReturns output operand in [0, output_mod_factor * p). Must be 1 or 2.
+
+
+ +
+
+
The documentation for this class was generated from the following file:
    +
  • /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/ntt/ntt.hpp
  • +
+
+ + + + diff --git a/docs/docs/doxygen/html/closed.png b/docs/docs/doxygen/html/closed.png new file mode 100644 index 00000000..98cc2c90 Binary files /dev/null and b/docs/docs/doxygen/html/closed.png differ diff --git a/docs/docs/doxygen/html/dir_60e7388d20bffeeed71217422ae2faa2.html b/docs/docs/doxygen/html/dir_60e7388d20bffeeed71217422ae2faa2.html new file mode 100644 index 00000000..772a2bac --- /dev/null +++ b/docs/docs/doxygen/html/dir_60e7388d20bffeeed71217422ae2faa2.html @@ -0,0 +1,94 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/util Directory Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
util Directory Reference
+
+
+ + + + +

+Files

file  util.hpp [code]
 
+
+ + + + diff --git a/docs/docs/doxygen/html/dir_7759c3a881395f02ed4947f5a1aa5b9c.html b/docs/docs/doxygen/html/dir_7759c3a881395f02ed4947f5a1aa5b9c.html new file mode 100644 index 00000000..c9f0237c --- /dev/null +++ b/docs/docs/doxygen/html/dir_7759c3a881395f02ed4947f5a1aa5b9c.html @@ -0,0 +1,103 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl Directory Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
intel-hexl Directory Reference
+
+
+ + + + + + + + +

+Directories

directory  eltwise
 
directory  ntt
 
directory  util
 
+ + + +

+Files

file  intel-hexl.hpp [code]
 
+
+ + + + diff --git a/docs/docs/doxygen/html/dir_8420cd92772e9af80a76f3e30148eb70.html b/docs/docs/doxygen/html/dir_8420cd92772e9af80a76f3e30148eb70.html new file mode 100644 index 00000000..abb3c813 --- /dev/null +++ b/docs/docs/doxygen/html/dir_8420cd92772e9af80a76f3e30148eb70.html @@ -0,0 +1,94 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl Directory Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
intel-hexl Directory Reference
+
+
+ + + + +

+Directories

directory  include
 
+
+ + + + diff --git a/docs/docs/doxygen/html/dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa.html b/docs/docs/doxygen/html/dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa.html new file mode 100644 index 00000000..40d40fc3 --- /dev/null +++ b/docs/docs/doxygen/html/dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa.html @@ -0,0 +1,94 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include Directory Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
include Directory Reference
+
+
+ + + + +

+Directories

directory  intel-hexl
 
+
+ + + + diff --git a/docs/docs/doxygen/html/dir_b272e9f08317806cfbaee27c029c625d.html b/docs/docs/doxygen/html/dir_b272e9f08317806cfbaee27c029c625d.html new file mode 100644 index 00000000..252af357 --- /dev/null +++ b/docs/docs/doxygen/html/dir_b272e9f08317806cfbaee27c029c625d.html @@ -0,0 +1,94 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/ntt Directory Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
ntt Directory Reference
+
+
+ + + + +

+Files

file  ntt.hpp [code]
 
+
+ + + + diff --git a/docs/docs/doxygen/html/dir_b327ef8739a3d23275834e47dda5cef1.html b/docs/docs/doxygen/html/dir_b327ef8739a3d23275834e47dda5cef1.html new file mode 100644 index 00000000..74acf520 --- /dev/null +++ b/docs/docs/doxygen/html/dir_b327ef8739a3d23275834e47dda5cef1.html @@ -0,0 +1,104 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise Directory Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
eltwise Directory Reference
+
+ + + + + diff --git a/docs/docs/doxygen/html/doc.png b/docs/docs/doxygen/html/doc.png new file mode 100644 index 00000000..17edabff Binary files /dev/null and b/docs/docs/doxygen/html/doc.png differ diff --git a/docs/docs/doxygen/html/doxygen.css b/docs/docs/doxygen/html/doxygen.css new file mode 100644 index 00000000..73ecbb2c --- /dev/null +++ b/docs/docs/doxygen/html/doxygen.css @@ -0,0 +1,1771 @@ +/* The standard CSS for doxygen 1.8.17 */ + +body, table, div, p, dl { + font: 400 14px/22px Roboto,sans-serif; +} + +p.reference, p.definition { + font: 400 14px/22px Roboto,sans-serif; +} + +/* @group Heading Levels */ + +h1.groupheader { + font-size: 150%; +} + +.title { + font: 400 14px/28px Roboto,sans-serif; + font-size: 150%; + font-weight: bold; + margin: 10px 2px; +} + +h2.groupheader { + border-bottom: 1px solid #879ECB; + color: #354C7B; + font-size: 150%; + font-weight: normal; + margin-top: 1.75em; + padding-top: 8px; + padding-bottom: 4px; + width: 100%; +} + +h3.groupheader { + font-size: 100%; +} + +h1, h2, h3, h4, h5, h6 { + -webkit-transition: text-shadow 0.5s linear; + -moz-transition: text-shadow 0.5s linear; + -ms-transition: text-shadow 0.5s linear; + -o-transition: text-shadow 0.5s linear; + transition: text-shadow 0.5s linear; + margin-right: 15px; +} + +h1.glow, h2.glow, h3.glow, h4.glow, h5.glow, h6.glow { + text-shadow: 0 0 15px cyan; +} + +dt { + font-weight: bold; +} + +ul.multicol { + -moz-column-gap: 1em; + -webkit-column-gap: 1em; + column-gap: 1em; + -moz-column-count: 3; + -webkit-column-count: 3; + column-count: 3; +} + +p.startli, p.startdd { + margin-top: 2px; +} + +th p.starttd, p.intertd, p.endtd { + font-size: 100%; + font-weight: 700; +} + +p.starttd { + margin-top: 0px; +} + +p.endli { + margin-bottom: 0px; +} + +p.enddd { + margin-bottom: 4px; +} + +p.endtd { + margin-bottom: 2px; +} + +p.interli { +} + +p.interdd { +} + +p.intertd { +} + +/* @end */ + +caption { + font-weight: bold; +} + +span.legend { + font-size: 70%; + 
text-align: center; +} + +h3.version { + font-size: 90%; + text-align: center; +} + +div.qindex, div.navtab{ + background-color: #EBEFF6; + border: 1px solid #A3B4D7; + text-align: center; +} + +div.qindex, div.navpath { + width: 100%; + line-height: 140%; +} + +div.navtab { + margin-right: 15px; +} + +/* @group Link Styling */ + +a { + color: #3D578C; + font-weight: normal; + text-decoration: none; +} + +.contents a:visited { + color: #4665A2; +} + +a:hover { + text-decoration: underline; +} + +a.qindex { + font-weight: bold; +} + +a.qindexHL { + font-weight: bold; + background-color: #9CAFD4; + color: #FFFFFF; + border: 1px double #869DCA; +} + +.contents a.qindexHL:visited { + color: #FFFFFF; +} + +a.el { + font-weight: bold; +} + +a.elRef { +} + +a.code, a.code:visited, a.line, a.line:visited { + color: #4665A2; +} + +a.codeRef, a.codeRef:visited, a.lineRef, a.lineRef:visited { + color: #4665A2; +} + +/* @end */ + +dl.el { + margin-left: -1cm; +} + +ul { + overflow: hidden; /*Fixed: list item bullets overlap floating elements*/ +} + +#side-nav ul { + overflow: visible; /* reset ul rule for scroll bar in GENERATE_TREEVIEW window */ +} + +#main-nav ul { + overflow: visible; /* reset ul rule for the navigation bar drop down lists */ +} + +.fragment { + text-align: left; + direction: ltr; + overflow-x: auto; /*Fixed: fragment lines overlap floating elements*/ + overflow-y: hidden; +} + +pre.fragment { + border: 1px solid #C4CFE5; + background-color: #FBFCFD; + padding: 4px 6px; + margin: 4px 8px 4px 2px; + overflow: auto; + word-wrap: break-word; + font-size: 9pt; + line-height: 125%; + font-family: monospace, fixed; + font-size: 105%; +} + +div.fragment { + padding: 0 0 1px 0; /*Fixed: last line underline overlap border*/ + margin: 4px 8px 4px 2px; + background-color: #FBFCFD; + border: 1px solid #C4CFE5; +} + +div.line { + font-family: monospace, fixed; + font-size: 13px; + min-height: 13px; + line-height: 1.0; + text-wrap: unrestricted; + white-space: 
-moz-pre-wrap; /* Moz */ + white-space: -pre-wrap; /* Opera 4-6 */ + white-space: -o-pre-wrap; /* Opera 7 */ + white-space: pre-wrap; /* CSS3 */ + word-wrap: break-word; /* IE 5.5+ */ + text-indent: -53px; + padding-left: 53px; + padding-bottom: 0px; + margin: 0px; + -webkit-transition-property: background-color, box-shadow; + -webkit-transition-duration: 0.5s; + -moz-transition-property: background-color, box-shadow; + -moz-transition-duration: 0.5s; + -ms-transition-property: background-color, box-shadow; + -ms-transition-duration: 0.5s; + -o-transition-property: background-color, box-shadow; + -o-transition-duration: 0.5s; + transition-property: background-color, box-shadow; + transition-duration: 0.5s; +} + +div.line:after { + content:"\000A"; + white-space: pre; +} + +div.line.glow { + background-color: cyan; + box-shadow: 0 0 10px cyan; +} + + +span.lineno { + padding-right: 4px; + text-align: right; + border-right: 2px solid #0F0; + background-color: #E8E8E8; + white-space: pre; +} +span.lineno a { + background-color: #D8D8D8; +} + +span.lineno a:hover { + background-color: #C8C8C8; +} + +.lineno { + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +div.ah, span.ah { + background-color: black; + font-weight: bold; + color: #FFFFFF; + margin-bottom: 3px; + margin-top: 3px; + padding: 0.2em; + border: solid thin #333; + border-radius: 0.5em; + -webkit-border-radius: .5em; + -moz-border-radius: .5em; + box-shadow: 2px 2px 3px #999; + -webkit-box-shadow: 2px 2px 3px #999; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px; + background-image: -webkit-gradient(linear, left top, left bottom, from(#eee), to(#000),color-stop(0.3, #444)); + background-image: -moz-linear-gradient(center top, #eee 0%, #444 40%, #000 110%); +} + +div.classindex ul { + list-style: none; + padding-left: 0; +} + +div.classindex span.ai { + display: inline-block; +} + 
+div.groupHeader { + margin-left: 16px; + margin-top: 12px; + font-weight: bold; +} + +div.groupText { + margin-left: 16px; + font-style: italic; +} + +body { + background-color: white; + color: black; + margin: 0; +} + +div.contents { + margin-top: 10px; + margin-left: 12px; + margin-right: 8px; +} + +td.indexkey { + background-color: #EBEFF6; + font-weight: bold; + border: 1px solid #C4CFE5; + margin: 2px 0px 2px 0; + padding: 2px 10px; + white-space: nowrap; + vertical-align: top; +} + +td.indexvalue { + background-color: #EBEFF6; + border: 1px solid #C4CFE5; + padding: 2px 10px; + margin: 2px 0px; +} + +tr.memlist { + background-color: #EEF1F7; +} + +p.formulaDsp { + text-align: center; +} + +img.formulaDsp { + +} + +img.formulaInl, img.inline { + vertical-align: middle; +} + +div.center { + text-align: center; + margin-top: 0px; + margin-bottom: 0px; + padding: 0px; +} + +div.center img { + border: 0px; +} + +address.footer { + text-align: right; + padding-right: 12px; +} + +img.footer { + border: 0px; + vertical-align: middle; +} + +/* @group Code Colorization */ + +span.keyword { + color: #008000 +} + +span.keywordtype { + color: #604020 +} + +span.keywordflow { + color: #e08000 +} + +span.comment { + color: #800000 +} + +span.preprocessor { + color: #806020 +} + +span.stringliteral { + color: #002080 +} + +span.charliteral { + color: #008080 +} + +span.vhdldigit { + color: #ff00ff +} + +span.vhdlchar { + color: #000000 +} + +span.vhdlkeyword { + color: #700070 +} + +span.vhdllogic { + color: #ff0000 +} + +blockquote { + background-color: #F7F8FB; + border-left: 2px solid #9CAFD4; + margin: 0 24px 0 4px; + padding: 0 12px 0 16px; +} + +blockquote.DocNodeRTL { + border-left: 0; + border-right: 2px solid #9CAFD4; + margin: 0 4px 0 24px; + padding: 0 16px 0 12px; +} + +/* @end */ + +/* +.search { + color: #003399; + font-weight: bold; +} + +form.search { + margin-bottom: 0px; + margin-top: 0px; +} + +input.search { + font-size: 75%; + color: #000080; + 
font-weight: normal; + background-color: #e8eef2; +} +*/ + +td.tiny { + font-size: 75%; +} + +.dirtab { + padding: 4px; + border-collapse: collapse; + border: 1px solid #A3B4D7; +} + +th.dirtab { + background: #EBEFF6; + font-weight: bold; +} + +hr { + height: 0px; + border: none; + border-top: 1px solid #4A6AAA; +} + +hr.footer { + height: 1px; +} + +/* @group Member Descriptions */ + +table.memberdecls { + border-spacing: 0px; + padding: 0px; +} + +.memberdecls td, .fieldtable tr { + -webkit-transition-property: background-color, box-shadow; + -webkit-transition-duration: 0.5s; + -moz-transition-property: background-color, box-shadow; + -moz-transition-duration: 0.5s; + -ms-transition-property: background-color, box-shadow; + -ms-transition-duration: 0.5s; + -o-transition-property: background-color, box-shadow; + -o-transition-duration: 0.5s; + transition-property: background-color, box-shadow; + transition-duration: 0.5s; +} + +.memberdecls td.glow, .fieldtable tr.glow { + background-color: cyan; + box-shadow: 0 0 15px cyan; +} + +.mdescLeft, .mdescRight, +.memItemLeft, .memItemRight, +.memTemplItemLeft, .memTemplItemRight, .memTemplParams { + background-color: #F9FAFC; + border: none; + margin: 4px; + padding: 1px 0 0 8px; +} + +.mdescLeft, .mdescRight { + padding: 0px 8px 4px 8px; + color: #555; +} + +.memSeparator { + border-bottom: 1px solid #DEE4F0; + line-height: 1px; + margin: 0px; + padding: 0px; +} + +.memItemLeft, .memTemplItemLeft { + white-space: nowrap; +} + +.memItemRight, .memTemplItemRight { + width: 100%; +} + +.memTemplParams { + color: #4665A2; + white-space: nowrap; + font-size: 80%; +} + +/* @end */ + +/* @group Member Details */ + +/* Styles for detailed member documentation */ + +.memtitle { + padding: 8px; + border-top: 1px solid #A8B8D9; + border-left: 1px solid #A8B8D9; + border-right: 1px solid #A8B8D9; + border-top-right-radius: 4px; + border-top-left-radius: 4px; + margin-bottom: -1px; + background-image: url('nav_f.png'); + 
background-repeat: repeat-x; + background-color: #E2E8F2; + line-height: 1.25; + font-weight: 300; + float:left; +} + +.permalink +{ + font-size: 65%; + display: inline-block; + vertical-align: middle; +} + +.memtemplate { + font-size: 80%; + color: #4665A2; + font-weight: normal; + margin-left: 9px; +} + +.memnav { + background-color: #EBEFF6; + border: 1px solid #A3B4D7; + text-align: center; + margin: 2px; + margin-right: 15px; + padding: 2px; +} + +.mempage { + width: 100%; +} + +.memitem { + padding: 0; + margin-bottom: 10px; + margin-right: 5px; + -webkit-transition: box-shadow 0.5s linear; + -moz-transition: box-shadow 0.5s linear; + -ms-transition: box-shadow 0.5s linear; + -o-transition: box-shadow 0.5s linear; + transition: box-shadow 0.5s linear; + display: table !important; + width: 100%; +} + +.memitem.glow { + box-shadow: 0 0 15px cyan; +} + +.memname { + font-weight: 400; + margin-left: 6px; +} + +.memname td { + vertical-align: bottom; +} + +.memproto, dl.reflist dt { + border-top: 1px solid #A8B8D9; + border-left: 1px solid #A8B8D9; + border-right: 1px solid #A8B8D9; + padding: 6px 0px 6px 0px; + color: #253555; + font-weight: bold; + text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); + background-color: #DFE5F1; + /* opera specific markup */ + box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + border-top-right-radius: 4px; + /* firefox specific markup */ + -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px; + -moz-border-radius-topright: 4px; + /* webkit specific markup */ + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + -webkit-border-top-right-radius: 4px; + +} + +.overload { + font-family: "courier new",courier,monospace; + font-size: 65%; +} + +.memdoc, dl.reflist dd { + border-bottom: 1px solid #A8B8D9; + border-left: 1px solid #A8B8D9; + border-right: 1px solid #A8B8D9; + padding: 6px 10px 2px 10px; + background-color: #FBFCFD; + border-top-width: 0; + background-image:url('nav_g.png'); + background-repeat:repeat-x; + background-color: 
#FFFFFF; + /* opera specific markup */ + border-bottom-left-radius: 4px; + border-bottom-right-radius: 4px; + box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + /* firefox specific markup */ + -moz-border-radius-bottomleft: 4px; + -moz-border-radius-bottomright: 4px; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px; + /* webkit specific markup */ + -webkit-border-bottom-left-radius: 4px; + -webkit-border-bottom-right-radius: 4px; + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); +} + +dl.reflist dt { + padding: 5px; +} + +dl.reflist dd { + margin: 0px 0px 10px 0px; + padding: 5px; +} + +.paramkey { + text-align: right; +} + +.paramtype { + white-space: nowrap; +} + +.paramname { + color: #602020; + white-space: nowrap; +} +.paramname em { + font-style: normal; +} +.paramname code { + line-height: 14px; +} + +.params, .retval, .exception, .tparams { + margin-left: 0px; + padding-left: 0px; +} + +.params .paramname, .retval .paramname, .tparams .paramname, .exception .paramname { + font-weight: bold; + vertical-align: top; +} + +.params .paramtype, .tparams .paramtype { + font-style: italic; + vertical-align: top; +} + +.params .paramdir, .tparams .paramdir { + font-family: "courier new",courier,monospace; + vertical-align: top; +} + +table.mlabels { + border-spacing: 0px; +} + +td.mlabels-left { + width: 100%; + padding: 0px; +} + +td.mlabels-right { + vertical-align: bottom; + padding: 0px; + white-space: nowrap; +} + +span.mlabels { + margin-left: 8px; +} + +span.mlabel { + background-color: #728DC1; + border-top:1px solid #5373B4; + border-left:1px solid #5373B4; + border-right:1px solid #C4CFE5; + border-bottom:1px solid #C4CFE5; + text-shadow: none; + color: white; + margin-right: 4px; + padding: 2px 3px; + border-radius: 3px; + font-size: 7pt; + white-space: nowrap; + vertical-align: middle; +} + + + +/* @end */ + +/* these are for tree view inside a (index) page */ + +div.directory { + margin: 10px 0px; + border-top: 1px solid #9CAFD4; + border-bottom: 1px 
solid #9CAFD4; + width: 100%; +} + +.directory table { + border-collapse:collapse; +} + +.directory td { + margin: 0px; + padding: 0px; + vertical-align: top; +} + +.directory td.entry { + white-space: nowrap; + padding-right: 6px; + padding-top: 3px; +} + +.directory td.entry a { + outline:none; +} + +.directory td.entry a img { + border: none; +} + +.directory td.desc { + width: 100%; + padding-left: 6px; + padding-right: 6px; + padding-top: 3px; + border-left: 1px solid rgba(0,0,0,0.05); +} + +.directory tr.even { + padding-left: 6px; + background-color: #F7F8FB; +} + +.directory img { + vertical-align: -30%; +} + +.directory .levels { + white-space: nowrap; + width: 100%; + text-align: right; + font-size: 9pt; +} + +.directory .levels span { + cursor: pointer; + padding-left: 2px; + padding-right: 2px; + color: #3D578C; +} + +.arrow { + color: #9CAFD4; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; + cursor: pointer; + font-size: 80%; + display: inline-block; + width: 16px; + height: 22px; +} + +.icon { + font-family: Arial, Helvetica; + font-weight: bold; + font-size: 12px; + height: 14px; + width: 16px; + display: inline-block; + background-color: #728DC1; + color: white; + text-align: center; + border-radius: 4px; + margin-left: 2px; + margin-right: 2px; +} + +.icona { + width: 24px; + height: 22px; + display: inline-block; +} + +.iconfopen { + width: 24px; + height: 18px; + margin-bottom: 4px; + background-image:url('folderopen.png'); + background-position: 0px -4px; + background-repeat: repeat-y; + vertical-align:top; + display: inline-block; +} + +.iconfclosed { + width: 24px; + height: 18px; + margin-bottom: 4px; + background-image:url('folderclosed.png'); + background-position: 0px -4px; + background-repeat: repeat-y; + vertical-align:top; + display: inline-block; +} + +.icondoc { + width: 24px; + height: 18px; + margin-bottom: 4px; + background-image:url('doc.png'); + 
background-position: 0px -4px; + background-repeat: repeat-y; + vertical-align:top; + display: inline-block; +} + +table.directory { + font: 400 14px Roboto,sans-serif; +} + +/* @end */ + +div.dynheader { + margin-top: 8px; + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +address { + font-style: normal; + color: #2A3D61; +} + +table.doxtable caption { + caption-side: top; +} + +table.doxtable { + border-collapse:collapse; + margin-top: 4px; + margin-bottom: 4px; +} + +table.doxtable td, table.doxtable th { + border: 1px solid #2D4068; + padding: 3px 7px 2px; +} + +table.doxtable th { + background-color: #374F7F; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; +} + +table.fieldtable { + /*width: 100%;*/ + margin-bottom: 10px; + border: 1px solid #A8B8D9; + border-spacing: 0px; + -moz-border-radius: 4px; + -webkit-border-radius: 4px; + border-radius: 4px; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px; + -webkit-box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15); + box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15); +} + +.fieldtable td, .fieldtable th { + padding: 3px 7px 2px; +} + +.fieldtable td.fieldtype, .fieldtable td.fieldname { + white-space: nowrap; + border-right: 1px solid #A8B8D9; + border-bottom: 1px solid #A8B8D9; + vertical-align: top; +} + +.fieldtable td.fieldname { + padding-top: 3px; +} + +.fieldtable td.fielddoc { + border-bottom: 1px solid #A8B8D9; + /*width: 100%;*/ +} + +.fieldtable td.fielddoc p:first-child { + margin-top: 0px; +} + +.fieldtable td.fielddoc p:last-child { + margin-bottom: 2px; +} + +.fieldtable tr:last-child td { + border-bottom: none; +} + +.fieldtable th { + background-image:url('nav_f.png'); + background-repeat:repeat-x; + background-color: #E2E8F2; + font-size: 90%; + color: #253555; + padding-bottom: 4px; + padding-top: 5px; + text-align:left; + font-weight: 400; + 
-moz-border-radius-topleft: 4px; + -moz-border-radius-topright: 4px; + -webkit-border-top-left-radius: 4px; + -webkit-border-top-right-radius: 4px; + border-top-left-radius: 4px; + border-top-right-radius: 4px; + border-bottom: 1px solid #A8B8D9; +} + + +.tabsearch { + top: 0px; + left: 10px; + height: 36px; + background-image: url('tab_b.png'); + z-index: 101; + overflow: hidden; + font-size: 13px; +} + +.navpath ul +{ + font-size: 11px; + background-image:url('tab_b.png'); + background-repeat:repeat-x; + background-position: 0 -5px; + height:30px; + line-height:30px; + color:#8AA0CC; + border:solid 1px #C2CDE4; + overflow:hidden; + margin:0px; + padding:0px; +} + +.navpath li +{ + list-style-type:none; + float:left; + padding-left:10px; + padding-right:15px; + background-image:url('bc_s.png'); + background-repeat:no-repeat; + background-position:right; + color:#364D7C; +} + +.navpath li.navelem a +{ + height:32px; + display:block; + text-decoration: none; + outline: none; + color: #283A5D; + font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif; + text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); + text-decoration: none; +} + +.navpath li.navelem a:hover +{ + color:#6884BD; +} + +.navpath li.footer +{ + list-style-type:none; + float:right; + padding-left:10px; + padding-right:15px; + background-image:none; + background-repeat:no-repeat; + background-position:right; + color:#364D7C; + font-size: 8pt; +} + + +div.summary +{ + float: right; + font-size: 8pt; + padding-right: 5px; + width: 50%; + text-align: right; +} + +div.summary a +{ + white-space: nowrap; +} + +table.classindex +{ + margin: 10px; + white-space: nowrap; + margin-left: 3%; + margin-right: 3%; + width: 94%; + border: 0; + border-spacing: 0; + padding: 0; +} + +div.ingroups +{ + font-size: 8pt; + width: 50%; + text-align: left; +} + +div.ingroups a +{ + white-space: nowrap; +} + +div.header +{ + background-image:url('nav_h.png'); + background-repeat:repeat-x; + background-color: #F9FAFC; + 
margin: 0px; + border-bottom: 1px solid #C4CFE5; +} + +div.headertitle +{ + padding: 5px 5px 5px 10px; +} + +.PageDocRTL-title div.headertitle { + text-align: right; + direction: rtl; +} + +dl { + padding: 0 0 0 0; +} + +/* dl.note, dl.warning, dl.attention, dl.pre, dl.post, dl.invariant, dl.deprecated, dl.todo, dl.test, dl.bug, dl.examples */ +dl.section { + margin-left: 0px; + padding-left: 0px; +} + +dl.section.DocNodeRTL { + margin-right: 0px; + padding-right: 0px; +} + +dl.note { + margin-left: -7px; + padding-left: 3px; + border-left: 4px solid; + border-color: #D0C000; +} + +dl.note.DocNodeRTL { + margin-left: 0; + padding-left: 0; + border-left: 0; + margin-right: -7px; + padding-right: 3px; + border-right: 4px solid; + border-color: #D0C000; +} + +dl.warning, dl.attention { + margin-left: -7px; + padding-left: 3px; + border-left: 4px solid; + border-color: #FF0000; +} + +dl.warning.DocNodeRTL, dl.attention.DocNodeRTL { + margin-left: 0; + padding-left: 0; + border-left: 0; + margin-right: -7px; + padding-right: 3px; + border-right: 4px solid; + border-color: #FF0000; +} + +dl.pre, dl.post, dl.invariant { + margin-left: -7px; + padding-left: 3px; + border-left: 4px solid; + border-color: #00D000; +} + +dl.pre.DocNodeRTL, dl.post.DocNodeRTL, dl.invariant.DocNodeRTL { + margin-left: 0; + padding-left: 0; + border-left: 0; + margin-right: -7px; + padding-right: 3px; + border-right: 4px solid; + border-color: #00D000; +} + +dl.deprecated { + margin-left: -7px; + padding-left: 3px; + border-left: 4px solid; + border-color: #505050; +} + +dl.deprecated.DocNodeRTL { + margin-left: 0; + padding-left: 0; + border-left: 0; + margin-right: -7px; + padding-right: 3px; + border-right: 4px solid; + border-color: #505050; +} + +dl.todo { + margin-left: -7px; + padding-left: 3px; + border-left: 4px solid; + border-color: #00C0E0; +} + +dl.todo.DocNodeRTL { + margin-left: 0; + padding-left: 0; + border-left: 0; + margin-right: -7px; + padding-right: 3px; + border-right: 4px 
solid; + border-color: #00C0E0; +} + +dl.test { + margin-left: -7px; + padding-left: 3px; + border-left: 4px solid; + border-color: #3030E0; +} + +dl.test.DocNodeRTL { + margin-left: 0; + padding-left: 0; + border-left: 0; + margin-right: -7px; + padding-right: 3px; + border-right: 4px solid; + border-color: #3030E0; +} + +dl.bug { + margin-left: -7px; + padding-left: 3px; + border-left: 4px solid; + border-color: #C08050; +} + +dl.bug.DocNodeRTL { + margin-left: 0; + padding-left: 0; + border-left: 0; + margin-right: -7px; + padding-right: 3px; + border-right: 4px solid; + border-color: #C08050; +} + +dl.section dd { + margin-bottom: 6px; +} + + +#projectlogo +{ + text-align: center; + vertical-align: bottom; + border-collapse: separate; +} + +#projectlogo img +{ + border: 0px none; +} + +#projectalign +{ + vertical-align: middle; +} + +#projectname +{ + font: 300% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 2px 0px; +} + +#projectbrief +{ + font: 120% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 0px; +} + +#projectnumber +{ + font: 50% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 0px; +} + +#titlearea +{ + padding: 0px; + margin: 0px; + width: 100%; + border-bottom: 1px solid #5373B4; +} + +.image +{ + text-align: center; +} + +.dotgraph +{ + text-align: center; +} + +.mscgraph +{ + text-align: center; +} + +.plantumlgraph +{ + text-align: center; +} + +.diagraph +{ + text-align: center; +} + +.caption +{ + font-weight: bold; +} + +div.zoom +{ + border: 1px solid #90A5CE; +} + +dl.citelist { + margin-bottom:50px; +} + +dl.citelist dt { + color:#334975; + float:left; + font-weight:bold; + margin-right:10px; + padding:5px; +} + +dl.citelist dd { + margin:2px 0; + padding:5px 0; +} + +div.toc { + padding: 14px 25px; + background-color: #F4F6FA; + border: 1px solid #D8DFEE; + border-radius: 7px 7px 7px 7px; + float: right; + height: auto; + margin: 0 8px 10px 10px; + width: 200px; +} + +.PageDocRTL-title div.toc { + float: left !important; + 
text-align: right; +} + +div.toc li { + background: url("bdwn.png") no-repeat scroll 0 5px transparent; + font: 10px/1.2 Verdana,DejaVu Sans,Geneva,sans-serif; + margin-top: 5px; + padding-left: 10px; + padding-top: 2px; +} + +.PageDocRTL-title div.toc li { + background-position-x: right !important; + padding-left: 0 !important; + padding-right: 10px; +} + +div.toc h3 { + font: bold 12px/1.2 Arial,FreeSans,sans-serif; + color: #4665A2; + border-bottom: 0 none; + margin: 0; +} + +div.toc ul { + list-style: none outside none; + border: medium none; + padding: 0px; +} + +div.toc li.level1 { + margin-left: 0px; +} + +div.toc li.level2 { + margin-left: 15px; +} + +div.toc li.level3 { + margin-left: 30px; +} + +div.toc li.level4 { + margin-left: 45px; +} + +.PageDocRTL-title div.toc li.level1 { + margin-left: 0 !important; + margin-right: 0; +} + +.PageDocRTL-title div.toc li.level2 { + margin-left: 0 !important; + margin-right: 15px; +} + +.PageDocRTL-title div.toc li.level3 { + margin-left: 0 !important; + margin-right: 30px; +} + +.PageDocRTL-title div.toc li.level4 { + margin-left: 0 !important; + margin-right: 45px; +} + +.inherit_header { + font-weight: bold; + color: gray; + cursor: pointer; + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +.inherit_header td { + padding: 6px 0px 2px 5px; +} + +.inherit { + display: none; +} + +tr.heading h2 { + margin-top: 12px; + margin-bottom: 4px; +} + +/* tooltip related style info */ + +.ttc { + position: absolute; + display: none; +} + +#powerTip { + cursor: default; + white-space: nowrap; + background-color: white; + border: 1px solid gray; + border-radius: 4px 4px 4px 4px; + box-shadow: 1px 1px 7px gray; + display: none; + font-size: smaller; + max-width: 80%; + opacity: 0.9; + padding: 1ex 1em 1em; + position: absolute; + z-index: 2147483647; +} + +#powerTip div.ttdoc { + color: grey; + font-style: italic; 
+} + +#powerTip div.ttname a { + font-weight: bold; +} + +#powerTip div.ttname { + font-weight: bold; +} + +#powerTip div.ttdeci { + color: #006318; +} + +#powerTip div { + margin: 0px; + padding: 0px; + font: 12px/16px Roboto,sans-serif; +} + +#powerTip:before, #powerTip:after { + content: ""; + position: absolute; + margin: 0px; +} + +#powerTip.n:after, #powerTip.n:before, +#powerTip.s:after, #powerTip.s:before, +#powerTip.w:after, #powerTip.w:before, +#powerTip.e:after, #powerTip.e:before, +#powerTip.ne:after, #powerTip.ne:before, +#powerTip.se:after, #powerTip.se:before, +#powerTip.nw:after, #powerTip.nw:before, +#powerTip.sw:after, #powerTip.sw:before { + border: solid transparent; + content: " "; + height: 0; + width: 0; + position: absolute; +} + +#powerTip.n:after, #powerTip.s:after, +#powerTip.w:after, #powerTip.e:after, +#powerTip.nw:after, #powerTip.ne:after, +#powerTip.sw:after, #powerTip.se:after { + border-color: rgba(255, 255, 255, 0); +} + +#powerTip.n:before, #powerTip.s:before, +#powerTip.w:before, #powerTip.e:before, +#powerTip.nw:before, #powerTip.ne:before, +#powerTip.sw:before, #powerTip.se:before { + border-color: rgba(128, 128, 128, 0); +} + +#powerTip.n:after, #powerTip.n:before, +#powerTip.ne:after, #powerTip.ne:before, +#powerTip.nw:after, #powerTip.nw:before { + top: 100%; +} + +#powerTip.n:after, #powerTip.ne:after, #powerTip.nw:after { + border-top-color: #FFFFFF; + border-width: 10px; + margin: 0px -10px; +} +#powerTip.n:before { + border-top-color: #808080; + border-width: 11px; + margin: 0px -11px; +} +#powerTip.n:after, #powerTip.n:before { + left: 50%; +} + +#powerTip.nw:after, #powerTip.nw:before { + right: 14px; +} + +#powerTip.ne:after, #powerTip.ne:before { + left: 14px; +} + +#powerTip.s:after, #powerTip.s:before, +#powerTip.se:after, #powerTip.se:before, +#powerTip.sw:after, #powerTip.sw:before { + bottom: 100%; +} + +#powerTip.s:after, #powerTip.se:after, #powerTip.sw:after { + border-bottom-color: #FFFFFF; + border-width: 
10px; + margin: 0px -10px; +} + +#powerTip.s:before, #powerTip.se:before, #powerTip.sw:before { + border-bottom-color: #808080; + border-width: 11px; + margin: 0px -11px; +} + +#powerTip.s:after, #powerTip.s:before { + left: 50%; +} + +#powerTip.sw:after, #powerTip.sw:before { + right: 14px; +} + +#powerTip.se:after, #powerTip.se:before { + left: 14px; +} + +#powerTip.e:after, #powerTip.e:before { + left: 100%; +} +#powerTip.e:after { + border-left-color: #FFFFFF; + border-width: 10px; + top: 50%; + margin-top: -10px; +} +#powerTip.e:before { + border-left-color: #808080; + border-width: 11px; + top: 50%; + margin-top: -11px; +} + +#powerTip.w:after, #powerTip.w:before { + right: 100%; +} +#powerTip.w:after { + border-right-color: #FFFFFF; + border-width: 10px; + top: 50%; + margin-top: -10px; +} +#powerTip.w:before { + border-right-color: #808080; + border-width: 11px; + top: 50%; + margin-top: -11px; +} + +@media print +{ + #top { display: none; } + #side-nav { display: none; } + #nav-path { display: none; } + body { overflow:visible; } + h1, h2, h3, h4, h5, h6 { page-break-after: avoid; } + .summary { display: none; } + .memitem { page-break-inside: avoid; } + #doc-content + { + margin-left:0 !important; + height:auto !important; + width:auto !important; + overflow:inherit; + display:inline; + } +} + +/* @group Markdown */ + +/* +table.markdownTable { + border-collapse:collapse; + margin-top: 4px; + margin-bottom: 4px; +} + +table.markdownTable td, table.markdownTable th { + border: 1px solid #2D4068; + padding: 3px 7px 2px; +} + +table.markdownTableHead tr { +} + +table.markdownTableBodyLeft td, table.markdownTable th { + border: 1px solid #2D4068; + padding: 3px 7px 2px; +} + +th.markdownTableHeadLeft th.markdownTableHeadRight th.markdownTableHeadCenter th.markdownTableHeadNone { + background-color: #374F7F; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; +} + +th.markdownTableHeadLeft { + text-align: left +} + 
+th.markdownTableHeadRight { + text-align: right +} + +th.markdownTableHeadCenter { + text-align: center +} +*/ + +table.markdownTable { + border-collapse:collapse; + margin-top: 4px; + margin-bottom: 4px; +} + +table.markdownTable td, table.markdownTable th { + border: 1px solid #2D4068; + padding: 3px 7px 2px; +} + +table.markdownTable tr { +} + +th.markdownTableHeadLeft, th.markdownTableHeadRight, th.markdownTableHeadCenter, th.markdownTableHeadNone { + background-color: #374F7F; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; +} + +th.markdownTableHeadLeft, td.markdownTableBodyLeft { + text-align: left +} + +th.markdownTableHeadRight, td.markdownTableBodyRight { + text-align: right +} + +th.markdownTableHeadCenter, td.markdownTableBodyCenter { + text-align: center +} + +.DocNodeRTL { + text-align: right; + direction: rtl; +} + +.DocNodeLTR { + text-align: left; + direction: ltr; +} + +table.DocNodeRTL { + width: auto; + margin-right: 0; + margin-left: auto; +} + +table.DocNodeLTR { + width: auto; + margin-right: auto; + margin-left: 0; +} + +tt, code, kbd, samp +{ + display: inline-block; + direction:ltr; +} +/* @end */ + +u { + text-decoration: underline; +} + diff --git a/docs/docs/doxygen/html/doxygen.png b/docs/docs/doxygen/html/doxygen.png new file mode 100644 index 00000000..3ff17d80 Binary files /dev/null and b/docs/docs/doxygen/html/doxygen.png differ diff --git a/docs/docs/doxygen/html/dynsections.js b/docs/docs/doxygen/html/dynsections.js new file mode 100644 index 00000000..ea0a7b39 --- /dev/null +++ b/docs/docs/doxygen/html/dynsections.js @@ -0,0 +1,120 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. 
+ + Copyright (C) 1997-2017 by Dimitri van Heesch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +function toggleVisibility(linkObj) +{ + var base = $(linkObj).attr('id'); + var summary = $('#'+base+'-summary'); + var content = $('#'+base+'-content'); + var trigger = $('#'+base+'-trigger'); + var src=$(trigger).attr('src'); + if (content.is(':visible')===true) { + content.hide(); + summary.show(); + $(linkObj).addClass('closed').removeClass('opened'); + $(trigger).attr('src',src.substring(0,src.length-8)+'closed.png'); + } else { + content.show(); + summary.hide(); + $(linkObj).removeClass('closed').addClass('opened'); + $(trigger).attr('src',src.substring(0,src.length-10)+'open.png'); + } + return false; +} + +function updateStripes() +{ + $('table.directory tr'). + removeClass('even').filter(':visible:even').addClass('even'); +} + +function toggleLevel(level) +{ + $('table.directory tr').each(function() { + var l = this.id.split('_').length-1; + var i = $('#img'+this.id.substring(3)); + var a = $('#arr'+this.id.substring(3)); + if (l + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-add-mod.hpp File Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
eltwise-add-mod.hpp File Reference
+
+
+
#include <stdint.h>
+
+

Go to the source code of this file.

+ + + + + + +

+Namespaces

 intel
 
 intel::hexl
 
+ + + + +

+Functions

void intel::hexl::EltwiseAddMod (uint64_t *result, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, uint64_t modulus)
 Adds two vectors elementwise with modular reduction. More...
 
+
+ + + + diff --git a/docs/docs/doxygen/html/eltwise-add-mod_8hpp_source.html b/docs/docs/doxygen/html/eltwise-add-mod_8hpp_source.html new file mode 100644 index 00000000..cdf2f70a --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-add-mod_8hpp_source.html @@ -0,0 +1,105 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-add-mod.hpp Source File + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
eltwise-add-mod.hpp
+
+
+Go to the documentation of this file.
1 // Copyright (C) 2020-2021 Intel Corporation
+
2 // SPDX-License-Identifier: Apache-2.0
+
3 
+
4 #pragma once
+
5 
+
6 #include <stdint.h>
+
7 
+
8 namespace intel {
+
9 namespace hexl {
+
10 
+
22 void EltwiseAddMod(uint64_t* result, const uint64_t* operand1,
+
23  const uint64_t* operand2, uint64_t n, uint64_t modulus);
+
24 
+
25 } // namespace hexl
+
26 } // namespace intel
+
+
Definition: eltwise-add-mod.hpp:8
+
void EltwiseAddMod(uint64_t *result, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, uint64_t modulus)
Adds two vectors elementwise with modular reduction.
+ + + + diff --git a/docs/docs/doxygen/html/eltwise-cmp-add_8hpp.html b/docs/docs/doxygen/html/eltwise-cmp-add_8hpp.html new file mode 100644 index 00000000..e50535f7 --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-cmp-add_8hpp.html @@ -0,0 +1,109 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-add.hpp File Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
eltwise-cmp-add.hpp File Reference
+
+
+
#include <stdint.h>
+#include "intel-hexl/util/util.hpp"
+
+

Go to the source code of this file.

+ + + + + + +

+Namespaces

 intel
 
 intel::hexl
 
+ + + + +

+Functions

void intel::hexl::EltwiseCmpAdd (uint64_t *result, const uint64_t *operand1, CMPINT cmp, uint64_t bound, uint64_t diff, uint64_t n)
 Computes element-wise conditional addition. More...
 
+
+ + + + diff --git a/docs/docs/doxygen/html/eltwise-cmp-add_8hpp_source.html b/docs/docs/doxygen/html/eltwise-cmp-add_8hpp_source.html new file mode 100644 index 00000000..7ac81880 --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-cmp-add_8hpp_source.html @@ -0,0 +1,109 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-add.hpp Source File + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
eltwise-cmp-add.hpp
+
+
+Go to the documentation of this file.
1 // Copyright (C) 2020-2021 Intel Corporation
+
2 // SPDX-License-Identifier: Apache-2.0
+
3 
+
4 #pragma once
+
5 
+
6 #include <stdint.h>
+
7 
+ +
9 
+
10 namespace intel {
+
11 namespace hexl {
+
12 
+
22 void EltwiseCmpAdd(uint64_t* result, const uint64_t* operand1, CMPINT cmp,
+
23  uint64_t bound, uint64_t diff, uint64_t n);
+
24 
+
25 } // namespace hexl
+
26 } // namespace intel
+
+
void EltwiseCmpAdd(uint64_t *result, const uint64_t *operand1, CMPINT cmp, uint64_t bound, uint64_t diff, uint64_t n)
Computes element-wise conditional addition.
+
CMPINT
Represents binary operations between two boolean values.
Definition: util.hpp:14
+
Definition: eltwise-add-mod.hpp:8
+ + + + + diff --git a/docs/docs/doxygen/html/eltwise-cmp-sub-mod_8hpp.html b/docs/docs/doxygen/html/eltwise-cmp-sub-mod_8hpp.html new file mode 100644 index 00000000..d12c561f --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-cmp-sub-mod_8hpp.html @@ -0,0 +1,109 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp File Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
eltwise-cmp-sub-mod.hpp File Reference
+
+
+
#include <stdint.h>
+#include "intel-hexl/util/util.hpp"
+
+

Go to the source code of this file.

+ + + + + + +

+Namespaces

 intel
 
 intel::hexl
 
+ + + + +

+Functions

void intel::hexl::EltwiseCmpSubMod (uint64_t *result, const uint64_t *operand1, CMPINT cmp, uint64_t bound, uint64_t diff, uint64_t modulus, uint64_t n)
 Computes element-wise conditional modular subtraction. More...
 
+
+ + + + diff --git a/docs/docs/doxygen/html/eltwise-cmp-sub-mod_8hpp_source.html b/docs/docs/doxygen/html/eltwise-cmp-sub-mod_8hpp_source.html new file mode 100644 index 00000000..0f5ecfc9 --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-cmp-sub-mod_8hpp_source.html @@ -0,0 +1,110 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp Source File + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
eltwise-cmp-sub-mod.hpp
+
+
+Go to the documentation of this file.
1 // Copyright (C) 2020-2021 Intel Corporation
+
2 // SPDX-License-Identifier: Apache-2.0
+
3 
+
4 #pragma once
+
5 
+
6 #include <stdint.h>
+
7 
+ +
9 
+
10 namespace intel {
+
11 namespace hexl {
+
12 
+
23 void EltwiseCmpSubMod(uint64_t* result, const uint64_t* operand1, CMPINT cmp,
+
24  uint64_t bound, uint64_t diff, uint64_t modulus,
+
25  uint64_t n);
+
26 
+
27 } // namespace hexl
+
28 } // namespace intel
+
+
CMPINT
Represents binary operations between two boolean values.
Definition: util.hpp:14
+
Definition: eltwise-add-mod.hpp:8
+
void EltwiseCmpSubMod(uint64_t *result, const uint64_t *operand1, CMPINT cmp, uint64_t bound, uint64_t diff, uint64_t modulus, uint64_t n)
Computes element-wise conditional modular subtraction.
+ + + + + diff --git a/docs/docs/doxygen/html/eltwise-fma-mod_8hpp.html b/docs/docs/doxygen/html/eltwise-fma-mod_8hpp.html new file mode 100644 index 00000000..010b9d88 --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-fma-mod_8hpp.html @@ -0,0 +1,108 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-fma-mod.hpp File Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
eltwise-fma-mod.hpp File Reference
+
+
+
#include <stdint.h>
+
+

Go to the source code of this file.

+ + + + + + +

+Namespaces

 intel
 
 intel::hexl
 
+ + + + +

+Functions

void intel::hexl::EltwiseFMAMod (uint64_t *result, const uint64_t *arg1, uint64_t arg2, const uint64_t *arg3, uint64_t n, uint64_t modulus, uint64_t input_mod_factor)
 Computes fused multiply-add (arg1 * arg2 + arg3) mod modulus element-wise, broadcasting scalars to vectors. More...
 
+
+ + + + diff --git a/docs/docs/doxygen/html/eltwise-fma-mod_8hpp_source.html b/docs/docs/doxygen/html/eltwise-fma-mod_8hpp_source.html new file mode 100644 index 00000000..914fcf64 --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-fma-mod_8hpp_source.html @@ -0,0 +1,106 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-fma-mod.hpp Source File + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
eltwise-fma-mod.hpp
+
+
+Go to the documentation of this file.
1 // Copyright (C) 2020-2021 Intel Corporation
+
2 // SPDX-License-Identifier: Apache-2.0
+
3 
+
4 #pragma once
+
5 
+
6 #include <stdint.h>
+
7 
+
8 namespace intel {
+
9 namespace hexl {
+
10 
+
22 void EltwiseFMAMod(uint64_t* result, const uint64_t* arg1, uint64_t arg2,
+
23  const uint64_t* arg3, uint64_t n, uint64_t modulus,
+
24  uint64_t input_mod_factor);
+
25 
+
26 } // namespace hexl
+
27 } // namespace intel
+
+
void EltwiseFMAMod(uint64_t *result, const uint64_t *arg1, uint64_t arg2, const uint64_t *arg3, uint64_t n, uint64_t modulus, uint64_t input_mod_factor)
Computes fused multiply-add (arg1 * arg2 + arg3) mod modulus element-wise, broadcasting scalars to ve...
+
Definition: eltwise-add-mod.hpp:8
+ + + + diff --git a/docs/docs/doxygen/html/eltwise-mult-mod_8hpp.html b/docs/docs/doxygen/html/eltwise-mult-mod_8hpp.html new file mode 100644 index 00000000..f0bbdf33 --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-mult-mod_8hpp.html @@ -0,0 +1,108 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-mult-mod.hpp File Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
eltwise-mult-mod.hpp File Reference
+
+
+
#include <stdint.h>
+
+

Go to the source code of this file.

+ + + + + + +

+Namespaces

 intel
 
 intel::hexl
 
+ + + + +

+Functions

void intel::hexl::EltwiseMultMod (uint64_t *result, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, uint64_t modulus, uint64_t input_mod_factor)
 Multiplies two vectors elementwise with modular reduction. More...
 
+
+ + + + diff --git a/docs/docs/doxygen/html/eltwise-mult-mod_8hpp_source.html b/docs/docs/doxygen/html/eltwise-mult-mod_8hpp_source.html new file mode 100644 index 00000000..0cc07b7b --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-mult-mod_8hpp_source.html @@ -0,0 +1,106 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-mult-mod.hpp Source File + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
eltwise-mult-mod.hpp
+
+
+Go to the documentation of this file.
1 // Copyright (C) 2020-2021 Intel Corporation
+
2 // SPDX-License-Identifier: Apache-2.0
+
3 
+
4 #pragma once
+
5 
+
6 #include <stdint.h>
+
7 
+
8 namespace intel {
+
9 namespace hexl {
+
10 
+
23 void EltwiseMultMod(uint64_t* result, const uint64_t* operand1,
+
24  const uint64_t* operand2, uint64_t n, uint64_t modulus,
+
25  uint64_t input_mod_factor);
+
26 
+
27 } // namespace hexl
+
28 } // namespace intel
+
+
Definition: eltwise-add-mod.hpp:8
+
void EltwiseMultMod(uint64_t *result, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, uint64_t modulus, uint64_t input_mod_factor)
Multiplies two vectors elementwise with modular reduction.
+ + + + diff --git a/docs/docs/doxygen/html/eltwise-reduce-mod_8hpp.html b/docs/docs/doxygen/html/eltwise-reduce-mod_8hpp.html new file mode 100644 index 00000000..415cd6e7 --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-reduce-mod_8hpp.html @@ -0,0 +1,108 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-reduce-mod.hpp File Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
eltwise-reduce-mod.hpp File Reference
+
+
+
#include <stdint.h>
+
+

Go to the source code of this file.

+ + + + + + +

+Namespaces

 intel
 
 intel::hexl
 
+ + + + +

+Functions

void intel::hexl::EltwiseReduceMod (uint64_t *result, const uint64_t *operand, uint64_t modulus, uint64_t n, uint64_t input_mod_factor, uint64_t output_mod_factor)
 Performs elementwise modular reduction. More...
 
+
+ + + + diff --git a/docs/docs/doxygen/html/eltwise-reduce-mod_8hpp_source.html b/docs/docs/doxygen/html/eltwise-reduce-mod_8hpp_source.html new file mode 100644 index 00000000..e202c870 --- /dev/null +++ b/docs/docs/doxygen/html/eltwise-reduce-mod_8hpp_source.html @@ -0,0 +1,106 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-reduce-mod.hpp Source File + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
eltwise-reduce-mod.hpp
+
+
+Go to the documentation of this file.
1 // Copyright (C) 2020-2021 Intel Corporation
+
2 // SPDX-License-Identifier: Apache-2.0
+
3 
+
4 #pragma once
+
5 
+
6 #include <stdint.h>
+
7 
+
8 namespace intel {
+
9 namespace hexl {
+
10 
+
24 void EltwiseReduceMod(uint64_t* result, const uint64_t* operand,
+
25  uint64_t modulus, uint64_t n, uint64_t input_mod_factor,
+
26  uint64_t output_mod_factor);
+
27 
+
28 } // namespace hexl
+
29 } // namespace intel
+
+
Definition: eltwise-add-mod.hpp:8
+
void EltwiseReduceMod(uint64_t *result, const uint64_t *operand, uint64_t modulus, uint64_t n, uint64_t input_mod_factor, uint64_t output_mod_factor)
Performs elementwise modular reduction.
+ + + + diff --git a/docs/docs/doxygen/html/files.html b/docs/docs/doxygen/html/files.html new file mode 100644 index 00000000..6b8b6adf --- /dev/null +++ b/docs/docs/doxygen/html/files.html @@ -0,0 +1,103 @@ + + + + + + + +Intel HEXL: File List + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
File List
+
+
+
Here is a list of all files with brief descriptions:
+
+ + + + diff --git a/docs/docs/doxygen/html/folderclosed.png b/docs/docs/doxygen/html/folderclosed.png new file mode 100644 index 00000000..bb8ab35e Binary files /dev/null and b/docs/docs/doxygen/html/folderclosed.png differ diff --git a/docs/docs/doxygen/html/folderopen.png b/docs/docs/doxygen/html/folderopen.png new file mode 100644 index 00000000..d6c7f676 Binary files /dev/null and b/docs/docs/doxygen/html/folderopen.png differ diff --git a/docs/docs/doxygen/html/functions.html b/docs/docs/doxygen/html/functions.html new file mode 100644 index 00000000..5e218030 --- /dev/null +++ b/docs/docs/doxygen/html/functions.html @@ -0,0 +1,94 @@ + + + + + + + +Intel HEXL: Class Members + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+
+ + + + diff --git a/docs/docs/doxygen/html/functions_func.html b/docs/docs/doxygen/html/functions_func.html new file mode 100644 index 00000000..224be56b --- /dev/null +++ b/docs/docs/doxygen/html/functions_func.html @@ -0,0 +1,94 @@ + + + + + + + +Intel HEXL: Class Members - Functions + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+ + + + diff --git a/docs/docs/doxygen/html/index.html b/docs/docs/doxygen/html/index.html new file mode 100644 index 00000000..09f8d604 --- /dev/null +++ b/docs/docs/doxygen/html/index.html @@ -0,0 +1,223 @@ + + + + + + + +Intel HEXL: Intel Homomorphic Encryption Acceleration Library (HEXL) + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Intel Homomorphic Encryption Acceleration Library (HEXL)
+
+
+

Intel HEXL is an open-source library which provides efficient implementations of integer arithmetic on Galois fields. Such arithmetic is prevalent in cryptography, particularly in homomorphic encryption (HE) schemes. Intel HEXL targets integer arithmetic with word-sized primes, typically 40-60 bits. Intel HEXL provides an API for 64-bit unsigned integers and targets Intel CPUs.

+

+Contents

+ +

+Introduction

+

Many cryptographic applications, particularly homomorphic encryption (HE), rely on integer polynomial arithmetic in a finite field. HE, which enables computation on encrypted data, typically uses polynomials with degree N a power of two roughly in the range N=[2^{10}, 2^{17}]. The coefficients of these polynomials are in a finite field with a word-sized prime, p, up to p~62 bits. More precisely, the polynomials live in the ring Z_p[X]/(X^N + 1). That is, when adding or multiplying two polynomials, each coefficient of the result is reduced by the prime modulus p. When multiplying two polynomials, the resulting polynomial of degree 2N is additionally reduced by taking the remainder when dividing by X^N+1.

+

The primary bottleneck in many HE applications is polynomial-polynomial multiplication in Z_p[X]/(X^N + 1). For efficient implementation, Intel HEXL implements the negacyclic number-theoretic transform (NTT). To multiply two polynomials, p_1(x), p_2(x) using the NTT, we perform the FwdNTT on the two input polynomials, then perform an element-wise modular multiplication, and perform the InvNTT on the result.

+

Intel HEXL implements the following functions:

    +
  • The forward and inverse negacyclic number-theoretic transform (NTT)
  • +
  • Element-wise vector-vector modular multiplication
  • +
  • Element-wise vector-scalar modular multiplication with optional addition
  • +
  • Element-wise modular multiplication
  • +
+

For each function, the library implements one or several Intel(R) AVX-512 implementations, as well as a less performant, more readable native C++ implementation. Intel HEXL will automatically choose the best implementation for the given CPU Intel(R) AVX-512 feature set. In particular, when the modulus p is less than 2^{50}, the AVX512IFMA instruction set available on Intel IceLake server and IceLake client will provide a more efficient implementation.

+

For additional functionality, see the public headers, located in include/intel-hexl

+

+Building Intel HEXL

+

+Dependencies

+

We have tested Intel HEXL on the following operating systems:

    +
  • Ubuntu 18.04
  • +
  • macOS 10.15
  • +
  • Microsoft Windows 10
  • +
+

Intel HEXL requires the following dependencies:

+ + + + + + + +
Dependency Version
CMake >= 3.5.1
Compiler gcc >= 7.0, clang++ >= 5.0, MSVC >= 2019
+

For best performance, we recommend using a processor with AVX512-IFMA52 support, and a recent compiler (gcc >= 8.0, clang++ >= 6.0). To determine if your processor supports AVX512-IFMA52, simply look for HEXL_HAS_AVX512IFMA during the configure step (see Compiling Intel HEXL).

+

+Compile-time options

+

In addition to the standard CMake build options, Intel HEXL supports several compile-time flags to configure the build. For convenience, they are listed below:

+ + + + + + + + + + + + + + + + + + + + + + + +
CMake option Values
HEXL_BENCHMARK ON / OFF (default ON) Set to ON to enable benchmark suite via Google benchmark
HEXL_COVERAGE ON / OFF (default OFF) Set to ON to enable coverage report of unit-tests
HEXL_DEBUG ON / OFF (default OFF) Set to ON to enable debugging at large runtime penalty
HEXL_DOCS ON / OFF (default OFF) Set to ON to enable building of documentation
HEXL_ENABLE_ADDRESS_SANITIZER ON / OFF (default OFF) Set to ON to enable building with address sanitizer (ASan)
HEXL_ENABLE_THREAD_SANITIZER ON / OFF (default OFF) Set to ON to enable building with thread sanitizer (TSan)
HEXL_ENABLE_UB_SANITIZER ON / OFF (default OFF) Set to ON to enable building with undefined behavior sanitizer (UBSan)
HEXL_EXPORT ON / OFF (default OFF) Set to ON to enable export of Intel HEXL for use in 3rd-party project
HEXL_SHARED_LIB ON / OFF (default OFF) Set to ON to enable building shared library
HEXL_TESTING ON / OFF (default ON) Set to ON to enable building of unit-tests
+

+Compiling Intel HEXL

+

The instructions to build Intel HEXL are common to Linux, macOS, and Windows.

+

To compile Intel HEXL from source code, first clone the repository into your current directory. Then, to configure the build, call

cmake -S . -B build
+

adding the desired compile-time options with a -D flag. For instance, to build Intel HEXL with debugging capabilities, call

cmake -S . -B build -DHEXL_DEBUG=ON
+

Then, to build Intel HEXL, call

cmake --build build
+

This will build the Intel HEXL library in the build/intel-hexl/lib/ directory.

+

To install Intel HEXL to the installation directory, run

cmake --install build
+

To use a non-standard installation directory, configure the build with

cmake -S . -B build -DCMAKE_INSTALL_PREFIX=/path/to/install
+

+Testing Intel HEXL

+

To run a set of unit tests via Googletest, configure and build Intel HEXL with -DHEXL_TESTING=ON (see Compile-time options). Then, run

cmake --build build --target unittest
+

The unit-test executable itself is located at build/test/unit-test

+

+Benchmarking Intel HEXL

+

To run a set of benchmarks via Google benchmark, configure and build Intel HEXL with -DHEXL_BENCHMARK=ON (see Compile-time options). Then, run

cmake --build build --target bench
+

The benchmark executable itself is located at build/benchmark/bench_hexl

+

+Using Intel HEXL

+

The example folder has an example of using Intel HEXL in a third-party project.

+

+Debugging

+

For optimal performance, Intel HEXL does not perform input validation. In many cases the time required for the validation would be longer than the execution of the function itself. To debug Intel HEXL, configure and build Intel HEXL with -DHEXL_DEBUG=ON (see Compile-time options). This will generate a debug version of the library, e.g. libintel_hexl_debug.a, that can be used to debug the execution.

+

Note, enabling HEXL_DEBUG=ON will result in a significant runtime overhead.

+

+Thread-safety

+

Intel HEXL is single-threaded and thread-safe.

+

+Documentation

+

Intel HEXL supports documentation via Doxygen and sphinx. To build documentation, first install doxygen and graphviz, e.g.

sudo apt-get install doxygen graphviz
+

Then, configure Intel HEXL with -DHEXL_DOCS=ON (see Compile-time options).

+

+Doxygen

+

To build Doxygen documentation, after configuring Intel HEXL with -DHEXL_DOCS=ON, run

cmake --build build --target doxygen
+

To view the generated Doxygen documentation, open the generated build/docs/doxygen/html/index.html file in a web browser.

+

+Sphinx

+

To build the sphinx documentation, install sphinx and required dependencies breathe, m2r2, e.g.

sudo apt-get install python3-sphinx
+
pip3 install breathe m2r2
+

Then, after configuring Intel HEXL with -DHEXL_DOCS=ON, run

cmake --build build --target docs
+

To view the generated Sphinx documentation, open the generated build/docs/sphinx/html/index.html file in a web browser.

+

+Contributing

+

At this time, Intel HEXL does not accept external contributions. Feel free to discuss via issues.

+

For Intel developers, use pre-commit to validate the formatting of the code.

+

Before contributing, please run

make check
+

and make sure all unit tests and pre-commit checks pass.

+

+Repository layout

+

Public headers reside in the intel-hexl/include folder. Private headers, e.g. those containing Intel(R) AVX-512 code, should not be put in this folder.

+
+
+ + + + diff --git a/docs/docs/doxygen/html/intel-hexl_8hpp.html b/docs/docs/doxygen/html/intel-hexl_8hpp.html new file mode 100644 index 00000000..b7d14d21 --- /dev/null +++ b/docs/docs/doxygen/html/intel-hexl_8hpp.html @@ -0,0 +1,98 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp File Reference + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
intel-hexl.hpp File Reference
+
+ + + + + diff --git a/docs/docs/doxygen/html/intel-hexl_8hpp_source.html b/docs/docs/doxygen/html/intel-hexl_8hpp_source.html new file mode 100644 index 00000000..8bad6719 --- /dev/null +++ b/docs/docs/doxygen/html/intel-hexl_8hpp_source.html @@ -0,0 +1,109 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp Source File + + + + + + + + + + + +
+
+ + + + + + +
+
Intel HEXL +
+
Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
intel-hexl.hpp
+
+
+Go to the documentation of this file.
1 // Copyright (C) 2020-2021 Intel Corporation
+
2 // SPDX-License-Identifier: Apache-2.0
+
3 
+
4 #pragma once
+
5 
+ + + + + + +
12 #include "intel-hexl/ntt/ntt.hpp"
+
13 #include "intel-hexl/util/util.hpp"
+
+ + + + + + + + + + + + diff --git a/docs/docs/doxygen/html/jquery.js b/docs/docs/doxygen/html/jquery.js new file mode 100644 index 00000000..103c32d7 --- /dev/null +++ b/docs/docs/doxygen/html/jquery.js @@ -0,0 +1,35 @@ +/*! jQuery v3.4.1 | (c) JS Foundation and other contributors | jquery.org/license */ +!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],E=C.document,r=Object.getPrototypeOf,s=t.slice,g=t.concat,u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType},x=function(e){return null!=e&&e===e.window},c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.4.1",k=function(e,t){return new k.fn.init(e,t)},p=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g;function d(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp($),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+$),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new 
RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\([\\da-f]{1,6}"+M+"?|("+M+")|.)","ig"),ne=function(e,t,n){var r="0x"+t-65536;return r!=r||n?t:r<0?String.fromCharCode(r+65536):String.fromCharCode(r>>10|55296,1023&r|56320)},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(m.childNodes),m.childNodes),t[m.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&((e?e.ownerDocument||e:m)!==C&&T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!A[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&U.test(t)){(s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=k),o=(l=h(t)).length;while(o--)l[o]="#"+s+" "+xe(l[o]);c=l.join(","),f=ee.test(t)&&ye(e.parentNode)||e}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){A(t,!0)}finally{s===k&&e.removeAttribute("id")}}}return g(t.replace(B,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" 
")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[k]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:m;return r!==C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),m!==C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=k,!C.getElementsByName||!C.getElementsByName(k).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return 
function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+k+"-]").length||v.push("~="),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+k+"+*").length||v.push(".#.+[+~]")}),ce(function(e){e.innerHTML="";var 
t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",$)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)===(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e===C||e.ownerDocument===m&&y(m,e)?-1:t===C||t.ownerDocument===m&&y(m,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e===C?-1:t===C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]===m?-1:s[r]===m?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if((e.ownerDocument||e)!==C&&T(e),d.matchesSelector&&E&&!A[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){A(t,!0)}return 
0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=p[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&p(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?k.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?k.grep(e,function(e){return e===n!==r}):"string"!=typeof n?k.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(k.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||q,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:L.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof k?t[0]:t,k.merge(this,k.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),D.test(r[1])&&k.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return 
this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(k):k.makeArray(e,this)}).prototype=k.fn,q=k(E);var H=/^(?:parents|prev(?:Until|All))/,O={children:!0,contents:!0,next:!0,prev:!0};function P(e,t){while((e=e[t])&&1!==e.nodeType);return e}k.fn.extend({has:function(e){var t=k(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i,ge={option:[1,""],thead:[1,"","
"],col:[2,"","
"],tr:[2,"","
"],td:[3,"","
"],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?k.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;nx",y.noCloneChecked=!!me.cloneNode(!0).lastChild.defaultValue;var Te=/^key/,Ce=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,Ee=/^([^.]*)(?:\.(.+)|)/;function ke(){return!0}function Se(){return!1}function Ne(e,t){return e===function(){try{return E.activeElement}catch(e){}}()==("focus"===t)}function Ae(e,t,n,r,i,o){var a,s;if("object"==typeof t){for(s in"string"!=typeof n&&(r=r||n,n=void 0),t)Ae(e,s,n,r,t[s],o);return e}if(null==r&&null==i?(i=n,r=n=void 0):null==i&&("string"==typeof n?(i=r,r=void 0):(i=r,r=n,n=void 0)),!1===i)i=Se;else if(!i)return e;return 1===o&&(a=i,(i=function(e){return k().off(e),a.apply(this,arguments)}).guid=a.guid||(a.guid=k.guid++)),e.each(function(){k.event.add(this,t,i,r,n)})}function De(e,i,o){o?(Q.set(e,i,!1),k.event.add(e,i,{namespace:!1,handler:function(e){var t,n,r=Q.get(this,i);if(1&e.isTrigger&&this[i]){if(r.length)(k.event.special[i]||{}).delegateType&&e.stopPropagation();else if(r=s.call(arguments),Q.set(this,i,r),t=o(this,i),this[i](),r!==(n=Q.get(this,i))||t?Q.set(this,i,!1):n={},r!==n)return e.stopImmediatePropagation(),e.preventDefault(),n.value}else r.length&&(Q.set(this,i,{value:k.event.trigger(k.extend(r[0],k.Event.prototype),r.slice(1),this)}),e.stopImmediatePropagation())}})):void 0===Q.get(e,i)&&k.event.add(e,i,ke)}k.event={global:{},add:function(t,e,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.get(t);if(v){n.handler&&(n=(o=n).handler,i=o.selector),i&&k.find.matchesSelector(ie,i),n.guid||(n.guid=k.guid++),(u=v.events)||(u=v.events={}),(a=v.handle)||(a=v.handle=function(e){return"undefined"!=typeof k&&k.event.triggered!==e.type?k.event.dispatch.apply(t,arguments):void 
0}),l=(e=(e||"").match(R)||[""]).length;while(l--)d=g=(s=Ee.exec(e[l])||[])[1],h=(s[2]||"").split(".").sort(),d&&(f=k.event.special[d]||{},d=(i?f.delegateType:f.bindType)||d,f=k.event.special[d]||{},c=k.extend({type:d,origType:g,data:r,handler:n,guid:n.guid,selector:i,needsContext:i&&k.expr.match.needsContext.test(i),namespace:h.join(".")},o),(p=u[d])||((p=u[d]=[]).delegateCount=0,f.setup&&!1!==f.setup.call(t,r,h,a)||t.addEventListener&&t.addEventListener(d,a)),f.add&&(f.add.call(t,c),c.handler.guid||(c.handler.guid=n.guid)),i?p.splice(p.delegateCount++,0,c):p.push(c),k.event.global[d]=!0)}},remove:function(e,t,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.hasData(e)&&Q.get(e);if(v&&(u=v.events)){l=(t=(t||"").match(R)||[""]).length;while(l--)if(d=g=(s=Ee.exec(t[l])||[])[1],h=(s[2]||"").split(".").sort(),d){f=k.event.special[d]||{},p=u[d=(r?f.delegateType:f.bindType)||d]||[],s=s[2]&&new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),a=o=p.length;while(o--)c=p[o],!i&&g!==c.origType||n&&n.guid!==c.guid||s&&!s.test(c.namespace)||r&&r!==c.selector&&("**"!==r||!c.selector)||(p.splice(o,1),c.selector&&p.delegateCount--,f.remove&&f.remove.call(e,c));a&&!p.length&&(f.teardown&&!1!==f.teardown.call(e,h,v.handle)||k.removeEvent(e,d,v.handle),delete u[d])}else for(d in u)k.event.remove(e,d+t[l],n,r,!0);k.isEmptyObject(u)&&Q.remove(e,"handle events")}},dispatch:function(e){var t,n,r,i,o,a,s=k.event.fix(e),u=new Array(arguments.length),l=(Q.get(this,"events")||{})[s.type]||[],c=k.event.special[s.type]||{};for(u[0]=s,t=1;t\x20\t\r\n\f]*)[^>]*)\/>/gi,qe=/\s*$/g;function Oe(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&k(e).children("tbody")[0]||e}function Pe(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function Re(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Me(e,t){var n,r,i,o,a,s,u,l;if(1===t.nodeType){if(Q.hasData(e)&&(o=Q.access(e),a=Q.set(t,o),l=o.events))for(i in delete 
a.handle,a.events={},l)for(n=0,r=l[i].length;n")},clone:function(e,t,n){var r,i,o,a,s,u,l,c=e.cloneNode(!0),f=oe(e);if(!(y.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||k.isXMLDoc(e)))for(a=ve(c),r=0,i=(o=ve(e)).length;r").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Vt,Gt=[],Yt=/(=)\?(?=&|$)|\?\?/;k.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Gt.pop()||k.expando+"_"+kt++;return this[e]=!0,e}}),k.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Yt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Yt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Yt,"$1"+r):!1!==e.jsonp&&(e.url+=(St.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||k.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?k(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Gt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Vt=E.implementation.createHTMLDocument("").body).innerHTML="
",2===Vt.childNodes.length),k.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=D.exec(e))?[t.createElement(i[1])]:(i=we([e],t,o),o&&o.length&&k(o).remove(),k.merge([],i.childNodes)));var r,i,o},k.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(k.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},k.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){k.fn[t]=function(e){return this.on(t,e)}}),k.expr.pseudos.animated=function(t){return k.grep(k.timers,function(e){return t===e.elem}).length},k.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=k.css(e,"position"),c=k(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=k.css(e,"top"),u=k.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,k.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},k.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){k.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var 
e,t,n,r=this[0],i={top:0,left:0};if("fixed"===k.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===k.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=k(e).offset()).top+=k.css(e,"borderTopWidth",!0),i.left+=k.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-k.css(r,"marginTop",!0),left:t.left-i.left-k.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===k.css(e,"position"))e=e.offsetParent;return e||ie})}}),k.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;k.fn[t]=function(e){return _(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),k.each(["top","left"],function(e,n){k.cssHooks[n]=ze(y.pixelPosition,function(e,t){if(t)return t=_e(e,n),$e.test(t)?k(e).position()[n]+"px":t})}),k.each({Height:"height",Width:"width"},function(a,s){k.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){k.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return _(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?k.css(e,t,i):k.style(e,t,n,i)},s,n?e:void 0,n)}})}),k.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){k.fn[n]=function(e,t){return 0a;a++)for(i in o[a])n=o[a][i],o[a].hasOwnProperty(i)&&void 
0!==n&&(e[i]=t.isPlainObject(n)?t.isPlainObject(e[i])?t.widget.extend({},e[i],n):t.widget.extend({},n):n);return e},t.widget.bridge=function(e,i){var n=i.prototype.widgetFullName||e;t.fn[e]=function(o){var a="string"==typeof o,r=s.call(arguments,1),h=this;return a?this.length||"instance"!==o?this.each(function(){var i,s=t.data(this,n);return"instance"===o?(h=s,!1):s?t.isFunction(s[o])&&"_"!==o.charAt(0)?(i=s[o].apply(s,r),i!==s&&void 0!==i?(h=i&&i.jquery?h.pushStack(i.get()):i,!1):void 0):t.error("no such method '"+o+"' for "+e+" widget instance"):t.error("cannot call methods on "+e+" prior to initialization; "+"attempted to call method '"+o+"'")}):h=void 0:(r.length&&(o=t.widget.extend.apply(null,[o].concat(r))),this.each(function(){var e=t.data(this,n);e?(e.option(o||{}),e._init&&e._init()):t.data(this,n,new i(o,this))})),h}},t.Widget=function(){},t.Widget._childConstructors=[],t.Widget.prototype={widgetName:"widget",widgetEventPrefix:"",defaultElement:"
",options:{classes:{},disabled:!1,create:null},_createWidget:function(e,s){s=t(s||this.defaultElement||this)[0],this.element=t(s),this.uuid=i++,this.eventNamespace="."+this.widgetName+this.uuid,this.bindings=t(),this.hoverable=t(),this.focusable=t(),this.classesElementLookup={},s!==this&&(t.data(s,this.widgetFullName,this),this._on(!0,this.element,{remove:function(t){t.target===s&&this.destroy()}}),this.document=t(s.style?s.ownerDocument:s.document||s),this.window=t(this.document[0].defaultView||this.document[0].parentWindow)),this.options=t.widget.extend({},this.options,this._getCreateOptions(),e),this._create(),this.options.disabled&&this._setOptionDisabled(this.options.disabled),this._trigger("create",null,this._getCreateEventData()),this._init()},_getCreateOptions:function(){return{}},_getCreateEventData:t.noop,_create:t.noop,_init:t.noop,destroy:function(){var e=this;this._destroy(),t.each(this.classesElementLookup,function(t,i){e._removeClass(i,t)}),this.element.off(this.eventNamespace).removeData(this.widgetFullName),this.widget().off(this.eventNamespace).removeAttr("aria-disabled"),this.bindings.off(this.eventNamespace)},_destroy:t.noop,widget:function(){return this.element},option:function(e,i){var s,n,o,a=e;if(0===arguments.length)return t.widget.extend({},this.options);if("string"==typeof e)if(a={},s=e.split("."),e=s.shift(),s.length){for(n=a[e]=t.widget.extend({},this.options[e]),o=0;s.length-1>o;o++)n[s[o]]=n[s[o]]||{},n=n[s[o]];if(e=s.pop(),1===arguments.length)return void 0===n[e]?null:n[e];n[e]=i}else{if(1===arguments.length)return void 0===this.options[e]?null:this.options[e];a[e]=i}return this._setOptions(a),this},_setOptions:function(t){var e;for(e in t)this._setOption(e,t[e]);return this},_setOption:function(t,e){return"classes"===t&&this._setOptionClasses(e),this.options[t]=e,"disabled"===t&&this._setOptionDisabled(e),this},_setOptionClasses:function(e){var i,s,n;for(i in 
e)n=this.classesElementLookup[i],e[i]!==this.options.classes[i]&&n&&n.length&&(s=t(n.get()),this._removeClass(n,i),s.addClass(this._classes({element:s,keys:i,classes:e,add:!0})))},_setOptionDisabled:function(t){this._toggleClass(this.widget(),this.widgetFullName+"-disabled",null,!!t),t&&(this._removeClass(this.hoverable,null,"ui-state-hover"),this._removeClass(this.focusable,null,"ui-state-focus"))},enable:function(){return this._setOptions({disabled:!1})},disable:function(){return this._setOptions({disabled:!0})},_classes:function(e){function i(i,o){var a,r;for(r=0;i.length>r;r++)a=n.classesElementLookup[i[r]]||t(),a=e.add?t(t.unique(a.get().concat(e.element.get()))):t(a.not(e.element).get()),n.classesElementLookup[i[r]]=a,s.push(i[r]),o&&e.classes[i[r]]&&s.push(e.classes[i[r]])}var s=[],n=this;return e=t.extend({element:this.element,classes:this.options.classes||{}},e),this._on(e.element,{remove:"_untrackClassesElement"}),e.keys&&i(e.keys.match(/\S+/g)||[],!0),e.extra&&i(e.extra.match(/\S+/g)||[]),s.join(" ")},_untrackClassesElement:function(e){var i=this;t.each(i.classesElementLookup,function(s,n){-1!==t.inArray(e.target,n)&&(i.classesElementLookup[s]=t(n.not(e.target).get()))})},_removeClass:function(t,e,i){return this._toggleClass(t,e,i,!1)},_addClass:function(t,e,i){return this._toggleClass(t,e,i,!0)},_toggleClass:function(t,e,i,s){s="boolean"==typeof s?s:i;var n="string"==typeof t||null===t,o={extra:n?e:i,keys:n?t:e,element:n?this.element:t,add:s};return o.element.toggleClass(this._classes(o),s),this},_on:function(e,i,s){var n,o=this;"boolean"!=typeof e&&(s=i,i=e,e=!1),s?(i=n=t(i),this.bindings=this.bindings.add(i)):(s=i,i=this.element,n=this.widget()),t.each(s,function(s,a){function r(){return e||o.options.disabled!==!0&&!t(this).hasClass("ui-state-disabled")?("string"==typeof a?o[a]:a).apply(o,arguments):void 0}"string"!=typeof a&&(r.guid=a.guid=a.guid||r.guid||t.guid++);var 
h=s.match(/^([\w:-]*)\s*(.*)$/),l=h[1]+o.eventNamespace,c=h[2];c?n.on(l,c,r):i.on(l,r)})},_off:function(e,i){i=(i||"").split(" ").join(this.eventNamespace+" ")+this.eventNamespace,e.off(i).off(i),this.bindings=t(this.bindings.not(e).get()),this.focusable=t(this.focusable.not(e).get()),this.hoverable=t(this.hoverable.not(e).get())},_delay:function(t,e){function i(){return("string"==typeof t?s[t]:t).apply(s,arguments)}var s=this;return setTimeout(i,e||0)},_hoverable:function(e){this.hoverable=this.hoverable.add(e),this._on(e,{mouseenter:function(e){this._addClass(t(e.currentTarget),null,"ui-state-hover")},mouseleave:function(e){this._removeClass(t(e.currentTarget),null,"ui-state-hover")}})},_focusable:function(e){this.focusable=this.focusable.add(e),this._on(e,{focusin:function(e){this._addClass(t(e.currentTarget),null,"ui-state-focus")},focusout:function(e){this._removeClass(t(e.currentTarget),null,"ui-state-focus")}})},_trigger:function(e,i,s){var n,o,a=this.options[e];if(s=s||{},i=t.Event(i),i.type=(e===this.widgetEventPrefix?e:this.widgetEventPrefix+e).toLowerCase(),i.target=this.element[0],o=i.originalEvent)for(n in o)n in i||(i[n]=o[n]);return this.element.trigger(i,s),!(t.isFunction(a)&&a.apply(this.element[0],[i].concat(s))===!1||i.isDefaultPrevented())}},t.each({show:"fadeIn",hide:"fadeOut"},function(e,i){t.Widget.prototype["_"+e]=function(s,n,o){"string"==typeof n&&(n={effect:n});var a,r=n?n===!0||"number"==typeof n?i:n.effect||i:e;n=n||{},"number"==typeof n&&(n={duration:n}),a=!t.isEmptyObject(n),n.complete=o,n.delay&&s.delay(n.delay),a&&t.effects&&t.effects.effect[r]?s[e](n):r!==e&&s[r]?s[r](n.duration,n.easing,o):s.queue(function(i){t(this)[e](),o&&o.call(s[0]),i()})}}),t.widget,function(){function e(t,e,i){return[parseFloat(t[0])*(u.test(t[0])?e/100:1),parseFloat(t[1])*(u.test(t[1])?i/100:1)]}function i(e,i){return parseInt(t.css(e,i),10)||0}function s(e){var i=e[0];return 
9===i.nodeType?{width:e.width(),height:e.height(),offset:{top:0,left:0}}:t.isWindow(i)?{width:e.width(),height:e.height(),offset:{top:e.scrollTop(),left:e.scrollLeft()}}:i.preventDefault?{width:0,height:0,offset:{top:i.pageY,left:i.pageX}}:{width:e.outerWidth(),height:e.outerHeight(),offset:e.offset()}}var n,o=Math.max,a=Math.abs,r=/left|center|right/,h=/top|center|bottom/,l=/[\+\-]\d+(\.[\d]+)?%?/,c=/^\w+/,u=/%$/,d=t.fn.position;t.position={scrollbarWidth:function(){if(void 0!==n)return n;var e,i,s=t("
"),o=s.children()[0];return t("body").append(s),e=o.offsetWidth,s.css("overflow","scroll"),i=o.offsetWidth,e===i&&(i=s[0].clientWidth),s.remove(),n=e-i},getScrollInfo:function(e){var i=e.isWindow||e.isDocument?"":e.element.css("overflow-x"),s=e.isWindow||e.isDocument?"":e.element.css("overflow-y"),n="scroll"===i||"auto"===i&&e.widthi?"left":e>0?"right":"center",vertical:0>r?"top":s>0?"bottom":"middle"};l>p&&p>a(e+i)&&(u.horizontal="center"),c>f&&f>a(s+r)&&(u.vertical="middle"),u.important=o(a(e),a(i))>o(a(s),a(r))?"horizontal":"vertical",n.using.call(this,t,u)}),h.offset(t.extend(D,{using:r}))})},t.ui.position={fit:{left:function(t,e){var i,s=e.within,n=s.isWindow?s.scrollLeft:s.offset.left,a=s.width,r=t.left-e.collisionPosition.marginLeft,h=n-r,l=r+e.collisionWidth-a-n;e.collisionWidth>a?h>0&&0>=l?(i=t.left+h+e.collisionWidth-a-n,t.left+=h-i):t.left=l>0&&0>=h?n:h>l?n+a-e.collisionWidth:n:h>0?t.left+=h:l>0?t.left-=l:t.left=o(t.left-r,t.left)},top:function(t,e){var i,s=e.within,n=s.isWindow?s.scrollTop:s.offset.top,a=e.within.height,r=t.top-e.collisionPosition.marginTop,h=n-r,l=r+e.collisionHeight-a-n;e.collisionHeight>a?h>0&&0>=l?(i=t.top+h+e.collisionHeight-a-n,t.top+=h-i):t.top=l>0&&0>=h?n:h>l?n+a-e.collisionHeight:n:h>0?t.top+=h:l>0?t.top-=l:t.top=o(t.top-r,t.top)}},flip:{left:function(t,e){var i,s,n=e.within,o=n.offset.left+n.scrollLeft,r=n.width,h=n.isWindow?n.scrollLeft:n.offset.left,l=t.left-e.collisionPosition.marginLeft,c=l-h,u=l+e.collisionWidth-r-h,d="left"===e.my[0]?-e.elemWidth:"right"===e.my[0]?e.elemWidth:0,p="left"===e.at[0]?e.targetWidth:"right"===e.at[0]?-e.targetWidth:0,f=-2*e.offset[0];0>c?(i=t.left+d+p+f+e.collisionWidth-r-o,(0>i||a(c)>i)&&(t.left+=d+p+f)):u>0&&(s=t.left-e.collisionPosition.marginLeft+d+p+f-h,(s>0||u>a(s))&&(t.left+=d+p+f))},top:function(t,e){var 
i,s,n=e.within,o=n.offset.top+n.scrollTop,r=n.height,h=n.isWindow?n.scrollTop:n.offset.top,l=t.top-e.collisionPosition.marginTop,c=l-h,u=l+e.collisionHeight-r-h,d="top"===e.my[1],p=d?-e.elemHeight:"bottom"===e.my[1]?e.elemHeight:0,f="top"===e.at[1]?e.targetHeight:"bottom"===e.at[1]?-e.targetHeight:0,m=-2*e.offset[1];0>c?(s=t.top+p+f+m+e.collisionHeight-r-o,(0>s||a(c)>s)&&(t.top+=p+f+m)):u>0&&(i=t.top-e.collisionPosition.marginTop+p+f+m-h,(i>0||u>a(i))&&(t.top+=p+f+m))}},flipfit:{left:function(){t.ui.position.flip.left.apply(this,arguments),t.ui.position.fit.left.apply(this,arguments)},top:function(){t.ui.position.flip.top.apply(this,arguments),t.ui.position.fit.top.apply(this,arguments)}}}}(),t.ui.position,t.extend(t.expr[":"],{data:t.expr.createPseudo?t.expr.createPseudo(function(e){return function(i){return!!t.data(i,e)}}):function(e,i,s){return!!t.data(e,s[3])}}),t.fn.extend({disableSelection:function(){var t="onselectstart"in document.createElement("div")?"selectstart":"mousedown";return function(){return this.on(t+".ui-disableSelection",function(t){t.preventDefault()})}}(),enableSelection:function(){return this.off(".ui-disableSelection")}}),t.ui.focusable=function(i,s){var n,o,a,r,h,l=i.nodeName.toLowerCase();return"area"===l?(n=i.parentNode,o=n.name,i.href&&o&&"map"===n.nodeName.toLowerCase()?(a=t("img[usemap='#"+o+"']"),a.length>0&&a.is(":visible")):!1):(/^(input|select|textarea|button|object)$/.test(l)?(r=!i.disabled,r&&(h=t(i).closest("fieldset")[0],h&&(r=!h.disabled))):r="a"===l?i.href||s:s,r&&t(i).is(":visible")&&e(t(i)))},t.extend(t.expr[":"],{focusable:function(e){return t.ui.focusable(e,null!=t.attr(e,"tabindex"))}}),t.ui.focusable,t.fn.form=function(){return"string"==typeof this[0].form?this.closest("form"):t(this[0].form)},t.ui.formResetMixin={_formResetHandler:function(){var e=t(this);setTimeout(function(){var 
i=e.data("ui-form-reset-instances");t.each(i,function(){this.refresh()})})},_bindFormResetHandler:function(){if(this.form=this.element.form(),this.form.length){var t=this.form.data("ui-form-reset-instances")||[];t.length||this.form.on("reset.ui-form-reset",this._formResetHandler),t.push(this),this.form.data("ui-form-reset-instances",t)}},_unbindFormResetHandler:function(){if(this.form.length){var e=this.form.data("ui-form-reset-instances");e.splice(t.inArray(this,e),1),e.length?this.form.data("ui-form-reset-instances",e):this.form.removeData("ui-form-reset-instances").off("reset.ui-form-reset")}}},"1.7"===t.fn.jquery.substring(0,3)&&(t.each(["Width","Height"],function(e,i){function s(e,i,s,o){return t.each(n,function(){i-=parseFloat(t.css(e,"padding"+this))||0,s&&(i-=parseFloat(t.css(e,"border"+this+"Width"))||0),o&&(i-=parseFloat(t.css(e,"margin"+this))||0)}),i}var n="Width"===i?["Left","Right"]:["Top","Bottom"],o=i.toLowerCase(),a={innerWidth:t.fn.innerWidth,innerHeight:t.fn.innerHeight,outerWidth:t.fn.outerWidth,outerHeight:t.fn.outerHeight};t.fn["inner"+i]=function(e){return void 0===e?a["inner"+i].call(this):this.each(function(){t(this).css(o,s(this,e)+"px")})},t.fn["outer"+i]=function(e,n){return"number"!=typeof e?a["outer"+i].call(this,e):this.each(function(){t(this).css(o,s(this,e,!0,n)+"px")})}}),t.fn.addBack=function(t){return this.add(null==t?this.prevObject:this.prevObject.filter(t))}),t.ui.keyCode={BACKSPACE:8,COMMA:188,DELETE:46,DOWN:40,END:35,ENTER:13,ESCAPE:27,HOME:36,LEFT:37,PAGE_DOWN:34,PAGE_UP:33,PERIOD:190,RIGHT:39,SPACE:32,TAB:9,UP:38},t.ui.escapeSelector=function(){var t=/([!"#$%&'()*+,./:;<=>?@[\]^`{|}~])/g;return function(e){return e.replace(t,"\\$1")}}(),t.fn.labels=function(){var e,i,s,n,o;return 
this[0].labels&&this[0].labels.length?this.pushStack(this[0].labels):(n=this.eq(0).parents("label"),s=this.attr("id"),s&&(e=this.eq(0).parents().last(),o=e.add(e.length?e.siblings():this.siblings()),i="label[for='"+t.ui.escapeSelector(s)+"']",n=n.add(o.find(i).addBack(i))),this.pushStack(n))},t.fn.scrollParent=function(e){var i=this.css("position"),s="absolute"===i,n=e?/(auto|scroll|hidden)/:/(auto|scroll)/,o=this.parents().filter(function(){var e=t(this);return s&&"static"===e.css("position")?!1:n.test(e.css("overflow")+e.css("overflow-y")+e.css("overflow-x"))}).eq(0);return"fixed"!==i&&o.length?o:t(this[0].ownerDocument||document)},t.extend(t.expr[":"],{tabbable:function(e){var i=t.attr(e,"tabindex"),s=null!=i;return(!s||i>=0)&&t.ui.focusable(e,s)}}),t.fn.extend({uniqueId:function(){var t=0;return function(){return this.each(function(){this.id||(this.id="ui-id-"+ ++t)})}}(),removeUniqueId:function(){return this.each(function(){/^ui-id-\d+$/.test(this.id)&&t(this).removeAttr("id")})}}),t.ui.ie=!!/msie [\w.]+/.exec(navigator.userAgent.toLowerCase());var n=!1;t(document).on("mouseup",function(){n=!1}),t.widget("ui.mouse",{version:"1.12.1",options:{cancel:"input, textarea, button, select, option",distance:1,delay:0},_mouseInit:function(){var e=this;this.element.on("mousedown."+this.widgetName,function(t){return e._mouseDown(t)}).on("click."+this.widgetName,function(i){return!0===t.data(i.target,e.widgetName+".preventClickEvent")?(t.removeData(i.target,e.widgetName+".preventClickEvent"),i.stopImmediatePropagation(),!1):void 0}),this.started=!1},_mouseDestroy:function(){this.element.off("."+this.widgetName),this._mouseMoveDelegate&&this.document.off("mousemove."+this.widgetName,this._mouseMoveDelegate).off("mouseup."+this.widgetName,this._mouseUpDelegate)},_mouseDown:function(e){if(!n){this._mouseMoved=!1,this._mouseStarted&&this._mouseUp(e),this._mouseDownEvent=e;var i=this,s=1===e.which,o="string"==typeof 
this.options.cancel&&e.target.nodeName?t(e.target).closest(this.options.cancel).length:!1;return s&&!o&&this._mouseCapture(e)?(this.mouseDelayMet=!this.options.delay,this.mouseDelayMet||(this._mouseDelayTimer=setTimeout(function(){i.mouseDelayMet=!0},this.options.delay)),this._mouseDistanceMet(e)&&this._mouseDelayMet(e)&&(this._mouseStarted=this._mouseStart(e)!==!1,!this._mouseStarted)?(e.preventDefault(),!0):(!0===t.data(e.target,this.widgetName+".preventClickEvent")&&t.removeData(e.target,this.widgetName+".preventClickEvent"),this._mouseMoveDelegate=function(t){return i._mouseMove(t)},this._mouseUpDelegate=function(t){return i._mouseUp(t)},this.document.on("mousemove."+this.widgetName,this._mouseMoveDelegate).on("mouseup."+this.widgetName,this._mouseUpDelegate),e.preventDefault(),n=!0,!0)):!0}},_mouseMove:function(e){if(this._mouseMoved){if(t.ui.ie&&(!document.documentMode||9>document.documentMode)&&!e.button)return this._mouseUp(e);if(!e.which)if(e.originalEvent.altKey||e.originalEvent.ctrlKey||e.originalEvent.metaKey||e.originalEvent.shiftKey)this.ignoreMissingWhich=!0;else if(!this.ignoreMissingWhich)return this._mouseUp(e)}return(e.which||e.button)&&(this._mouseMoved=!0),this._mouseStarted?(this._mouseDrag(e),e.preventDefault()):(this._mouseDistanceMet(e)&&this._mouseDelayMet(e)&&(this._mouseStarted=this._mouseStart(this._mouseDownEvent,e)!==!1,this._mouseStarted?this._mouseDrag(e):this._mouseUp(e)),!this._mouseStarted)},_mouseUp:function(e){this.document.off("mousemove."+this.widgetName,this._mouseMoveDelegate).off("mouseup."+this.widgetName,this._mouseUpDelegate),this._mouseStarted&&(this._mouseStarted=!1,e.target===this._mouseDownEvent.target&&t.data(e.target,this.widgetName+".preventClickEvent",!0),this._mouseStop(e)),this._mouseDelayTimer&&(clearTimeout(this._mouseDelayTimer),delete this._mouseDelayTimer),this.ignoreMissingWhich=!1,n=!1,e.preventDefault()},_mouseDistanceMet:function(t){return 
Math.max(Math.abs(this._mouseDownEvent.pageX-t.pageX),Math.abs(this._mouseDownEvent.pageY-t.pageY))>=this.options.distance},_mouseDelayMet:function(){return this.mouseDelayMet},_mouseStart:function(){},_mouseDrag:function(){},_mouseStop:function(){},_mouseCapture:function(){return!0}}),t.ui.plugin={add:function(e,i,s){var n,o=t.ui[e].prototype;for(n in s)o.plugins[n]=o.plugins[n]||[],o.plugins[n].push([i,s[n]])},call:function(t,e,i,s){var n,o=t.plugins[e];if(o&&(s||t.element[0].parentNode&&11!==t.element[0].parentNode.nodeType))for(n=0;o.length>n;n++)t.options[o[n][0]]&&o[n][1].apply(t.element,i)}},t.widget("ui.resizable",t.ui.mouse,{version:"1.12.1",widgetEventPrefix:"resize",options:{alsoResize:!1,animate:!1,animateDuration:"slow",animateEasing:"swing",aspectRatio:!1,autoHide:!1,classes:{"ui-resizable-se":"ui-icon ui-icon-gripsmall-diagonal-se"},containment:!1,ghost:!1,grid:!1,handles:"e,s,se",helper:!1,maxHeight:null,maxWidth:null,minHeight:10,minWidth:10,zIndex:90,resize:null,start:null,stop:null},_num:function(t){return parseFloat(t)||0},_isNumber:function(t){return!isNaN(parseFloat(t))},_hasScroll:function(e,i){if("hidden"===t(e).css("overflow"))return!1;var s=i&&"left"===i?"scrollLeft":"scrollTop",n=!1;return e[s]>0?!0:(e[s]=1,n=e[s]>0,e[s]=0,n)},_create:function(){var e,i=this.options,s=this;this._addClass("ui-resizable"),t.extend(this,{_aspectRatio:!!i.aspectRatio,aspectRatio:i.aspectRatio,originalElement:this.element,_proportionallyResizeElements:[],_helper:i.helper||i.ghost||i.animate?i.helper||"ui-resizable-helper":null}),this.element[0].nodeName.match(/^(canvas|textarea|input|select|button|img)$/i)&&(this.element.wrap(t("
").css({position:this.element.css("position"),width:this.element.outerWidth(),height:this.element.outerHeight(),top:this.element.css("top"),left:this.element.css("left")})),this.element=this.element.parent().data("ui-resizable",this.element.resizable("instance")),this.elementIsWrapper=!0,e={marginTop:this.originalElement.css("marginTop"),marginRight:this.originalElement.css("marginRight"),marginBottom:this.originalElement.css("marginBottom"),marginLeft:this.originalElement.css("marginLeft")},this.element.css(e),this.originalElement.css("margin",0),this.originalResizeStyle=this.originalElement.css("resize"),this.originalElement.css("resize","none"),this._proportionallyResizeElements.push(this.originalElement.css({position:"static",zoom:1,display:"block"})),this.originalElement.css(e),this._proportionallyResize()),this._setupHandles(),i.autoHide&&t(this.element).on("mouseenter",function(){i.disabled||(s._removeClass("ui-resizable-autohide"),s._handles.show())}).on("mouseleave",function(){i.disabled||s.resizing||(s._addClass("ui-resizable-autohide"),s._handles.hide())}),this._mouseInit()},_destroy:function(){this._mouseDestroy();var e,i=function(e){t(e).removeData("resizable").removeData("ui-resizable").off(".resizable").find(".ui-resizable-handle").remove()};return this.elementIsWrapper&&(i(this.element),e=this.element,this.originalElement.css({position:e.css("position"),width:e.outerWidth(),height:e.outerHeight(),top:e.css("top"),left:e.css("left")}).insertAfter(e),e.remove()),this.originalElement.css("resize",this.originalResizeStyle),i(this.originalElement),this},_setOption:function(t,e){switch(this._super(t,e),t){case"handles":this._removeHandles(),this._setupHandles();break;default:}},_setupHandles:function(){var 
e,i,s,n,o,a=this.options,r=this;if(this.handles=a.handles||(t(".ui-resizable-handle",this.element).length?{n:".ui-resizable-n",e:".ui-resizable-e",s:".ui-resizable-s",w:".ui-resizable-w",se:".ui-resizable-se",sw:".ui-resizable-sw",ne:".ui-resizable-ne",nw:".ui-resizable-nw"}:"e,s,se"),this._handles=t(),this.handles.constructor===String)for("all"===this.handles&&(this.handles="n,e,s,w,se,sw,ne,nw"),s=this.handles.split(","),this.handles={},i=0;s.length>i;i++)e=t.trim(s[i]),n="ui-resizable-"+e,o=t("
"),this._addClass(o,"ui-resizable-handle "+n),o.css({zIndex:a.zIndex}),this.handles[e]=".ui-resizable-"+e,this.element.append(o);this._renderAxis=function(e){var i,s,n,o;e=e||this.element;for(i in this.handles)this.handles[i].constructor===String?this.handles[i]=this.element.children(this.handles[i]).first().show():(this.handles[i].jquery||this.handles[i].nodeType)&&(this.handles[i]=t(this.handles[i]),this._on(this.handles[i],{mousedown:r._mouseDown})),this.elementIsWrapper&&this.originalElement[0].nodeName.match(/^(textarea|input|select|button)$/i)&&(s=t(this.handles[i],this.element),o=/sw|ne|nw|se|n|s/.test(i)?s.outerHeight():s.outerWidth(),n=["padding",/ne|nw|n/.test(i)?"Top":/se|sw|s/.test(i)?"Bottom":/^e$/.test(i)?"Right":"Left"].join(""),e.css(n,o),this._proportionallyResize()),this._handles=this._handles.add(this.handles[i])},this._renderAxis(this.element),this._handles=this._handles.add(this.element.find(".ui-resizable-handle")),this._handles.disableSelection(),this._handles.on("mouseover",function(){r.resizing||(this.className&&(o=this.className.match(/ui-resizable-(se|sw|ne|nw|n|e|s|w)/i)),r.axis=o&&o[1]?o[1]:"se")}),a.autoHide&&(this._handles.hide(),this._addClass("ui-resizable-autohide"))},_removeHandles:function(){this._handles.remove()},_mouseCapture:function(e){var i,s,n=!1;for(i in this.handles)s=t(this.handles[i])[0],(s===e.target||t.contains(s,e.target))&&(n=!0);return!this.options.disabled&&n},_mouseStart:function(e){var i,s,n,o=this.options,a=this.element;return 
this.resizing=!0,this._renderProxy(),i=this._num(this.helper.css("left")),s=this._num(this.helper.css("top")),o.containment&&(i+=t(o.containment).scrollLeft()||0,s+=t(o.containment).scrollTop()||0),this.offset=this.helper.offset(),this.position={left:i,top:s},this.size=this._helper?{width:this.helper.width(),height:this.helper.height()}:{width:a.width(),height:a.height()},this.originalSize=this._helper?{width:a.outerWidth(),height:a.outerHeight()}:{width:a.width(),height:a.height()},this.sizeDiff={width:a.outerWidth()-a.width(),height:a.outerHeight()-a.height()},this.originalPosition={left:i,top:s},this.originalMousePosition={left:e.pageX,top:e.pageY},this.aspectRatio="number"==typeof o.aspectRatio?o.aspectRatio:this.originalSize.width/this.originalSize.height||1,n=t(".ui-resizable-"+this.axis).css("cursor"),t("body").css("cursor","auto"===n?this.axis+"-resize":n),this._addClass("ui-resizable-resizing"),this._propagate("start",e),!0},_mouseDrag:function(e){var i,s,n=this.originalMousePosition,o=this.axis,a=e.pageX-n.left||0,r=e.pageY-n.top||0,h=this._change[o];return this._updatePrevProperties(),h?(i=h.apply(this,[e,a,r]),this._updateVirtualBoundaries(e.shiftKey),(this._aspectRatio||e.shiftKey)&&(i=this._updateRatio(i,e)),i=this._respectSize(i,e),this._updateCache(i),this._propagate("resize",e),s=this._applyChanges(),!this._helper&&this._proportionallyResizeElements.length&&this._proportionallyResize(),t.isEmptyObject(s)||(this._updatePrevProperties(),this._trigger("resize",e,this.ui()),this._applyChanges()),!1):!1},_mouseStop:function(e){this.resizing=!1;var i,s,n,o,a,r,h,l=this.options,c=this;return 
this._helper&&(i=this._proportionallyResizeElements,s=i.length&&/textarea/i.test(i[0].nodeName),n=s&&this._hasScroll(i[0],"left")?0:c.sizeDiff.height,o=s?0:c.sizeDiff.width,a={width:c.helper.width()-o,height:c.helper.height()-n},r=parseFloat(c.element.css("left"))+(c.position.left-c.originalPosition.left)||null,h=parseFloat(c.element.css("top"))+(c.position.top-c.originalPosition.top)||null,l.animate||this.element.css(t.extend(a,{top:h,left:r})),c.helper.height(c.size.height),c.helper.width(c.size.width),this._helper&&!l.animate&&this._proportionallyResize()),t("body").css("cursor","auto"),this._removeClass("ui-resizable-resizing"),this._propagate("stop",e),this._helper&&this.helper.remove(),!1},_updatePrevProperties:function(){this.prevPosition={top:this.position.top,left:this.position.left},this.prevSize={width:this.size.width,height:this.size.height}},_applyChanges:function(){var t={};return this.position.top!==this.prevPosition.top&&(t.top=this.position.top+"px"),this.position.left!==this.prevPosition.left&&(t.left=this.position.left+"px"),this.size.width!==this.prevSize.width&&(t.width=this.size.width+"px"),this.size.height!==this.prevSize.height&&(t.height=this.size.height+"px"),this.helper.css(t),t},_updateVirtualBoundaries:function(t){var 
e,i,s,n,o,a=this.options;o={minWidth:this._isNumber(a.minWidth)?a.minWidth:0,maxWidth:this._isNumber(a.maxWidth)?a.maxWidth:1/0,minHeight:this._isNumber(a.minHeight)?a.minHeight:0,maxHeight:this._isNumber(a.maxHeight)?a.maxHeight:1/0},(this._aspectRatio||t)&&(e=o.minHeight*this.aspectRatio,s=o.minWidth/this.aspectRatio,i=o.maxHeight*this.aspectRatio,n=o.maxWidth/this.aspectRatio,e>o.minWidth&&(o.minWidth=e),s>o.minHeight&&(o.minHeight=s),o.maxWidth>i&&(o.maxWidth=i),o.maxHeight>n&&(o.maxHeight=n)),this._vBoundaries=o},_updateCache:function(t){this.offset=this.helper.offset(),this._isNumber(t.left)&&(this.position.left=t.left),this._isNumber(t.top)&&(this.position.top=t.top),this._isNumber(t.height)&&(this.size.height=t.height),this._isNumber(t.width)&&(this.size.width=t.width)},_updateRatio:function(t){var e=this.position,i=this.size,s=this.axis;return this._isNumber(t.height)?t.width=t.height*this.aspectRatio:this._isNumber(t.width)&&(t.height=t.width/this.aspectRatio),"sw"===s&&(t.left=e.left+(i.width-t.width),t.top=null),"nw"===s&&(t.top=e.top+(i.height-t.height),t.left=e.left+(i.width-t.width)),t},_respectSize:function(t){var e=this._vBoundaries,i=this.axis,s=this._isNumber(t.width)&&e.maxWidth&&e.maxWidtht.width,a=this._isNumber(t.height)&&e.minHeight&&e.minHeight>t.height,r=this.originalPosition.left+this.originalSize.width,h=this.originalPosition.top+this.originalSize.height,l=/sw|nw|w/.test(i),c=/nw|ne|n/.test(i);return o&&(t.width=e.minWidth),a&&(t.height=e.minHeight),s&&(t.width=e.maxWidth),n&&(t.height=e.maxHeight),o&&l&&(t.left=r-e.minWidth),s&&l&&(t.left=r-e.maxWidth),a&&c&&(t.top=h-e.minHeight),n&&c&&(t.top=h-e.maxHeight),t.width||t.height||t.left||!t.top?t.width||t.height||t.top||!t.left||(t.left=null):t.top=null,t},_getPaddingPlusBorderDimensions:function(t){for(var 
e=0,i=[],s=[t.css("borderTopWidth"),t.css("borderRightWidth"),t.css("borderBottomWidth"),t.css("borderLeftWidth")],n=[t.css("paddingTop"),t.css("paddingRight"),t.css("paddingBottom"),t.css("paddingLeft")];4>e;e++)i[e]=parseFloat(s[e])||0,i[e]+=parseFloat(n[e])||0;return{height:i[0]+i[2],width:i[1]+i[3]}},_proportionallyResize:function(){if(this._proportionallyResizeElements.length)for(var t,e=0,i=this.helper||this.element;this._proportionallyResizeElements.length>e;e++)t=this._proportionallyResizeElements[e],this.outerDimensions||(this.outerDimensions=this._getPaddingPlusBorderDimensions(t)),t.css({height:i.height()-this.outerDimensions.height||0,width:i.width()-this.outerDimensions.width||0})},_renderProxy:function(){var e=this.element,i=this.options;this.elementOffset=e.offset(),this._helper?(this.helper=this.helper||t("
"),this._addClass(this.helper,this._helper),this.helper.css({width:this.element.outerWidth(),height:this.element.outerHeight(),position:"absolute",left:this.elementOffset.left+"px",top:this.elementOffset.top+"px",zIndex:++i.zIndex}),this.helper.appendTo("body").disableSelection()):this.helper=this.element +},_change:{e:function(t,e){return{width:this.originalSize.width+e}},w:function(t,e){var i=this.originalSize,s=this.originalPosition;return{left:s.left+e,width:i.width-e}},n:function(t,e,i){var s=this.originalSize,n=this.originalPosition;return{top:n.top+i,height:s.height-i}},s:function(t,e,i){return{height:this.originalSize.height+i}},se:function(e,i,s){return t.extend(this._change.s.apply(this,arguments),this._change.e.apply(this,[e,i,s]))},sw:function(e,i,s){return t.extend(this._change.s.apply(this,arguments),this._change.w.apply(this,[e,i,s]))},ne:function(e,i,s){return t.extend(this._change.n.apply(this,arguments),this._change.e.apply(this,[e,i,s]))},nw:function(e,i,s){return t.extend(this._change.n.apply(this,arguments),this._change.w.apply(this,[e,i,s]))}},_propagate:function(e,i){t.ui.plugin.call(this,e,[i,this.ui()]),"resize"!==e&&this._trigger(e,i,this.ui())},plugins:{},ui:function(){return{originalElement:this.originalElement,element:this.element,helper:this.helper,position:this.position,size:this.size,originalSize:this.originalSize,originalPosition:this.originalPosition}}}),t.ui.plugin.add("resizable","animate",{stop:function(e){var 
i=t(this).resizable("instance"),s=i.options,n=i._proportionallyResizeElements,o=n.length&&/textarea/i.test(n[0].nodeName),a=o&&i._hasScroll(n[0],"left")?0:i.sizeDiff.height,r=o?0:i.sizeDiff.width,h={width:i.size.width-r,height:i.size.height-a},l=parseFloat(i.element.css("left"))+(i.position.left-i.originalPosition.left)||null,c=parseFloat(i.element.css("top"))+(i.position.top-i.originalPosition.top)||null;i.element.animate(t.extend(h,c&&l?{top:c,left:l}:{}),{duration:s.animateDuration,easing:s.animateEasing,step:function(){var s={width:parseFloat(i.element.css("width")),height:parseFloat(i.element.css("height")),top:parseFloat(i.element.css("top")),left:parseFloat(i.element.css("left"))};n&&n.length&&t(n[0]).css({width:s.width,height:s.height}),i._updateCache(s),i._propagate("resize",e)}})}}),t.ui.plugin.add("resizable","containment",{start:function(){var e,i,s,n,o,a,r,h=t(this).resizable("instance"),l=h.options,c=h.element,u=l.containment,d=u instanceof t?u.get(0):/parent/.test(u)?c.parent().get(0):u;d&&(h.containerElement=t(d),/document/.test(u)||u===document?(h.containerOffset={left:0,top:0},h.containerPosition={left:0,top:0},h.parentData={element:t(document),left:0,top:0,width:t(document).width(),height:t(document).height()||document.body.parentNode.scrollHeight}):(e=t(d),i=[],t(["Top","Right","Left","Bottom"]).each(function(t,s){i[t]=h._num(e.css("padding"+s))}),h.containerOffset=e.offset(),h.containerPosition=e.position(),h.containerSize={height:e.innerHeight()-i[3],width:e.innerWidth()-i[1]},s=h.containerOffset,n=h.containerSize.height,o=h.containerSize.width,a=h._hasScroll(d,"left")?d.scrollWidth:o,r=h._hasScroll(d)?d.scrollHeight:n,h.parentData={element:d,left:s.left,top:s.top,width:a,height:r}))},resize:function(e){var 
i,s,n,o,a=t(this).resizable("instance"),r=a.options,h=a.containerOffset,l=a.position,c=a._aspectRatio||e.shiftKey,u={top:0,left:0},d=a.containerElement,p=!0;d[0]!==document&&/static/.test(d.css("position"))&&(u=h),l.left<(a._helper?h.left:0)&&(a.size.width=a.size.width+(a._helper?a.position.left-h.left:a.position.left-u.left),c&&(a.size.height=a.size.width/a.aspectRatio,p=!1),a.position.left=r.helper?h.left:0),l.top<(a._helper?h.top:0)&&(a.size.height=a.size.height+(a._helper?a.position.top-h.top:a.position.top),c&&(a.size.width=a.size.height*a.aspectRatio,p=!1),a.position.top=a._helper?h.top:0),n=a.containerElement.get(0)===a.element.parent().get(0),o=/relative|absolute/.test(a.containerElement.css("position")),n&&o?(a.offset.left=a.parentData.left+a.position.left,a.offset.top=a.parentData.top+a.position.top):(a.offset.left=a.element.offset().left,a.offset.top=a.element.offset().top),i=Math.abs(a.sizeDiff.width+(a._helper?a.offset.left-u.left:a.offset.left-h.left)),s=Math.abs(a.sizeDiff.height+(a._helper?a.offset.top-u.top:a.offset.top-h.top)),i+a.size.width>=a.parentData.width&&(a.size.width=a.parentData.width-i,c&&(a.size.height=a.size.width/a.aspectRatio,p=!1)),s+a.size.height>=a.parentData.height&&(a.size.height=a.parentData.height-s,c&&(a.size.width=a.size.height*a.aspectRatio,p=!1)),p||(a.position.left=a.prevPosition.left,a.position.top=a.prevPosition.top,a.size.width=a.prevSize.width,a.size.height=a.prevSize.height)},stop:function(){var e=t(this).resizable("instance"),i=e.options,s=e.containerOffset,n=e.containerPosition,o=e.containerElement,a=t(e.helper),r=a.offset(),h=a.outerWidth()-e.sizeDiff.width,l=a.outerHeight()-e.sizeDiff.height;e._helper&&!i.animate&&/relative/.test(o.css("position"))&&t(this).css({left:r.left-n.left-s.left,width:h,height:l}),e._helper&&!i.animate&&/static/.test(o.css("position"))&&t(this).css({left:r.left-n.left-s.left,width:h,height:l})}}),t.ui.plugin.add("resizable","alsoResize",{start:function(){var 
e=t(this).resizable("instance"),i=e.options;t(i.alsoResize).each(function(){var e=t(this);e.data("ui-resizable-alsoresize",{width:parseFloat(e.width()),height:parseFloat(e.height()),left:parseFloat(e.css("left")),top:parseFloat(e.css("top"))})})},resize:function(e,i){var s=t(this).resizable("instance"),n=s.options,o=s.originalSize,a=s.originalPosition,r={height:s.size.height-o.height||0,width:s.size.width-o.width||0,top:s.position.top-a.top||0,left:s.position.left-a.left||0};t(n.alsoResize).each(function(){var e=t(this),s=t(this).data("ui-resizable-alsoresize"),n={},o=e.parents(i.originalElement[0]).length?["width","height"]:["width","height","top","left"];t.each(o,function(t,e){var i=(s[e]||0)+(r[e]||0);i&&i>=0&&(n[e]=i||null)}),e.css(n)})},stop:function(){t(this).removeData("ui-resizable-alsoresize")}}),t.ui.plugin.add("resizable","ghost",{start:function(){var e=t(this).resizable("instance"),i=e.size;e.ghost=e.originalElement.clone(),e.ghost.css({opacity:.25,display:"block",position:"relative",height:i.height,width:i.width,margin:0,left:0,top:0}),e._addClass(e.ghost,"ui-resizable-ghost"),t.uiBackCompat!==!1&&"string"==typeof e.options.ghost&&e.ghost.addClass(this.options.ghost),e.ghost.appendTo(e.helper)},resize:function(){var e=t(this).resizable("instance");e.ghost&&e.ghost.css({position:"relative",height:e.size.height,width:e.size.width})},stop:function(){var e=t(this).resizable("instance");e.ghost&&e.helper&&e.helper.get(0).removeChild(e.ghost.get(0))}}),t.ui.plugin.add("resizable","grid",{resize:function(){var e,i=t(this).resizable("instance"),s=i.options,n=i.size,o=i.originalSize,a=i.originalPosition,r=i.axis,h="number"==typeof 
s.grid?[s.grid,s.grid]:s.grid,l=h[0]||1,c=h[1]||1,u=Math.round((n.width-o.width)/l)*l,d=Math.round((n.height-o.height)/c)*c,p=o.width+u,f=o.height+d,m=s.maxWidth&&p>s.maxWidth,g=s.maxHeight&&f>s.maxHeight,_=s.minWidth&&s.minWidth>p,v=s.minHeight&&s.minHeight>f;s.grid=h,_&&(p+=l),v&&(f+=c),m&&(p-=l),g&&(f-=c),/^(se|s|e)$/.test(r)?(i.size.width=p,i.size.height=f):/^(ne)$/.test(r)?(i.size.width=p,i.size.height=f,i.position.top=a.top-d):/^(sw)$/.test(r)?(i.size.width=p,i.size.height=f,i.position.left=a.left-u):((0>=f-c||0>=p-l)&&(e=i._getPaddingPlusBorderDimensions(this)),f-c>0?(i.size.height=f,i.position.top=a.top-d):(f=c-e.height,i.size.height=f,i.position.top=a.top+o.height-f),p-l>0?(i.size.width=p,i.position.left=a.left-u):(p=l-e.width,i.size.width=p,i.position.left=a.left+o.width-p))}}),t.ui.resizable});/** + * Copyright (c) 2007 Ariel Flesler - aflesler ○ gmail • com | https://github.com/flesler + * Licensed under MIT + * @author Ariel Flesler + * @version 2.1.2 + */ +;(function(f){"use strict";"function"===typeof define&&define.amd?define(["jquery"],f):"undefined"!==typeof module&&module.exports?module.exports=f(require("jquery")):f(jQuery)})(function($){"use strict";function n(a){return!a.nodeName||-1!==$.inArray(a.nodeName.toLowerCase(),["iframe","#document","html","body"])}function h(a){return $.isFunction(a)||$.isPlainObject(a)?a:{top:a,left:a}}var p=$.scrollTo=function(a,d,b){return $(window).scrollTo(a,d,b)};p.defaults={axis:"xy",duration:0,limit:!0};$.fn.scrollTo=function(a,d,b){"object"=== typeof d&&(b=d,d=0);"function"===typeof b&&(b={onAfter:b});"max"===a&&(a=9E9);b=$.extend({},p.defaults,b);d=d||b.duration;var u=b.queue&&1=f[g]?0:Math.min(f[g],n));!a&&1-1){targetElements.on(evt+EVENT_NAMESPACE,function elementToggle(event){$.powerTip.toggle(this,event)})}else{targetElements.on(evt+EVENT_NAMESPACE,function 
elementOpen(event){$.powerTip.show(this,event)})}});$.each(options.closeEvents,function(idx,evt){if($.inArray(evt,options.openEvents)<0){targetElements.on(evt+EVENT_NAMESPACE,function elementClose(event){$.powerTip.hide(this,!isMouseEvent(event))})}});targetElements.on("keydown"+EVENT_NAMESPACE,function elementKeyDown(event){if(event.keyCode===27){$.powerTip.hide(this,true)}})}return targetElements};$.fn.powerTip.defaults={fadeInTime:200,fadeOutTime:100,followMouse:false,popupId:"powerTip",popupClass:null,intentSensitivity:7,intentPollInterval:100,closeDelay:100,placement:"n",smartPlacement:false,offset:10,mouseOnToPopup:false,manual:false,openEvents:["mouseenter","focus"],closeEvents:["mouseleave","blur"]};$.fn.powerTip.smartPlacementLists={n:["n","ne","nw","s"],e:["e","ne","se","w","nw","sw","n","s","e"],s:["s","se","sw","n"],w:["w","nw","sw","e","ne","se","n","s","w"],nw:["nw","w","sw","n","s","se","nw"],ne:["ne","e","se","n","s","sw","ne"],sw:["sw","w","nw","s","n","ne","sw"],se:["se","e","ne","s","n","nw","se"],"nw-alt":["nw-alt","n","ne-alt","sw-alt","s","se-alt","w","e"],"ne-alt":["ne-alt","n","nw-alt","se-alt","s","sw-alt","e","w"],"sw-alt":["sw-alt","s","se-alt","nw-alt","n","ne-alt","w","e"],"se-alt":["se-alt","s","sw-alt","ne-alt","n","nw-alt","e","w"]};$.powerTip={show:function apiShowTip(element,event){if(isMouseEvent(event)){trackMouse(event);session.previousX=event.pageX;session.previousY=event.pageY;$(element).data(DATA_DISPLAYCONTROLLER).show()}else{$(element).first().data(DATA_DISPLAYCONTROLLER).show(true,true)}return element},reposition:function apiResetPosition(element){$(element).first().data(DATA_DISPLAYCONTROLLER).resetPosition();return element},hide:function apiCloseTip(element,immediate){var displayController;immediate=element?immediate:true;if(element){displayController=$(element).first().data(DATA_DISPLAYCONTROLLER)}else 
if(session.activeHover){displayController=session.activeHover.data(DATA_DISPLAYCONTROLLER)}if(displayController){displayController.hide(immediate)}return element},toggle:function apiToggle(element,event){if(session.activeHover&&session.activeHover.is(element)){$.powerTip.hide(element,!isMouseEvent(event))}else{$.powerTip.show(element,event)}return element}};$.powerTip.showTip=$.powerTip.show;$.powerTip.closeTip=$.powerTip.hide;function CSSCoordinates(){var me=this;me.top="auto";me.left="auto";me.right="auto";me.bottom="auto";me.set=function(property,value){if($.isNumeric(value)){me[property]=Math.round(value)}}}function DisplayController(element,options,tipController){var hoverTimer=null,myCloseDelay=null;function openTooltip(immediate,forceOpen){cancelTimer();if(!element.data(DATA_HASACTIVEHOVER)){if(!immediate){session.tipOpenImminent=true;hoverTimer=setTimeout(function intentDelay(){hoverTimer=null;checkForIntent()},options.intentPollInterval)}else{if(forceOpen){element.data(DATA_FORCEDOPEN,true)}closeAnyDelayed();tipController.showTip(element)}}else{cancelClose()}}function closeTooltip(disableDelay){if(myCloseDelay){myCloseDelay=session.closeDelayTimeout=clearTimeout(myCloseDelay);session.delayInProgress=false}cancelTimer();session.tipOpenImminent=false;if(element.data(DATA_HASACTIVEHOVER)){element.data(DATA_FORCEDOPEN,false);if(!disableDelay){session.delayInProgress=true;session.closeDelayTimeout=setTimeout(function closeDelay(){session.closeDelayTimeout=null;tipController.hideTip(element);session.delayInProgress=false;myCloseDelay=null},options.closeDelay);myCloseDelay=session.closeDelayTimeout}else{tipController.hideTip(element)}}}function checkForIntent(){var 
xDifference=Math.abs(session.previousX-session.currentX),yDifference=Math.abs(session.previousY-session.currentY),totalDifference=xDifference+yDifference;if(totalDifference",{id:options.popupId});if($body.length===0){$body=$("body")}$body.append(tipElement);session.tooltips=session.tooltips?session.tooltips.add(tipElement):tipElement}if(options.followMouse){if(!tipElement.data(DATA_HASMOUSEMOVE)){$document.on("mousemove"+EVENT_NAMESPACE,positionTipOnCursor);$window.on("scroll"+EVENT_NAMESPACE,positionTipOnCursor);tipElement.data(DATA_HASMOUSEMOVE,true)}}function beginShowTip(element){element.data(DATA_HASACTIVEHOVER,true);tipElement.queue(function queueTipInit(next){showTip(element);next()})}function showTip(element){var tipContent;if(!element.data(DATA_HASACTIVEHOVER)){return}if(session.isTipOpen){if(!session.isClosing){hideTip(session.activeHover)}tipElement.delay(100).queue(function queueTipAgain(next){showTip(element);next()});return}element.trigger("powerTipPreRender");tipContent=getTooltipContent(element);if(tipContent){tipElement.empty().append(tipContent)}else{return}element.trigger("powerTipRender");session.activeHover=element;session.isTipOpen=true;tipElement.data(DATA_MOUSEONTOTIP,options.mouseOnToPopup);tipElement.addClass(options.popupClass);if(!options.followMouse||element.data(DATA_FORCEDOPEN)){positionTipOnElement(element);session.isFixedTipOpen=true}else{positionTipOnCursor()}if(!element.data(DATA_FORCEDOPEN)&&!options.followMouse){$document.on("click"+EVENT_NAMESPACE,function documentClick(event){var target=event.target;if(target!==element[0]){if(options.mouseOnToPopup){if(target!==tipElement[0]&&!$.contains(tipElement[0],target)){$.powerTip.hide()}}else{$.powerTip.hide()}}})}if(options.mouseOnToPopup&&!options.manual){tipElement.on("mouseenter"+EVENT_NAMESPACE,function tipMouseEnter(){if(session.activeHover){session.activeHover.data(DATA_DISPLAYCONTROLLER).cancel()}});tipElement.on("mouseleave"+EVENT_NAMESPACE,function 
tipMouseLeave(){if(session.activeHover){session.activeHover.data(DATA_DISPLAYCONTROLLER).hide()}})}tipElement.fadeIn(options.fadeInTime,function fadeInCallback(){if(!session.desyncTimeout){session.desyncTimeout=setInterval(closeDesyncedTip,500)}element.trigger("powerTipOpen")})}function hideTip(element){session.isClosing=true;session.isTipOpen=false;session.desyncTimeout=clearInterval(session.desyncTimeout);element.data(DATA_HASACTIVEHOVER,false);element.data(DATA_FORCEDOPEN,false);$document.off("click"+EVENT_NAMESPACE);tipElement.off(EVENT_NAMESPACE);tipElement.fadeOut(options.fadeOutTime,function fadeOutCallback(){var coords=new CSSCoordinates;session.activeHover=null;session.isClosing=false;session.isFixedTipOpen=false;tipElement.removeClass();coords.set("top",session.currentY+options.offset);coords.set("left",session.currentX+options.offset);tipElement.css(coords);element.trigger("powerTipClose")})}function positionTipOnCursor(){var tipWidth,tipHeight,coords,collisions,collisionCount;if(!session.isFixedTipOpen&&(session.isTipOpen||session.tipOpenImminent&&tipElement.data(DATA_HASMOUSEMOVE))){tipWidth=tipElement.outerWidth();tipHeight=tipElement.outerHeight();coords=new CSSCoordinates;coords.set("top",session.currentY+options.offset);coords.set("left",session.currentX+options.offset);collisions=getViewportCollisions(coords,tipWidth,tipHeight);if(collisions!==Collision.none){collisionCount=countFlags(collisions);if(collisionCount===1){if(collisions===Collision.right){coords.set("left",session.scrollLeft+session.windowWidth-tipWidth)}else if(collisions===Collision.bottom){coords.set("top",session.scrollTop+session.windowHeight-tipHeight)}}else{coords.set("left",session.currentX-tipWidth-options.offset);coords.set("top",session.currentY-tipHeight-options.offset)}}tipElement.css(coords)}}function positionTipOnElement(element){var 
priorityList,finalPlacement;if(options.smartPlacement||options.followMouse&&element.data(DATA_FORCEDOPEN)){priorityList=$.fn.powerTip.smartPlacementLists[options.placement];$.each(priorityList,function(idx,pos){var collisions=getViewportCollisions(placeTooltip(element,pos),tipElement.outerWidth(),tipElement.outerHeight());finalPlacement=pos;return collisions!==Collision.none})}else{placeTooltip(element,options.placement);finalPlacement=options.placement}tipElement.removeClass("w nw sw e ne se n s w se-alt sw-alt ne-alt nw-alt");tipElement.addClass(finalPlacement)}function placeTooltip(element,placement){var iterationCount=0,tipWidth,tipHeight,coords=new CSSCoordinates;coords.set("top",0);coords.set("left",0);tipElement.css(coords);do{tipWidth=tipElement.outerWidth();tipHeight=tipElement.outerHeight();coords=placementCalculator.compute(element,placement,tipWidth,tipHeight,options.offset);tipElement.css(coords)}while(++iterationCount<=5&&(tipWidth!==tipElement.outerWidth()||tipHeight!==tipElement.outerHeight()));return coords}function closeDesyncedTip(){var isDesynced=false,hasDesyncableCloseEvent=$.grep(["mouseleave","mouseout","blur","focusout"],function(eventType){return $.inArray(eventType,options.closeEvents)!==-1}).length>0;if(session.isTipOpen&&!session.isClosing&&!session.delayInProgress&&hasDesyncableCloseEvent){if(session.activeHover.data(DATA_HASACTIVEHOVER)===false||session.activeHover.is(":disabled")){isDesynced=true}else if(!isMouseOver(session.activeHover)&&!session.activeHover.is(":focus")&&!session.activeHover.data(DATA_FORCEDOPEN)){if(tipElement.data(DATA_MOUSEONTOTIP)){if(!isMouseOver(tipElement)){isDesynced=true}}else{isDesynced=true}}if(isDesynced){hideTip(session.activeHover)}}}this.showTip=beginShowTip;this.hideTip=hideTip;this.resetPosition=positionTipOnElement}function isSvgElement(element){return Boolean(window.SVGElement&&element[0]instanceof SVGElement)}function isMouseEvent(event){return 
Boolean(event&&$.inArray(event.type,MOUSE_EVENTS)>-1&&typeof event.pageX==="number")}function initTracking(){if(!session.mouseTrackingActive){session.mouseTrackingActive=true;getViewportDimensions();$(getViewportDimensions);$document.on("mousemove"+EVENT_NAMESPACE,trackMouse);$window.on("resize"+EVENT_NAMESPACE,trackResize);$window.on("scroll"+EVENT_NAMESPACE,trackScroll)}}function getViewportDimensions(){session.scrollLeft=$window.scrollLeft();session.scrollTop=$window.scrollTop();session.windowWidth=$window.width();session.windowHeight=$window.height()}function trackResize(){session.windowWidth=$window.width();session.windowHeight=$window.height()}function trackScroll(){var x=$window.scrollLeft(),y=$window.scrollTop();if(x!==session.scrollLeft){session.currentX+=x-session.scrollLeft;session.scrollLeft=x}if(y!==session.scrollTop){session.currentY+=y-session.scrollTop;session.scrollTop=y}}function trackMouse(event){session.currentX=event.pageX;session.currentY=event.pageY}function isMouseOver(element){var elementPosition=element.offset(),elementBox=element[0].getBoundingClientRect(),elementWidth=elementBox.right-elementBox.left,elementHeight=elementBox.bottom-elementBox.top;return session.currentX>=elementPosition.left&&session.currentX<=elementPosition.left+elementWidth&&session.currentY>=elementPosition.top&&session.currentY<=elementPosition.top+elementHeight}function getTooltipContent(element){var tipText=element.data(DATA_POWERTIP),tipObject=element.data(DATA_POWERTIPJQ),tipTarget=element.data(DATA_POWERTIPTARGET),targetElement,content;if(tipText){if($.isFunction(tipText)){tipText=tipText.call(element[0])}content=tipText}else if(tipObject){if($.isFunction(tipObject)){tipObject=tipObject.call(element[0])}if(tipObject.length>0){content=tipObject.clone(true,true)}}else if(tipTarget){targetElement=$("#"+tipTarget);if(targetElement.length>0){content=targetElement.html()}}return content}function getViewportCollisions(coords,elementWidth,elementHeight){var 
viewportTop=session.scrollTop,viewportLeft=session.scrollLeft,viewportBottom=viewportTop+session.windowHeight,viewportRight=viewportLeft+session.windowWidth,collisions=Collision.none;if(coords.topviewportBottom||Math.abs(coords.bottom-session.windowHeight)>viewportBottom){collisions|=Collision.bottom}if(coords.leftviewportRight){collisions|=Collision.left}if(coords.left+elementWidth>viewportRight||coords.right1)){a.preventDefault();var c=a.originalEvent.changedTouches[0],d=document.createEvent("MouseEvents");d.initMouseEvent(b,!0,!0,window,1,c.screenX,c.screenY,c.clientX,c.clientY,!1,!1,!1,!1,0,null),a.target.dispatchEvent(d)}}if(a.support.touch="ontouchend"in document,a.support.touch){var e,b=a.ui.mouse.prototype,c=b._mouseInit,d=b._mouseDestroy;b._touchStart=function(a){var b=this;!e&&b._mouseCapture(a.originalEvent.changedTouches[0])&&(e=!0,b._touchMoved=!1,f(a,"mouseover"),f(a,"mousemove"),f(a,"mousedown"))},b._touchMove=function(a){e&&(this._touchMoved=!0,f(a,"mousemove"))},b._touchEnd=function(a){e&&(f(a,"mouseup"),f(a,"mouseout"),this._touchMoved||f(a,"click"),e=!1)},b._mouseInit=function(){var b=this;b.element.bind({touchstart:a.proxy(b,"_touchStart"),touchmove:a.proxy(b,"_touchMove"),touchend:a.proxy(b,"_touchEnd")}),c.call(b)},b._mouseDestroy=function(){var b=this;b.element.unbind({touchstart:a.proxy(b,"_touchStart"),touchmove:a.proxy(b,"_touchMove"),touchend:a.proxy(b,"_touchEnd")}),d.call(b)}}}(jQuery);/*! SmartMenus jQuery Plugin - v1.1.0 - September 17, 2017 + * http://www.smartmenus.org/ + * Copyright Vasil Dinkov, Vadikom Web Ltd. 
http://vadikom.com; Licensed MIT */(function(t){"function"==typeof define&&define.amd?define(["jquery"],t):"object"==typeof module&&"object"==typeof module.exports?module.exports=t(require("jquery")):t(jQuery)})(function($){function initMouseDetection(t){var e=".smartmenus_mouse";if(mouseDetectionEnabled||t)mouseDetectionEnabled&&t&&($(document).off(e),mouseDetectionEnabled=!1);else{var i=!0,s=null,o={mousemove:function(t){var e={x:t.pageX,y:t.pageY,timeStamp:(new Date).getTime()};if(s){var o=Math.abs(s.x-e.x),a=Math.abs(s.y-e.y);if((o>0||a>0)&&2>=o&&2>=a&&300>=e.timeStamp-s.timeStamp&&(mouse=!0,i)){var n=$(t.target).closest("a");n.is("a")&&$.each(menuTrees,function(){return $.contains(this.$root[0],n[0])?(this.itemEnter({currentTarget:n[0]}),!1):void 0}),i=!1}}s=e}};o[touchEvents?"touchstart":"pointerover pointermove pointerout MSPointerOver MSPointerMove MSPointerOut"]=function(t){isTouchEvent(t.originalEvent)&&(mouse=!1)},$(document).on(getEventsNS(o,e)),mouseDetectionEnabled=!0}}function isTouchEvent(t){return!/^(4|mouse)$/.test(t.pointerType)}function getEventsNS(t,e){e||(e="");var i={};for(var s in t)i[s.split(" ").join(e+" ")+e]=t[s];return i}var menuTrees=[],mouse=!1,touchEvents="ontouchstart"in window,mouseDetectionEnabled=!1,requestAnimationFrame=window.requestAnimationFrame||function(t){return setTimeout(t,1e3/60)},cancelAnimationFrame=window.cancelAnimationFrame||function(t){clearTimeout(t)},canAnimate=!!$.fn.animate;return $.SmartMenus=function(t,e){this.$root=$(t),this.opts=e,this.rootId="",this.accessIdPrefix="",this.$subArrow=null,this.activatedItems=[],this.visibleSubMenus=[],this.showTimeout=0,this.hideTimeout=0,this.scrollTimeout=0,this.clickActivated=!1,this.focusActivated=!1,this.zIndexInc=0,this.idInc=0,this.$firstLink=null,this.$firstSub=null,this.disabled=!1,this.$disableOverlay=null,this.$touchScrollingSub=null,this.cssTransforms3d="perspective"in t.style||"webkitPerspective"in 
t.style,this.wasCollapsible=!1,this.init()},$.extend($.SmartMenus,{hideAll:function(){$.each(menuTrees,function(){this.menuHideAll()})},destroy:function(){for(;menuTrees.length;)menuTrees[0].destroy();initMouseDetection(!0)},prototype:{init:function(t){var e=this;if(!t){menuTrees.push(this),this.rootId=((new Date).getTime()+Math.random()+"").replace(/\D/g,""),this.accessIdPrefix="sm-"+this.rootId+"-",this.$root.hasClass("sm-rtl")&&(this.opts.rightToLeftSubMenus=!0);var i=".smartmenus";this.$root.data("smartmenus",this).attr("data-smartmenus-id",this.rootId).dataSM("level",1).on(getEventsNS({"mouseover focusin":$.proxy(this.rootOver,this),"mouseout focusout":$.proxy(this.rootOut,this),keydown:$.proxy(this.rootKeyDown,this)},i)).on(getEventsNS({mouseenter:$.proxy(this.itemEnter,this),mouseleave:$.proxy(this.itemLeave,this),mousedown:$.proxy(this.itemDown,this),focus:$.proxy(this.itemFocus,this),blur:$.proxy(this.itemBlur,this),click:$.proxy(this.itemClick,this)},i),"a"),i+=this.rootId,this.opts.hideOnClick&&$(document).on(getEventsNS({touchstart:$.proxy(this.docTouchStart,this),touchmove:$.proxy(this.docTouchMove,this),touchend:$.proxy(this.docTouchEnd,this),click:$.proxy(this.docClick,this)},i)),$(window).on(getEventsNS({"resize orientationchange":$.proxy(this.winResize,this)},i)),this.opts.subIndicators&&(this.$subArrow=$("").addClass("sub-arrow"),this.opts.subIndicatorsText&&this.$subArrow.html(this.opts.subIndicatorsText)),initMouseDetection()}if(this.$firstSub=this.$root.find("ul").each(function(){e.menuInit($(this))}).eq(0),this.$firstLink=this.$root.find("a").eq(0),this.opts.markCurrentItem){var s=/(index|default)\.[^#\?\/]*/i,o=/#.*/,a=window.location.href.replace(s,""),n=a.replace(o,"");this.$root.find("a").each(function(){var 
t=this.href.replace(s,""),i=$(this);(t==a||t==n)&&(i.addClass("current"),e.opts.markCurrentTree&&i.parentsUntil("[data-smartmenus-id]","ul").each(function(){$(this).dataSM("parent-a").addClass("current")}))})}this.wasCollapsible=this.isCollapsible()},destroy:function(t){if(!t){var e=".smartmenus";this.$root.removeData("smartmenus").removeAttr("data-smartmenus-id").removeDataSM("level").off(e),e+=this.rootId,$(document).off(e),$(window).off(e),this.opts.subIndicators&&(this.$subArrow=null)}this.menuHideAll();var i=this;this.$root.find("ul").each(function(){var t=$(this);t.dataSM("scroll-arrows")&&t.dataSM("scroll-arrows").remove(),t.dataSM("shown-before")&&((i.opts.subMenusMinWidth||i.opts.subMenusMaxWidth)&&t.css({width:"",minWidth:"",maxWidth:""}).removeClass("sm-nowrap"),t.dataSM("scroll-arrows")&&t.dataSM("scroll-arrows").remove(),t.css({zIndex:"",top:"",left:"",marginLeft:"",marginTop:"",display:""})),0==(t.attr("id")||"").indexOf(i.accessIdPrefix)&&t.removeAttr("id")}).removeDataSM("in-mega").removeDataSM("shown-before").removeDataSM("scroll-arrows").removeDataSM("parent-a").removeDataSM("level").removeDataSM("beforefirstshowfired").removeAttr("role").removeAttr("aria-hidden").removeAttr("aria-labelledby").removeAttr("aria-expanded"),this.$root.find("a.has-submenu").each(function(){var 
t=$(this);0==t.attr("id").indexOf(i.accessIdPrefix)&&t.removeAttr("id")}).removeClass("has-submenu").removeDataSM("sub").removeAttr("aria-haspopup").removeAttr("aria-controls").removeAttr("aria-expanded").closest("li").removeDataSM("sub"),this.opts.subIndicators&&this.$root.find("span.sub-arrow").remove(),this.opts.markCurrentItem&&this.$root.find("a.current").removeClass("current"),t||(this.$root=null,this.$firstLink=null,this.$firstSub=null,this.$disableOverlay&&(this.$disableOverlay.remove(),this.$disableOverlay=null),menuTrees.splice($.inArray(this,menuTrees),1))},disable:function(t){if(!this.disabled){if(this.menuHideAll(),!t&&!this.opts.isPopup&&this.$root.is(":visible")){var e=this.$root.offset();this.$disableOverlay=$('
').css({position:"absolute",top:e.top,left:e.left,width:this.$root.outerWidth(),height:this.$root.outerHeight(),zIndex:this.getStartZIndex(!0),opacity:0}).appendTo(document.body)}this.disabled=!0}},docClick:function(t){return this.$touchScrollingSub?(this.$touchScrollingSub=null,void 0):((this.visibleSubMenus.length&&!$.contains(this.$root[0],t.target)||$(t.target).closest("a").length)&&this.menuHideAll(),void 0)},docTouchEnd:function(){if(this.lastTouch){if(!(!this.visibleSubMenus.length||void 0!==this.lastTouch.x2&&this.lastTouch.x1!=this.lastTouch.x2||void 0!==this.lastTouch.y2&&this.lastTouch.y1!=this.lastTouch.y2||this.lastTouch.target&&$.contains(this.$root[0],this.lastTouch.target))){this.hideTimeout&&(clearTimeout(this.hideTimeout),this.hideTimeout=0);var t=this;this.hideTimeout=setTimeout(function(){t.menuHideAll()},350)}this.lastTouch=null}},docTouchMove:function(t){if(this.lastTouch){var e=t.originalEvent.touches[0];this.lastTouch.x2=e.pageX,this.lastTouch.y2=e.pageY}},docTouchStart:function(t){var e=t.originalEvent.touches[0];this.lastTouch={x1:e.pageX,y1:e.pageY,target:e.target}},enable:function(){this.disabled&&(this.$disableOverlay&&(this.$disableOverlay.remove(),this.$disableOverlay=null),this.disabled=!1)},getClosestMenu:function(t){for(var e=$(t).closest("ul");e.dataSM("in-mega");)e=e.parent().closest("ul");return e[0]||null},getHeight:function(t){return this.getOffset(t,!0)},getOffset:function(t,e){var i;"none"==t.css("display")&&(i={position:t[0].style.position,visibility:t[0].style.visibility},t.css({position:"absolute",visibility:"hidden"}).show());var s=t[0].getBoundingClientRect&&t[0].getBoundingClientRect(),o=s&&(e?s.height||s.bottom-s.top:s.width||s.right-s.left);return o||0===o||(o=e?t[0].offsetHeight:t[0].offsetWidth),i&&t.hide().css(i),o},getStartZIndex:function(t){var e=parseInt(this[t?"$root":"$firstSub"].css("z-index"));return!t&&isNaN(e)&&(e=parseInt(this.$root.css("z-index"))),isNaN(e)?1:e},getTouchPoint:function(t){return 
t.touches&&t.touches[0]||t.changedTouches&&t.changedTouches[0]||t},getViewport:function(t){var e=t?"Height":"Width",i=document.documentElement["client"+e],s=window["inner"+e];return s&&(i=Math.min(i,s)),i},getViewportHeight:function(){return this.getViewport(!0)},getViewportWidth:function(){return this.getViewport()},getWidth:function(t){return this.getOffset(t)},handleEvents:function(){return!this.disabled&&this.isCSSOn()},handleItemEvents:function(t){return this.handleEvents()&&!this.isLinkInMegaMenu(t)},isCollapsible:function(){return"static"==this.$firstSub.css("position")},isCSSOn:function(){return"inline"!=this.$firstLink.css("display")},isFixed:function(){var t="fixed"==this.$root.css("position");return t||this.$root.parentsUntil("body").each(function(){return"fixed"==$(this).css("position")?(t=!0,!1):void 0}),t},isLinkInMegaMenu:function(t){return $(this.getClosestMenu(t[0])).hasClass("mega-menu")},isTouchMode:function(){return!mouse||this.opts.noMouseOver||this.isCollapsible()},itemActivate:function(t,e){var i=t.closest("ul"),s=i.dataSM("level");if(s>1&&(!this.activatedItems[s-2]||this.activatedItems[s-2][0]!=i.dataSM("parent-a")[0])){var o=this;$(i.parentsUntil("[data-smartmenus-id]","ul").get().reverse()).add(i).each(function(){o.itemActivate($(this).dataSM("parent-a"))})}if((!this.isCollapsible()||e)&&this.menuHideSubMenus(this.activatedItems[s-1]&&this.activatedItems[s-1][0]==t[0]?s:s-1),this.activatedItems[s-1]=t,this.$root.triggerHandler("activate.smapi",t[0])!==!1){var a=t.dataSM("sub");a&&(this.isTouchMode()||!this.opts.showOnClick||this.clickActivated)&&this.menuShow(a)}},itemBlur:function(t){var e=$(t.currentTarget);this.handleItemEvents(e)&&this.$root.triggerHandler("blur.smapi",e[0])},itemClick:function(t){var e=$(t.currentTarget);if(this.handleItemEvents(e)){if(this.$touchScrollingSub&&this.$touchScrollingSub[0]==e.closest("ul")[0])return 
this.$touchScrollingSub=null,t.stopPropagation(),!1;if(this.$root.triggerHandler("click.smapi",e[0])===!1)return!1;var i=$(t.target).is(".sub-arrow"),s=e.dataSM("sub"),o=s?2==s.dataSM("level"):!1,a=this.isCollapsible(),n=/toggle$/.test(this.opts.collapsibleBehavior),r=/link$/.test(this.opts.collapsibleBehavior),h=/^accordion/.test(this.opts.collapsibleBehavior);if(s&&!s.is(":visible")){if((!r||!a||i)&&(this.opts.showOnClick&&o&&(this.clickActivated=!0),this.itemActivate(e,h),s.is(":visible")))return this.focusActivated=!0,!1}else if(a&&(n||i))return this.itemActivate(e,h),this.menuHide(s),n&&(this.focusActivated=!1),!1;return this.opts.showOnClick&&o||e.hasClass("disabled")||this.$root.triggerHandler("select.smapi",e[0])===!1?!1:void 0}},itemDown:function(t){var e=$(t.currentTarget);this.handleItemEvents(e)&&e.dataSM("mousedown",!0)},itemEnter:function(t){var e=$(t.currentTarget);if(this.handleItemEvents(e)){if(!this.isTouchMode()){this.showTimeout&&(clearTimeout(this.showTimeout),this.showTimeout=0);var i=this;this.showTimeout=setTimeout(function(){i.itemActivate(e)},this.opts.showOnClick&&1==e.closest("ul").dataSM("level")?1:this.opts.showTimeout)}this.$root.triggerHandler("mouseenter.smapi",e[0])}},itemFocus:function(t){var e=$(t.currentTarget);this.handleItemEvents(e)&&(!this.focusActivated||this.isTouchMode()&&e.dataSM("mousedown")||this.activatedItems.length&&this.activatedItems[this.activatedItems.length-1][0]==e[0]||this.itemActivate(e,!0),this.$root.triggerHandler("focus.smapi",e[0]))},itemLeave:function(t){var e=$(t.currentTarget);this.handleItemEvents(e)&&(this.isTouchMode()||(e[0].blur(),this.showTimeout&&(clearTimeout(this.showTimeout),this.showTimeout=0)),e.removeDataSM("mousedown"),this.$root.triggerHandler("mouseleave.smapi",e[0]))},menuHide:function(t){if(this.$root.triggerHandler("beforehide.smapi",t[0])!==!1&&(canAnimate&&t.stop(!0,!0),"none"!=t.css("display"))){var 
e=function(){t.css("z-index","")};this.isCollapsible()?canAnimate&&this.opts.collapsibleHideFunction?this.opts.collapsibleHideFunction.call(this,t,e):t.hide(this.opts.collapsibleHideDuration,e):canAnimate&&this.opts.hideFunction?this.opts.hideFunction.call(this,t,e):t.hide(this.opts.hideDuration,e),t.dataSM("scroll")&&(this.menuScrollStop(t),t.css({"touch-action":"","-ms-touch-action":"","-webkit-transform":"",transform:""}).off(".smartmenus_scroll").removeDataSM("scroll").dataSM("scroll-arrows").hide()),t.dataSM("parent-a").removeClass("highlighted").attr("aria-expanded","false"),t.attr({"aria-expanded":"false","aria-hidden":"true"});var i=t.dataSM("level");this.activatedItems.splice(i-1,1),this.visibleSubMenus.splice($.inArray(t,this.visibleSubMenus),1),this.$root.triggerHandler("hide.smapi",t[0])}},menuHideAll:function(){this.showTimeout&&(clearTimeout(this.showTimeout),this.showTimeout=0);for(var t=this.opts.isPopup?1:0,e=this.visibleSubMenus.length-1;e>=t;e--)this.menuHide(this.visibleSubMenus[e]);this.opts.isPopup&&(canAnimate&&this.$root.stop(!0,!0),this.$root.is(":visible")&&(canAnimate&&this.opts.hideFunction?this.opts.hideFunction.call(this,this.$root):this.$root.hide(this.opts.hideDuration))),this.activatedItems=[],this.visibleSubMenus=[],this.clickActivated=!1,this.focusActivated=!1,this.zIndexInc=0,this.$root.triggerHandler("hideAll.smapi")},menuHideSubMenus:function(t){for(var e=this.activatedItems.length-1;e>=t;e--){var i=this.activatedItems[e].dataSM("sub");i&&this.menuHide(i)}},menuInit:function(t){if(!t.dataSM("in-mega")){t.hasClass("mega-menu")&&t.find("ul").dataSM("in-mega",!0);for(var e=2,i=t[0];(i=i.parentNode.parentNode)!=this.$root[0];)e++;var s=t.prevAll("a").eq(-1);s.length||(s=t.prevAll().find("a").eq(-1)),s.addClass("has-submenu").dataSM("sub",t),t.dataSM("parent-a",s).dataSM("level",e).parent().dataSM("sub",t);var o=s.attr("id")||this.accessIdPrefix+ ++this.idInc,a=t.attr("id")||this.accessIdPrefix+ 
++this.idInc;s.attr({id:o,"aria-haspopup":"true","aria-controls":a,"aria-expanded":"false"}),t.attr({id:a,role:"group","aria-hidden":"true","aria-labelledby":o,"aria-expanded":"false"}),this.opts.subIndicators&&s[this.opts.subIndicatorsPos](this.$subArrow.clone())}},menuPosition:function(t){var e,i,s=t.dataSM("parent-a"),o=s.closest("li"),a=o.parent(),n=t.dataSM("level"),r=this.getWidth(t),h=this.getHeight(t),u=s.offset(),l=u.left,c=u.top,d=this.getWidth(s),m=this.getHeight(s),p=$(window),f=p.scrollLeft(),v=p.scrollTop(),b=this.getViewportWidth(),S=this.getViewportHeight(),g=a.parent().is("[data-sm-horizontal-sub]")||2==n&&!a.hasClass("sm-vertical"),M=this.opts.rightToLeftSubMenus&&!o.is("[data-sm-reverse]")||!this.opts.rightToLeftSubMenus&&o.is("[data-sm-reverse]"),w=2==n?this.opts.mainMenuSubOffsetX:this.opts.subMenusSubOffsetX,T=2==n?this.opts.mainMenuSubOffsetY:this.opts.subMenusSubOffsetY;if(g?(e=M?d-r-w:w,i=this.opts.bottomToTopSubMenus?-h-T:m+T):(e=M?w-r:d-w,i=this.opts.bottomToTopSubMenus?m-T-h:T),this.opts.keepInViewport){var y=l+e,I=c+i;if(M&&f>y?e=g?f-y+e:d-w:!M&&y+r>f+b&&(e=g?f+b-r-y+e:w-r),g||(S>h&&I+h>v+S?i+=v+S-h-I:(h>=S||v>I)&&(i+=v-I)),g&&(I+h>v+S+.49||v>I)||!g&&h>S+.49){var x=this;t.dataSM("scroll-arrows")||t.dataSM("scroll-arrows",$([$('')[0],$('')[0]]).on({mouseenter:function(){t.dataSM("scroll").up=$(this).hasClass("scroll-up"),x.menuScroll(t)},mouseleave:function(e){x.menuScrollStop(t),x.menuScrollOut(t,e)},"mousewheel DOMMouseScroll":function(t){t.preventDefault()}}).insertAfter(t));var A=".smartmenus_scroll";if(t.dataSM("scroll",{y:this.cssTransforms3d?0:i-m,step:1,itemH:m,subH:h,arrowDownH:this.getHeight(t.dataSM("scroll-arrows").eq(1))}).on(getEventsNS({mouseover:function(e){x.menuScrollOver(t,e)},mouseout:function(e){x.menuScrollOut(t,e)},"mousewheel 
DOMMouseScroll":function(e){x.menuScrollMousewheel(t,e)}},A)).dataSM("scroll-arrows").css({top:"auto",left:"0",marginLeft:e+(parseInt(t.css("border-left-width"))||0),width:r-(parseInt(t.css("border-left-width"))||0)-(parseInt(t.css("border-right-width"))||0),zIndex:t.css("z-index")}).eq(g&&this.opts.bottomToTopSubMenus?0:1).show(),this.isFixed()){var C={};C[touchEvents?"touchstart touchmove touchend":"pointerdown pointermove pointerup MSPointerDown MSPointerMove MSPointerUp"]=function(e){x.menuScrollTouch(t,e)},t.css({"touch-action":"none","-ms-touch-action":"none"}).on(getEventsNS(C,A))}}}t.css({top:"auto",left:"0",marginLeft:e,marginTop:i-m})},menuScroll:function(t,e,i){var s,o=t.dataSM("scroll"),a=t.dataSM("scroll-arrows"),n=o.up?o.upEnd:o.downEnd;if(!e&&o.momentum){if(o.momentum*=.92,s=o.momentum,.5>s)return this.menuScrollStop(t),void 0}else s=i||(e||!this.opts.scrollAccelerate?this.opts.scrollStep:Math.floor(o.step));var r=t.dataSM("level");if(this.activatedItems[r-1]&&this.activatedItems[r-1].dataSM("sub")&&this.activatedItems[r-1].dataSM("sub").is(":visible")&&this.menuHideSubMenus(r-1),o.y=o.up&&o.y>=n||!o.up&&n>=o.y?o.y:Math.abs(n-o.y)>s?o.y+(o.up?s:-s):n,t.css(this.cssTransforms3d?{"-webkit-transform":"translate3d(0, "+o.y+"px, 0)",transform:"translate3d(0, "+o.y+"px, 0)"}:{marginTop:o.y}),mouse&&(o.up&&o.y>o.downEnd||!o.up&&o.y0;t.dataSM("scroll-arrows").eq(i?0:1).is(":visible")&&(t.dataSM("scroll").up=i,this.menuScroll(t,!0))}e.preventDefault()},menuScrollOut:function(t,e){mouse&&(/^scroll-(up|down)/.test((e.relatedTarget||"").className)||(t[0]==e.relatedTarget||$.contains(t[0],e.relatedTarget))&&this.getClosestMenu(e.relatedTarget)==t[0]||t.dataSM("scroll-arrows").css("visibility","hidden"))},menuScrollOver:function(t,e){if(mouse&&!/^scroll-(up|down)/.test(e.target.className)&&this.getClosestMenu(e.target)==t[0]){this.menuScrollRefreshData(t);var 
i=t.dataSM("scroll"),s=$(window).scrollTop()-t.dataSM("parent-a").offset().top-i.itemH;t.dataSM("scroll-arrows").eq(0).css("margin-top",s).end().eq(1).css("margin-top",s+this.getViewportHeight()-i.arrowDownH).end().css("visibility","visible")}},menuScrollRefreshData:function(t){var e=t.dataSM("scroll"),i=$(window).scrollTop()-t.dataSM("parent-a").offset().top-e.itemH;this.cssTransforms3d&&(i=-(parseFloat(t.css("margin-top"))-i)),$.extend(e,{upEnd:i,downEnd:i+this.getViewportHeight()-e.subH})},menuScrollStop:function(t){return this.scrollTimeout?(cancelAnimationFrame(this.scrollTimeout),this.scrollTimeout=0,t.dataSM("scroll").step=1,!0):void 0},menuScrollTouch:function(t,e){if(e=e.originalEvent,isTouchEvent(e)){var i=this.getTouchPoint(e);if(this.getClosestMenu(i.target)==t[0]){var s=t.dataSM("scroll");if(/(start|down)$/i.test(e.type))this.menuScrollStop(t)?(e.preventDefault(),this.$touchScrollingSub=t):this.$touchScrollingSub=null,this.menuScrollRefreshData(t),$.extend(s,{touchStartY:i.pageY,touchStartTime:e.timeStamp});else if(/move$/i.test(e.type)){var o=void 0!==s.touchY?s.touchY:s.touchStartY;if(void 0!==o&&o!=i.pageY){this.$touchScrollingSub=t;var a=i.pageY>o;void 0!==s.up&&s.up!=a&&$.extend(s,{touchStartY:i.pageY,touchStartTime:e.timeStamp}),$.extend(s,{up:a,touchY:i.pageY}),this.menuScroll(t,!0,Math.abs(i.pageY-o))}e.preventDefault()}else void 0!==s.touchY&&((s.momentum=15*Math.pow(Math.abs(i.pageY-s.touchStartY)/(e.timeStamp-s.touchStartTime),2))&&(this.menuScrollStop(t),this.menuScroll(t),e.preventDefault()),delete s.touchY)}}},menuShow:function(t){if((t.dataSM("beforefirstshowfired")||(t.dataSM("beforefirstshowfired",!0),this.$root.triggerHandler("beforefirstshow.smapi",t[0])!==!1))&&this.$root.triggerHandler("beforeshow.smapi",t[0])!==!1&&(t.dataSM("shown-before",!0),canAnimate&&t.stop(!0,!0),!t.is(":visible"))){var 
e=t.dataSM("parent-a"),i=this.isCollapsible();if((this.opts.keepHighlighted||i)&&e.addClass("highlighted"),i)t.removeClass("sm-nowrap").css({zIndex:"",width:"auto",minWidth:"",maxWidth:"",top:"",left:"",marginLeft:"",marginTop:""});else{if(t.css("z-index",this.zIndexInc=(this.zIndexInc||this.getStartZIndex())+1),(this.opts.subMenusMinWidth||this.opts.subMenusMaxWidth)&&(t.css({width:"auto",minWidth:"",maxWidth:""}).addClass("sm-nowrap"),this.opts.subMenusMinWidth&&t.css("min-width",this.opts.subMenusMinWidth),this.opts.subMenusMaxWidth)){var s=this.getWidth(t);t.css("max-width",this.opts.subMenusMaxWidth),s>this.getWidth(t)&&t.removeClass("sm-nowrap").css("width",this.opts.subMenusMaxWidth)}this.menuPosition(t)}var o=function(){t.css("overflow","")};i?canAnimate&&this.opts.collapsibleShowFunction?this.opts.collapsibleShowFunction.call(this,t,o):t.show(this.opts.collapsibleShowDuration,o):canAnimate&&this.opts.showFunction?this.opts.showFunction.call(this,t,o):t.show(this.opts.showDuration,o),e.attr("aria-expanded","true"),t.attr({"aria-expanded":"true","aria-hidden":"false"}),this.visibleSubMenus.push(t),this.$root.triggerHandler("show.smapi",t[0])}},popupHide:function(t){this.hideTimeout&&(clearTimeout(this.hideTimeout),this.hideTimeout=0);var e=this;this.hideTimeout=setTimeout(function(){e.menuHideAll()},t?1:this.opts.hideTimeout)},popupShow:function(t,e){if(!this.opts.isPopup)return alert('SmartMenus jQuery Error:\n\nIf you want to show this menu via the "popupShow" method, set the isPopup:true option.'),void 0;if(this.hideTimeout&&(clearTimeout(this.hideTimeout),this.hideTimeout=0),this.$root.dataSM("shown-before",!0),canAnimate&&this.$root.stop(!0,!0),!this.$root.is(":visible")){this.$root.css({left:t,top:e});var 
i=this,s=function(){i.$root.css("overflow","")};canAnimate&&this.opts.showFunction?this.opts.showFunction.call(this,this.$root,s):this.$root.show(this.opts.showDuration,s),this.visibleSubMenus[0]=this.$root}},refresh:function(){this.destroy(!0),this.init(!0)},rootKeyDown:function(t){if(this.handleEvents())switch(t.keyCode){case 27:var e=this.activatedItems[0];if(e){this.menuHideAll(),e[0].focus();var i=e.dataSM("sub");i&&this.menuHide(i)}break;case 32:var s=$(t.target);if(s.is("a")&&this.handleItemEvents(s)){var i=s.dataSM("sub");i&&!i.is(":visible")&&(this.itemClick({currentTarget:t.target}),t.preventDefault())}}},rootOut:function(t){if(this.handleEvents()&&!this.isTouchMode()&&t.target!=this.$root[0]&&(this.hideTimeout&&(clearTimeout(this.hideTimeout),this.hideTimeout=0),!this.opts.showOnClick||!this.opts.hideOnClick)){var e=this;this.hideTimeout=setTimeout(function(){e.menuHideAll()},this.opts.hideTimeout)}},rootOver:function(t){this.handleEvents()&&!this.isTouchMode()&&t.target!=this.$root[0]&&this.hideTimeout&&(clearTimeout(this.hideTimeout),this.hideTimeout=0)},winResize:function(t){if(this.handleEvents()){if(!("onorientationchange"in window)||"orientationchange"==t.type){var e=this.isCollapsible();this.wasCollapsible&&e||(this.activatedItems.length&&this.activatedItems[this.activatedItems.length-1][0].blur(),this.menuHideAll()),this.wasCollapsible=e}}else if(this.$disableOverlay){var i=this.$root.offset();this.$disableOverlay.css({top:i.top,left:i.left,width:this.$root.outerWidth(),height:this.$root.outerHeight()})}}}}),$.fn.dataSM=function(t,e){return e?this.data(t+"_smartmenus",e):this.data(t+"_smartmenus")},$.fn.removeDataSM=function(t){return this.removeData(t+"_smartmenus")},$.fn.smartmenus=function(options){if("string"==typeof options){var args=arguments,method=options;return Array.prototype.shift.call(args),this.each(function(){var t=$(this).data("smartmenus");t&&t[method]&&t[method].apply(t,args)})}return this.each(function(){var 
dataOpts=$(this).data("sm-options")||null;if(dataOpts)try{dataOpts=eval("("+dataOpts+")")}catch(e){dataOpts=null,alert('ERROR\n\nSmartMenus jQuery init:\nInvalid "data-sm-options" attribute value syntax.')}new $.SmartMenus(this,$.extend({},$.fn.smartmenus.defaults,options,dataOpts))})},$.fn.smartmenus.defaults={isPopup:!1,mainMenuSubOffsetX:0,mainMenuSubOffsetY:0,subMenusSubOffsetX:0,subMenusSubOffsetY:0,subMenusMinWidth:"10em",subMenusMaxWidth:"20em",subIndicators:!0,subIndicatorsPos:"append",subIndicatorsText:"",scrollStep:30,scrollAccelerate:!0,showTimeout:250,hideTimeout:500,showDuration:0,showFunction:null,hideDuration:0,hideFunction:function(t,e){t.fadeOut(200,e)},collapsibleShowDuration:0,collapsibleShowFunction:function(t,e){t.slideDown(200,e)},collapsibleHideDuration:0,collapsibleHideFunction:function(t,e){t.slideUp(200,e)},showOnClick:!1,hideOnClick:!0,noMouseOver:!1,keepInViewport:!0,keepHighlighted:!0,markCurrentItem:!1,markCurrentTree:!0,rightToLeftSubMenus:!1,bottomToTopSubMenus:!1,collapsibleBehavior:"default"},$}); \ No newline at end of file diff --git a/docs/docs/doxygen/html/menu.js b/docs/docs/doxygen/html/menu.js new file mode 100644 index 00000000..433c15b8 --- /dev/null +++ b/docs/docs/doxygen/html/menu.js @@ -0,0 +1,50 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. + + Copyright (C) 1997-2017 by Dimitri van Heesch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +function initMenu(relPath,searchEnabled,serverSide,searchPage,search) { + function makeTree(data,relPath) { + var result=''; + if ('children' in data) { + result+=''; + } + return result; + } + + $('#main-nav').append(makeTree(menudata,relPath)); + $('#main-nav').children(':first').addClass('sm sm-dox').attr('id','main-menu'); + if (searchEnabled) { + if (serverSide) { + $('#main-menu').append('
  • '); + } else { + $('#main-menu').append('
  • '); + } + } + $('#main-menu').smartmenus(); +} +/* @license-end */ diff --git a/docs/docs/doxygen/html/menudata.js b/docs/docs/doxygen/html/menudata.js new file mode 100644 index 00000000..26db203c --- /dev/null +++ b/docs/docs/doxygen/html/menudata.js @@ -0,0 +1,38 @@ +/* +@licstart The following is the entire license notice for the +JavaScript code in this file. + +Copyright (C) 1997-2019 by Dimitri van Heesch + +This program is free software; you can redistribute it and/or modify +it under the terms of version 2 of the GNU General Public License as published by +the Free Software Foundation + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +@licend The above is the entire license notice +for the JavaScript code in this file +*/ +var menudata={children:[ +{text:"Main Page",url:"index.html"}, +{text:"Namespaces",url:"namespaces.html",children:[ +{text:"Namespace List",url:"namespaces.html"}, +{text:"Namespace Members",url:"namespacemembers.html",children:[ +{text:"All",url:"namespacemembers.html"}, +{text:"Functions",url:"namespacemembers_func.html"}, +{text:"Enumerations",url:"namespacemembers_enum.html"}]}]}, +{text:"Classes",url:"annotated.html",children:[ +{text:"Class List",url:"annotated.html"}, +{text:"Class Index",url:"classes.html"}, +{text:"Class Members",url:"functions.html",children:[ +{text:"All",url:"functions.html"}, +{text:"Functions",url:"functions_func.html"}]}]}, +{text:"Files",url:"files.html",children:[ +{text:"File List",url:"files.html"}]}]} diff --git a/docs/docs/doxygen/html/namespaceintel.html b/docs/docs/doxygen/html/namespaceintel.html new file mode 100644 index 00000000..fc40c139 --- /dev/null +++ b/docs/docs/doxygen/html/namespaceintel.html @@ -0,0 +1,92 @@ + + + + + + + +Intel HEXL: intel Namespace Reference + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + +
    +
    + +
    +
    intel Namespace Reference
    +
    +
    + + + + +

    +Namespaces

     hexl
     
    +
    + + + + diff --git a/docs/docs/doxygen/html/namespaceintel_1_1hexl.html b/docs/docs/doxygen/html/namespaceintel_1_1hexl.html new file mode 100644 index 00000000..fc4cd6fa --- /dev/null +++ b/docs/docs/doxygen/html/namespaceintel_1_1hexl.html @@ -0,0 +1,622 @@ + + + + + + + +Intel HEXL: intel::hexl Namespace Reference + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    intel::hexl Namespace Reference
    +
    +
    + + + + + +

    +Classes

    class  NTT
     Performs negacyclic forward and inverse number-theoretic transform (NTT), commonly used in RLWE cryptography. More...
     
    + + + + +

    +Enumerations

    enum  CMPINT {
    +  CMPINT::EQ = 0, +CMPINT::LT = 1, +CMPINT::LE = 2, +CMPINT::FALSE = 3, +
    +  CMPINT::NE = 4, +CMPINT::NLT = 5, +CMPINT::NLE = 6, +CMPINT::TRUE = 7 +
    + }
     Represents binary operations between two boolean values. More...
     
    + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    void EltwiseAddMod (uint64_t *result, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, uint64_t modulus)
     Adds two vectors elementwise with modular reduction. More...
     
    void EltwiseCmpAdd (uint64_t *result, const uint64_t *operand1, CMPINT cmp, uint64_t bound, uint64_t diff, uint64_t n)
     Computes element-wise conditional addition. More...
     
    void EltwiseCmpSubMod (uint64_t *result, const uint64_t *operand1, CMPINT cmp, uint64_t bound, uint64_t diff, uint64_t modulus, uint64_t n)
     Computes element-wise conditional modular subtraction. More...
     
    void EltwiseFMAMod (uint64_t *result, const uint64_t *arg1, uint64_t arg2, const uint64_t *arg3, uint64_t n, uint64_t modulus, uint64_t input_mod_factor)
     Computes fused multiply-add (arg1 * arg2 + arg3) mod modulus element-wise, broadcasting scalars to vectors. More...
     
    void EltwiseMultMod (uint64_t *result, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, uint64_t modulus, uint64_t input_mod_factor)
     Multiplies two vectors elementwise with modular reduction. More...
     
    void EltwiseReduceMod (uint64_t *result, const uint64_t *operand, uint64_t modulus, uint64_t n, uint64_t input_mod_factor, uint64_t output_mod_factor)
     Performs elementwise modular reduction. More...
     
    CMPINT Not (CMPINT cmp)
     Returns the logical negation of a binary operation. More...
     
    +

    Enumeration Type Documentation

    + +

    ◆ CMPINT

    + +
    +
    + + + + + +
    + + + + +
    enum intel::hexl::CMPINT
    +
    +strong
    +
    + +

    Represents binary comparison operations between two integer values.

    + + + + + + + + + +
    Enumerator
    EQ 

    Equal.

    +
    LT 

    Less than.

    +
    LE 

    Less than or equal.

    +
    FALSE 

    False.

    +
    NE 

    Not equal.

    +
    NLT 

    Not less than.

    +
    NLE 

    Not less than or equal.

    +
    TRUE 

    True.

    +
    + +
    +
    +

    Function Documentation

    + +

    ◆ EltwiseAddMod()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void intel::hexl::EltwiseAddMod (uint64_t * result,
    const uint64_t * operand1,
    const uint64_t * operand2,
    uint64_t n,
    uint64_t modulus 
    )
    +
    + +

    Adds two vectors elementwise with modular reduction.

    +
    Parameters
    + + + + + + +
    [out]resultStores result
    [in]operand1Vector of elements to add. Each element must be less than the modulus
    [in]operand2Vector of elements to add. Each element must be less than the modulus
    [in]nNumber of elements in each vector
    [in]modulusModulus with which to perform modular reduction. Must be in the range \([2, 2^{63} - 1]\)
    +
    +
    +

    Computes \( result[i] = (operand1[i] + operand2[i]) \mod modulus \) for \( i=0, ..., n-1\).

    + +
    +
    + +

    ◆ EltwiseCmpAdd()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void intel::hexl::EltwiseCmpAdd (uint64_t * result,
    const uint64_t * operand1,
    CMPINT cmp,
    uint64_t bound,
    uint64_t diff,
    uint64_t n 
    )
    +
    + +

    Computes element-wise conditional addition.

    +
    Parameters
    + + + + + + + +
    [out]resultStores the result
    [in]operand1Vector of elements to compare
    [in]cmpComparison operation
    [in]boundScalar to compare against
    [in]diffScalar to conditionally add
    [in]nNumber of elements in operand1
    +
    +
    +

    Computes result[i] = cmp(operand1[i], bound) ? operand1[i] + diff : operand1[i] for all \(i=0, ..., n-1\).

    + +
    +
    + +

    ◆ EltwiseCmpSubMod()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void intel::hexl::EltwiseCmpSubMod (uint64_t * result,
    const uint64_t * operand1,
    CMPINT cmp,
    uint64_t bound,
    uint64_t diff,
    uint64_t modulus,
    uint64_t n 
    )
    +
    + +

    Computes element-wise conditional modular subtraction.

    +
    Parameters
    + + + + + + + + +
    [out]resultStores the result
    [in]operand1Vector of elements to compare
    [in]cmpComparison function
    [in]boundScalar to compare against
    [in]diffScalar to subtract by
    [in]modulusModulus to reduce by
    [in]nNumber of elements in operand1
    +
    +
    +

    Computes result[i] = cmp(operand1[i], bound) ? (operand1[i] - diff) mod modulus : operand1[i] for all \(i=0, ..., n-1\).

    + +
    +
    + +

    ◆ EltwiseFMAMod()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void intel::hexl::EltwiseFMAMod (uint64_t * result,
    const uint64_t * arg1,
    uint64_t arg2,
    const uint64_t * arg3,
    uint64_t n,
    uint64_t modulus,
    uint64_t input_mod_factor 
    )
    +
    + +

    Computes fused multiply-add (arg1 * arg2 + arg3) mod modulus element-wise, broadcasting scalars to vectors.

    +
    Parameters
    + + + + + + + + +
    [out]resultStores the result
    [in]arg1Vector to multiply
    [in]arg2Scalar to multiply
    [in]arg3Vector to add. Will not add if arg3 == nullptr
    [in]nNumber of elements in each vector
    [in]modulusModulus with which to perform modular reduction. Must be in the range \( [2, 2^{61} - 1]\)
    [in]input_mod_factorAssumes input elements are in [0, input_mod_factor * p). Must be 1, 2, 4, or 8.
    +
    +
    + +
    +
    + +

    ◆ EltwiseMultMod()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void intel::hexl::EltwiseMultMod (uint64_t * result,
    const uint64_t * operand1,
    const uint64_t * operand2,
    uint64_t n,
    uint64_t modulus,
    uint64_t input_mod_factor 
    )
    +
    + +

    Multiplies two vectors elementwise with modular reduction.

    +
    Parameters
    + + + + + + + +
    [out]resultStores the result of element-wise multiplication
    [in]operand1Vector of elements to multiply. Each element must be less than the modulus.
    [in]operand2Vector of elements to multiply. Each element must be less than the modulus.
    [in]nNumber of elements in each vector
    [in]modulusModulus with which to perform modular reduction
    [in]input_mod_factorAssumes input elements are in [0, input_mod_factor * p). Must be 1, 2 or 4.
    +
    +
    +

    Computes result[i] = (operand1[i] * operand2[i]) mod modulus for i=0, ..., n - 1

    + +
    +
    + +

    ◆ EltwiseReduceMod()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void intel::hexl::EltwiseReduceMod (uint64_t * result,
    const uint64_t * operand,
    uint64_t modulus,
    uint64_t n,
    uint64_t input_mod_factor,
    uint64_t output_mod_factor 
    )
    +
    + +

    Performs elementwise modular reduction.

    +
    Parameters
    + + + + + + + +
    [out]resultStores the result
    [in]operandVector of elements to reduce
    [in]nNumber of elements in operand
    [in]modulusModulus with which to perform modular reduction
    [in]input_mod_factorAssumes input elements are in [0, input_mod_factor * p). Must be 0, 1, 2 or 4. input_mod_factor=0 means there is no knowledge of the input range; Barrett reduction will be used in this case. Requires input_mod_factor >= output_mod_factor unless input_mod_factor == 0.
    [in]output_mod_factorOutput elements will be in [0, output_mod_factor * p). Must be 1 or 2. For input_mod_factor=0, output_mod_factor will be set to 1.
    +
    +
    +
    + +
    +
    + +

    ◆ Not()

    + +
    +
    + + + + + +
    + + + + + + + + +
    CMPINT intel::hexl::Not (CMPINT cmp)
    +
    +inline
    +
    + +

    Returns the logical negation of a binary operation.

    +
    Parameters
    + + +
    [in]cmpThe binary operation to negate
    +
    +
    + +
    +
    +
    + + + + diff --git a/docs/docs/doxygen/html/namespacemembers.html b/docs/docs/doxygen/html/namespacemembers.html new file mode 100644 index 00000000..8fc2de38 --- /dev/null +++ b/docs/docs/doxygen/html/namespacemembers.html @@ -0,0 +1,106 @@ + + + + + + + +Intel HEXL: Namespace Members + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    Here is a list of all namespace members with links to the namespace documentation for each member:
    +
    + + + + diff --git a/docs/docs/doxygen/html/namespacemembers_enum.html b/docs/docs/doxygen/html/namespacemembers_enum.html new file mode 100644 index 00000000..2fc84c45 --- /dev/null +++ b/docs/docs/doxygen/html/namespacemembers_enum.html @@ -0,0 +1,85 @@ + + + + + + + +Intel HEXL: Namespace Members + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    + + + + diff --git a/docs/docs/doxygen/html/namespacemembers_func.html b/docs/docs/doxygen/html/namespacemembers_func.html new file mode 100644 index 00000000..9c63ebcc --- /dev/null +++ b/docs/docs/doxygen/html/namespacemembers_func.html @@ -0,0 +1,103 @@ + + + + + + + +Intel HEXL: Namespace Members + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    + + + + diff --git a/docs/docs/doxygen/html/namespaces.html b/docs/docs/doxygen/html/namespaces.html new file mode 100644 index 00000000..b1fb9dd5 --- /dev/null +++ b/docs/docs/doxygen/html/namespaces.html @@ -0,0 +1,90 @@ + + + + + + + +Intel HEXL: Namespace List + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    +
    Namespace List
    +
    +
    +
    Here is a list of all namespaces with brief descriptions:
    +
    [detail level 12]
    + + +
     Nintel
     Nhexl
    +
    +
    + + + + diff --git a/docs/docs/doxygen/html/nav_f.png b/docs/docs/doxygen/html/nav_f.png new file mode 100644 index 00000000..72a58a52 Binary files /dev/null and b/docs/docs/doxygen/html/nav_f.png differ diff --git a/docs/docs/doxygen/html/nav_g.png b/docs/docs/doxygen/html/nav_g.png new file mode 100644 index 00000000..2093a237 Binary files /dev/null and b/docs/docs/doxygen/html/nav_g.png differ diff --git a/docs/docs/doxygen/html/nav_h.png b/docs/docs/doxygen/html/nav_h.png new file mode 100644 index 00000000..33389b10 Binary files /dev/null and b/docs/docs/doxygen/html/nav_h.png differ diff --git a/docs/docs/doxygen/html/ntt_8hpp.html b/docs/docs/doxygen/html/ntt_8hpp.html new file mode 100644 index 00000000..775ed9a3 --- /dev/null +++ b/docs/docs/doxygen/html/ntt_8hpp.html @@ -0,0 +1,110 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/ntt/ntt.hpp File Reference + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    ntt.hpp File Reference
    +
    +
    +
    #include <stdint.h>
    +#include <memory>
    +#include <vector>
    +
    +

    Go to the source code of this file.

    + + + + + +

    +Classes

    class  intel::hexl::NTT
     Performs negacyclic forward and inverse number-theoretic transform (NTT), commonly used in RLWE cryptography. More...
     
    + + + + + +

    +Namespaces

     intel
     
     intel::hexl
     
    +
    + + + + diff --git a/docs/docs/doxygen/html/ntt_8hpp_source.html b/docs/docs/doxygen/html/ntt_8hpp_source.html new file mode 100644 index 00000000..220339ab --- /dev/null +++ b/docs/docs/doxygen/html/ntt_8hpp_source.html @@ -0,0 +1,131 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/ntt/ntt.hpp Source File + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    ntt.hpp
    +
    +
    +Go to the documentation of this file.
    1 // Copyright (C) 2020-2021 Intel Corporation
    +
    2 // SPDX-License-Identifier: Apache-2.0
    +
    3 
    +
    4 #pragma once
    +
    5 
    +
    6 #include <stdint.h>
    +
    7 
    +
    8 #include <memory>
    +
    9 #include <vector>
    +
    10 
    +
    11 namespace intel {
    +
    12 namespace hexl {
    +
    13 
    +
    19 class NTT {
    +
    20  public:
    +
    22  NTT();
    +
    23 
    +
    25  ~NTT();
    +
    26 
    +
    33  NTT(uint64_t degree, uint64_t p);
    +
    34 
    +
    43  NTT(uint64_t degree, uint64_t p, uint64_t root_of_unity);
    +
    44 
    +
    52  void ComputeForward(uint64_t* result, const uint64_t* operand,
    +
    53  uint64_t input_mod_factor, uint64_t output_mod_factor);
    +
    54 
    +
    62  void ComputeInverse(uint64_t* result, const uint64_t* operand,
    +
    63  uint64_t input_mod_factor, uint64_t output_mod_factor);
    +
    64 
    +
    65  class NTTImpl;
    +
    66 
    +
    67  private:
    +
    68  std::shared_ptr<NTTImpl> m_impl;
    +
    69 };
    +
    70 
    +
    71 } // namespace hexl
    +
    72 } // namespace intel
    +
    +
    void ComputeForward(uint64_t *result, const uint64_t *operand, uint64_t input_mod_factor, uint64_t output_mod_factor)
    Compute forward NTT. Results are bit-reversed.
    +
    void ComputeInverse(uint64_t *result, const uint64_t *operand, uint64_t input_mod_factor, uint64_t output_mod_factor)
    +
    Definition: eltwise-add-mod.hpp:8
    +
    ~NTT()
    Destructs the NTT object.
    +
    Performs negacyclic forward and inverse number-theoretic transform (NTT), commonly used in RLWE crypt...
    Definition: ntt.hpp:19
    +
    NTT()
    Initializes an empty NTT object.
    + + + + diff --git a/docs/docs/doxygen/html/open.png b/docs/docs/doxygen/html/open.png new file mode 100644 index 00000000..30f75c7e Binary files /dev/null and b/docs/docs/doxygen/html/open.png differ diff --git a/docs/docs/doxygen/html/search/all_0.html b/docs/docs/doxygen/html/search/all_0.html new file mode 100644 index 00000000..26dd244f --- /dev/null +++ b/docs/docs/doxygen/html/search/all_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_0.js b/docs/docs/doxygen/html/search/all_0.js new file mode 100644 index 00000000..f420220d --- /dev/null +++ b/docs/docs/doxygen/html/search/all_0.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['cmpint_0',['CMPINT',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006',1,'intel::hexl']]], + ['computeforward_1',['ComputeForward',['../classintel_1_1hexl_1_1_n_t_t.html#a7f8dac5ff3fc117d3e7259762a716140',1,'intel::hexl::NTT']]], + ['computeinverse_2',['ComputeInverse',['../classintel_1_1hexl_1_1_n_t_t.html#a31e78375dcafd5df85cb1259a9156a9a',1,'intel::hexl::NTT']]] +]; diff --git a/docs/docs/doxygen/html/search/all_1.html b/docs/docs/doxygen/html/search/all_1.html new file mode 100644 index 00000000..8eb215b9 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_1.js b/docs/docs/doxygen/html/search/all_1.js new file mode 100644 index 00000000..53b97819 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_1.js @@ -0,0 +1,16 @@ +var searchData= +[ + ['eltwise_2dadd_2dmod_2ehpp_3',['eltwise-add-mod.hpp',['../eltwise-add-mod_8hpp.html',1,'']]], + ['eltwise_2dcmp_2dadd_2ehpp_4',['eltwise-cmp-add.hpp',['../eltwise-cmp-add_8hpp.html',1,'']]], + ['eltwise_2dcmp_2dsub_2dmod_2ehpp_5',['eltwise-cmp-sub-mod.hpp',['../eltwise-cmp-sub-mod_8hpp.html',1,'']]], + ['eltwise_2dfma_2dmod_2ehpp_6',['eltwise-fma-mod.hpp',['../eltwise-fma-mod_8hpp.html',1,'']]], + ['eltwise_2dmult_2dmod_2ehpp_7',['eltwise-mult-mod.hpp',['../eltwise-mult-mod_8hpp.html',1,'']]], + ['eltwise_2dreduce_2dmod_2ehpp_8',['eltwise-reduce-mod.hpp',['../eltwise-reduce-mod_8hpp.html',1,'']]], + ['eltwiseaddmod_9',['EltwiseAddMod',['../namespaceintel_1_1hexl.html#a319244a133f57825ba7e593ad5c71709',1,'intel::hexl']]], + ['eltwisecmpadd_10',['EltwiseCmpAdd',['../namespaceintel_1_1hexl.html#ad09f0d71efdfbde79c2a8ed92d7da811',1,'intel::hexl']]], + ['eltwisecmpsubmod_11',['EltwiseCmpSubMod',['../namespaceintel_1_1hexl.html#aa06f039b71cf61990911e753595f1f78',1,'intel::hexl']]], + ['eltwisefmamod_12',['EltwiseFMAMod',['../namespaceintel_1_1hexl.html#a5b65d563391b4a1a5041633aeb118aa5',1,'intel::hexl']]], + ['eltwisemultmod_13',['EltwiseMultMod',['../namespaceintel_1_1hexl.html#a705bc0321d937ae4d1f8d50279e3cff1',1,'intel::hexl']]], + ['eltwisereducemod_14',['EltwiseReduceMod',['../namespaceintel_1_1hexl.html#af7e59b130824f9cd4ad0aa54c52ad50c',1,'intel::hexl']]], + ['eq_15',['EQ',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006a2dcbad7477fd40561e8b8198f173bd47',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/all_2.html b/docs/docs/doxygen/html/search/all_2.html new file mode 100644 index 00000000..b26d9165 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_2.html @@ -0,0 +1,30 @@ + + + + + + 
+ + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_2.js b/docs/docs/doxygen/html/search/all_2.js new file mode 100644 index 00000000..a1f541c0 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_2.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['false_16',['FALSE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006a946003f97ccc52d5d3b54ac0ec31bbfc',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/all_3.html b/docs/docs/doxygen/html/search/all_3.html new file mode 100644 index 00000000..b61b96f8 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_3.js b/docs/docs/doxygen/html/search/all_3.js new file mode 100644 index 00000000..b2a720f3 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_3.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['hexl_17',['hexl',['../namespaceintel_1_1hexl.html',1,'intel']]], + ['intel_20homomorphic_20encryption_20acceleration_20library_20_28hexl_29_18',['Intel Homomorphic Encryption Acceleration Library (HEXL)',['../index.html',1,'']]], + ['intel_19',['intel',['../namespaceintel.html',1,'']]], + ['intel_2dhexl_2ehpp_20',['intel-hexl.hpp',['../intel-hexl_8hpp.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/all_4.html b/docs/docs/doxygen/html/search/all_4.html new file mode 100644 index 00000000..06de1550 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_4.js b/docs/docs/doxygen/html/search/all_4.js new file mode 100644 index 00000000..cfe9d2fb --- /dev/null +++ b/docs/docs/doxygen/html/search/all_4.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['le_21',['LE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006acfe6055d2e0503be378bb63449ec7ba6',1,'intel::hexl']]], + ['lt_22',['LT',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006ac562607189d77eb9dfb707464c1e7b0b',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/all_5.html b/docs/docs/doxygen/html/search/all_5.html new file mode 100644 index 00000000..2544c4e5 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_5.js b/docs/docs/doxygen/html/search/all_5.js new file mode 100644 index 00000000..78327221 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_5.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['ne_23',['NE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006adc33066c3993e0d50896e533fd692ce0',1,'intel::hexl']]], + ['nle_24',['NLE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006aacd748f300c5d189c47807e2a9d6ea57',1,'intel::hexl']]], + ['nlt_25',['NLT',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006ad7d6a13c7b311ec8a3c9fcfb1919a2f8',1,'intel::hexl']]], + ['not_26',['Not',['../namespaceintel_1_1hexl.html#a8c654502a5e7fe2cfdd198f0fd920f2a',1,'intel::hexl']]], + ['ntt_27',['NTT',['../classintel_1_1hexl_1_1_n_t_t.html',1,'intel::hexl::NTT'],['../classintel_1_1hexl_1_1_n_t_t.html#ade0447617b50232d2a076f99e672d15c',1,'intel::hexl::NTT::NTT()'],['../classintel_1_1hexl_1_1_n_t_t.html#a3f44d9ead9be66540b9732d5c98bb000',1,'intel::hexl::NTT::NTT(uint64_t degree, uint64_t p)'],['../classintel_1_1hexl_1_1_n_t_t.html#a63eea17916855a9ec73a2cc8c57b2a63',1,'intel::hexl::NTT::NTT(uint64_t degree, uint64_t p, uint64_t root_of_unity)']]], + ['ntt_2ehpp_28',['ntt.hpp',['../ntt_8hpp.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/all_6.html b/docs/docs/doxygen/html/search/all_6.html new file mode 100644 index 00000000..43f14eab --- /dev/null +++ b/docs/docs/doxygen/html/search/all_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_6.js b/docs/docs/doxygen/html/search/all_6.js new file mode 100644 index 00000000..c7316c46 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_6.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['readme_2emd_29',['README.md',['../_r_e_a_d_m_e_8md.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/all_7.html b/docs/docs/doxygen/html/search/all_7.html new file mode 100644 index 00000000..af52f82a --- /dev/null +++ b/docs/docs/doxygen/html/search/all_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_7.js b/docs/docs/doxygen/html/search/all_7.js new file mode 100644 index 00000000..064abe2c --- /dev/null +++ b/docs/docs/doxygen/html/search/all_7.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['true_30',['TRUE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006ac0d83f0b82a6b30de8811e69e6d95c61',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/all_8.html b/docs/docs/doxygen/html/search/all_8.html new file mode 100644 index 00000000..cf2b5df9 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_8.js b/docs/docs/doxygen/html/search/all_8.js new file mode 100644 index 00000000..31ce5aae --- /dev/null +++ b/docs/docs/doxygen/html/search/all_8.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['util_2ehpp_31',['util.hpp',['../util_8hpp.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/all_9.html b/docs/docs/doxygen/html/search/all_9.html new file mode 100644 index 00000000..690785a5 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/all_9.js b/docs/docs/doxygen/html/search/all_9.js new file mode 100644 index 00000000..3e7ca8e1 --- /dev/null +++ b/docs/docs/doxygen/html/search/all_9.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['_7entt_32',['~NTT',['../classintel_1_1hexl_1_1_n_t_t.html#ab6fca1753db0834c692232e8897c725f',1,'intel::hexl::NTT']]] +]; diff --git a/docs/docs/doxygen/html/search/classes_0.html b/docs/docs/doxygen/html/search/classes_0.html new file mode 100644 index 00000000..f7e4c14e --- /dev/null +++ b/docs/docs/doxygen/html/search/classes_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/classes_0.js b/docs/docs/doxygen/html/search/classes_0.js new file mode 100644 index 00000000..ba3a2b03 --- /dev/null +++ b/docs/docs/doxygen/html/search/classes_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['ntt_33',['NTT',['../classintel_1_1hexl_1_1_n_t_t.html',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/close.png b/docs/docs/doxygen/html/search/close.png new file mode 100644 index 00000000..9342d3df Binary files /dev/null and b/docs/docs/doxygen/html/search/close.png differ diff --git a/docs/docs/doxygen/html/search/enums_0.html b/docs/docs/doxygen/html/search/enums_0.html new file mode 100644 index 00000000..9669700a --- /dev/null +++ b/docs/docs/doxygen/html/search/enums_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/enums_0.js b/docs/docs/doxygen/html/search/enums_0.js new file mode 100644 index 00000000..f3763213 --- /dev/null +++ b/docs/docs/doxygen/html/search/enums_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['cmpint_57',['CMPINT',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/enumvalues_0.html b/docs/docs/doxygen/html/search/enumvalues_0.html new file mode 100644 index 00000000..92862489 --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/enumvalues_0.js b/docs/docs/doxygen/html/search/enumvalues_0.js new file mode 100644 index 00000000..01905394 --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['eq_58',['EQ',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006a2dcbad7477fd40561e8b8198f173bd47',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/enumvalues_1.html b/docs/docs/doxygen/html/search/enumvalues_1.html new file mode 100644 index 00000000..e22a79fb --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/enumvalues_1.js b/docs/docs/doxygen/html/search/enumvalues_1.js new file mode 100644 index 00000000..ccae0db3 --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['false_59',['FALSE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006a946003f97ccc52d5d3b54ac0ec31bbfc',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/enumvalues_2.html b/docs/docs/doxygen/html/search/enumvalues_2.html new file mode 100644 index 00000000..01a77bf7 --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/enumvalues_2.js b/docs/docs/doxygen/html/search/enumvalues_2.js new file mode 100644 index 00000000..bbeb066a --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_2.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['le_60',['LE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006acfe6055d2e0503be378bb63449ec7ba6',1,'intel::hexl']]], + ['lt_61',['LT',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006ac562607189d77eb9dfb707464c1e7b0b',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/enumvalues_3.html b/docs/docs/doxygen/html/search/enumvalues_3.html new file mode 100644 index 00000000..4e761d60 --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/enumvalues_3.js b/docs/docs/doxygen/html/search/enumvalues_3.js new file mode 100644 index 00000000..b83eff7f --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_3.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['ne_62',['NE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006adc33066c3993e0d50896e533fd692ce0',1,'intel::hexl']]], + ['nle_63',['NLE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006aacd748f300c5d189c47807e2a9d6ea57',1,'intel::hexl']]], + ['nlt_64',['NLT',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006ad7d6a13c7b311ec8a3c9fcfb1919a2f8',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/enumvalues_4.html b/docs/docs/doxygen/html/search/enumvalues_4.html new file mode 100644 index 00000000..e2977a05 --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/enumvalues_4.js b/docs/docs/doxygen/html/search/enumvalues_4.js new file mode 100644 index 00000000..339af7d1 --- /dev/null +++ b/docs/docs/doxygen/html/search/enumvalues_4.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['true_65',['TRUE',['../namespaceintel_1_1hexl.html#abdcc9d2d5bb10fa95d5f143874508006ac0d83f0b82a6b30de8811e69e6d95c61',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/files_0.html b/docs/docs/doxygen/html/search/files_0.html new file mode 100644 index 00000000..737608e1 --- /dev/null +++ b/docs/docs/doxygen/html/search/files_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/files_0.js b/docs/docs/doxygen/html/search/files_0.js new file mode 100644 index 00000000..297f74c2 --- /dev/null +++ b/docs/docs/doxygen/html/search/files_0.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['eltwise_2dadd_2dmod_2ehpp_36',['eltwise-add-mod.hpp',['../eltwise-add-mod_8hpp.html',1,'']]], + ['eltwise_2dcmp_2dadd_2ehpp_37',['eltwise-cmp-add.hpp',['../eltwise-cmp-add_8hpp.html',1,'']]], + ['eltwise_2dcmp_2dsub_2dmod_2ehpp_38',['eltwise-cmp-sub-mod.hpp',['../eltwise-cmp-sub-mod_8hpp.html',1,'']]], + ['eltwise_2dfma_2dmod_2ehpp_39',['eltwise-fma-mod.hpp',['../eltwise-fma-mod_8hpp.html',1,'']]], + ['eltwise_2dmult_2dmod_2ehpp_40',['eltwise-mult-mod.hpp',['../eltwise-mult-mod_8hpp.html',1,'']]], + ['eltwise_2dreduce_2dmod_2ehpp_41',['eltwise-reduce-mod.hpp',['../eltwise-reduce-mod_8hpp.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/files_1.html b/docs/docs/doxygen/html/search/files_1.html new file mode 100644 index 00000000..f27a62de --- /dev/null +++ b/docs/docs/doxygen/html/search/files_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/files_1.js b/docs/docs/doxygen/html/search/files_1.js new file mode 100644 index 00000000..e266fb35 --- /dev/null +++ b/docs/docs/doxygen/html/search/files_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['intel_2dhexl_2ehpp_42',['intel-hexl.hpp',['../intel-hexl_8hpp.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/files_2.html b/docs/docs/doxygen/html/search/files_2.html new file mode 100644 index 00000000..a45066e9 --- /dev/null +++ b/docs/docs/doxygen/html/search/files_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/files_2.js b/docs/docs/doxygen/html/search/files_2.js new file mode 100644 index 00000000..4b7806b5 --- /dev/null +++ b/docs/docs/doxygen/html/search/files_2.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['ntt_2ehpp_43',['ntt.hpp',['../ntt_8hpp.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/files_3.html b/docs/docs/doxygen/html/search/files_3.html new file mode 100644 index 00000000..1076bc5a --- /dev/null +++ b/docs/docs/doxygen/html/search/files_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/files_3.js b/docs/docs/doxygen/html/search/files_3.js new file mode 100644 index 00000000..8c25595c --- /dev/null +++ b/docs/docs/doxygen/html/search/files_3.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['readme_2emd_44',['README.md',['../_r_e_a_d_m_e_8md.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/files_4.html b/docs/docs/doxygen/html/search/files_4.html new file mode 100644 index 00000000..e5cd7f43 --- /dev/null +++ b/docs/docs/doxygen/html/search/files_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/files_4.js b/docs/docs/doxygen/html/search/files_4.js new file mode 100644 index 00000000..d53bc4f3 --- /dev/null +++ b/docs/docs/doxygen/html/search/files_4.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['util_2ehpp_45',['util.hpp',['../util_8hpp.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/functions_0.html b/docs/docs/doxygen/html/search/functions_0.html new file mode 100644 index 00000000..e17c7111 --- /dev/null +++ b/docs/docs/doxygen/html/search/functions_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/functions_0.js b/docs/docs/doxygen/html/search/functions_0.js new file mode 100644 index 00000000..3d6261a1 --- /dev/null +++ b/docs/docs/doxygen/html/search/functions_0.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['computeforward_46',['ComputeForward',['../classintel_1_1hexl_1_1_n_t_t.html#a7f8dac5ff3fc117d3e7259762a716140',1,'intel::hexl::NTT']]], + ['computeinverse_47',['ComputeInverse',['../classintel_1_1hexl_1_1_n_t_t.html#a31e78375dcafd5df85cb1259a9156a9a',1,'intel::hexl::NTT']]] +]; diff --git a/docs/docs/doxygen/html/search/functions_1.html b/docs/docs/doxygen/html/search/functions_1.html new file mode 100644 index 00000000..0ddac0a4 --- /dev/null +++ b/docs/docs/doxygen/html/search/functions_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/functions_1.js b/docs/docs/doxygen/html/search/functions_1.js new file mode 100644 index 00000000..e79b9e2d --- /dev/null +++ b/docs/docs/doxygen/html/search/functions_1.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['eltwiseaddmod_48',['EltwiseAddMod',['../namespaceintel_1_1hexl.html#a319244a133f57825ba7e593ad5c71709',1,'intel::hexl']]], + ['eltwisecmpadd_49',['EltwiseCmpAdd',['../namespaceintel_1_1hexl.html#ad09f0d71efdfbde79c2a8ed92d7da811',1,'intel::hexl']]], + ['eltwisecmpsubmod_50',['EltwiseCmpSubMod',['../namespaceintel_1_1hexl.html#aa06f039b71cf61990911e753595f1f78',1,'intel::hexl']]], + ['eltwisefmamod_51',['EltwiseFMAMod',['../namespaceintel_1_1hexl.html#a5b65d563391b4a1a5041633aeb118aa5',1,'intel::hexl']]], + ['eltwisemultmod_52',['EltwiseMultMod',['../namespaceintel_1_1hexl.html#a705bc0321d937ae4d1f8d50279e3cff1',1,'intel::hexl']]], + ['eltwisereducemod_53',['EltwiseReduceMod',['../namespaceintel_1_1hexl.html#af7e59b130824f9cd4ad0aa54c52ad50c',1,'intel::hexl']]] +]; diff --git a/docs/docs/doxygen/html/search/functions_2.html b/docs/docs/doxygen/html/search/functions_2.html new file mode 100644 index 00000000..2737c5ac --- /dev/null +++ b/docs/docs/doxygen/html/search/functions_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/functions_2.js b/docs/docs/doxygen/html/search/functions_2.js new file mode 100644 index 00000000..17343094 --- /dev/null +++ b/docs/docs/doxygen/html/search/functions_2.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['not_54',['Not',['../namespaceintel_1_1hexl.html#a8c654502a5e7fe2cfdd198f0fd920f2a',1,'intel::hexl']]], + ['ntt_55',['NTT',['../classintel_1_1hexl_1_1_n_t_t.html#ade0447617b50232d2a076f99e672d15c',1,'intel::hexl::NTT::NTT()'],['../classintel_1_1hexl_1_1_n_t_t.html#a3f44d9ead9be66540b9732d5c98bb000',1,'intel::hexl::NTT::NTT(uint64_t degree, uint64_t p)'],['../classintel_1_1hexl_1_1_n_t_t.html#a63eea17916855a9ec73a2cc8c57b2a63',1,'intel::hexl::NTT::NTT(uint64_t degree, uint64_t p, uint64_t root_of_unity)']]] +]; diff --git a/docs/docs/doxygen/html/search/functions_3.html b/docs/docs/doxygen/html/search/functions_3.html new file mode 100644 index 00000000..6da86e7d --- /dev/null +++ b/docs/docs/doxygen/html/search/functions_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/functions_3.js b/docs/docs/doxygen/html/search/functions_3.js new file mode 100644 index 00000000..17d6e527 --- /dev/null +++ b/docs/docs/doxygen/html/search/functions_3.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['_7entt_56',['~NTT',['../classintel_1_1hexl_1_1_n_t_t.html#ab6fca1753db0834c692232e8897c725f',1,'intel::hexl::NTT']]] +]; diff --git a/docs/docs/doxygen/html/search/mag_sel.png b/docs/docs/doxygen/html/search/mag_sel.png new file mode 100644 index 00000000..39c0ed52 Binary files /dev/null and b/docs/docs/doxygen/html/search/mag_sel.png differ diff --git a/docs/docs/doxygen/html/search/namespaces_0.html b/docs/docs/doxygen/html/search/namespaces_0.html new file mode 100644 index 00000000..76996d1c --- /dev/null +++ b/docs/docs/doxygen/html/search/namespaces_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/namespaces_0.js b/docs/docs/doxygen/html/search/namespaces_0.js new file mode 100644 index 00000000..469c59aa --- /dev/null +++ b/docs/docs/doxygen/html/search/namespaces_0.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['hexl_34',['hexl',['../namespaceintel_1_1hexl.html',1,'intel']]], + ['intel_35',['intel',['../namespaceintel.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/nomatches.html b/docs/docs/doxygen/html/search/nomatches.html new file mode 100644 index 00000000..43773208 --- /dev/null +++ b/docs/docs/doxygen/html/search/nomatches.html @@ -0,0 +1,12 @@ + + + + + + + +
    +
    No Matches
    +
    + + diff --git a/docs/docs/doxygen/html/search/pages_0.html b/docs/docs/doxygen/html/search/pages_0.html new file mode 100644 index 00000000..9a6a29ad --- /dev/null +++ b/docs/docs/doxygen/html/search/pages_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/docs/doxygen/html/search/pages_0.js b/docs/docs/doxygen/html/search/pages_0.js new file mode 100644 index 00000000..8c0a0433 --- /dev/null +++ b/docs/docs/doxygen/html/search/pages_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['intel_20homomorphic_20encryption_20acceleration_20library_20_28hexl_29_66',['Intel Homomorphic Encryption Acceleration Library (HEXL)',['../index.html',1,'']]] +]; diff --git a/docs/docs/doxygen/html/search/search.css b/docs/docs/doxygen/html/search/search.css new file mode 100644 index 00000000..3cf9df94 --- /dev/null +++ b/docs/docs/doxygen/html/search/search.css @@ -0,0 +1,271 @@ +/*---------------- Search Box */ + +#FSearchBox { + float: left; +} + +#MSearchBox { + white-space : nowrap; + float: none; + margin-top: 8px; + right: 0px; + width: 170px; + height: 24px; + z-index: 102; +} + +#MSearchBox .left +{ + display:block; + position:absolute; + left:10px; + width:20px; + height:19px; + background:url('search_l.png') no-repeat; + background-position:right; +} + +#MSearchSelect { + display:block; + position:absolute; + width:20px; + height:19px; +} + +.left #MSearchSelect { + left:4px; +} + +.right #MSearchSelect { + right:5px; +} + +#MSearchField { + display:block; + position:absolute; + height:19px; + background:url('search_m.png') repeat-x; + border:none; + width:115px; + margin-left:20px; + padding-left:4px; + color: #909090; + outline: none; + font: 9pt Arial, Verdana, sans-serif; + -webkit-border-radius: 0px; +} + +#FSearchBox #MSearchField { + margin-left:15px; +} + +#MSearchBox .right { + display:block; + position:absolute; + right:10px; + top:8px; + width:20px; + height:19px; + background:url('search_r.png') no-repeat; + background-position:left; +} + +#MSearchClose { + display: none; + position: absolute; + top: 4px; + background : none; + border: none; + margin: 0px 4px 0px 0px; + padding: 0px 0px; + outline: none; +} + +.left #MSearchClose { + left: 6px; +} + +.right #MSearchClose { + right: 2px; +} + 
+.MSearchBoxActive #MSearchField { + color: #000000; +} + +/*---------------- Search filter selection */ + +#MSearchSelectWindow { + display: none; + position: absolute; + left: 0; top: 0; + border: 1px solid #90A5CE; + background-color: #F9FAFC; + z-index: 10001; + padding-top: 4px; + padding-bottom: 4px; + -moz-border-radius: 4px; + -webkit-border-top-left-radius: 4px; + -webkit-border-top-right-radius: 4px; + -webkit-border-bottom-left-radius: 4px; + -webkit-border-bottom-right-radius: 4px; + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); +} + +.SelectItem { + font: 8pt Arial, Verdana, sans-serif; + padding-left: 2px; + padding-right: 12px; + border: 0px; +} + +span.SelectionMark { + margin-right: 4px; + font-family: monospace; + outline-style: none; + text-decoration: none; +} + +a.SelectItem { + display: block; + outline-style: none; + color: #000000; + text-decoration: none; + padding-left: 6px; + padding-right: 12px; +} + +a.SelectItem:focus, +a.SelectItem:active { + color: #000000; + outline-style: none; + text-decoration: none; +} + +a.SelectItem:hover { + color: #FFFFFF; + background-color: #3D578C; + outline-style: none; + text-decoration: none; + cursor: pointer; + display: block; +} + +/*---------------- Search results window */ + +iframe#MSearchResults { + width: 60ex; + height: 15em; +} + +#MSearchResultsWindow { + display: none; + position: absolute; + left: 0; top: 0; + border: 1px solid #000; + background-color: #EEF1F7; + z-index:10000; +} + +/* ----------------------------------- */ + + +#SRIndex { + clear:both; + padding-bottom: 15px; +} + +.SREntry { + font-size: 10pt; + padding-left: 1ex; +} + +.SRPage .SREntry { + font-size: 8pt; + padding: 1px 5px; +} + +body.SRPage { + margin: 5px 2px; +} + +.SRChildren { + padding-left: 3ex; padding-bottom: .5em +} + +.SRPage .SRChildren { + display: none; +} + +.SRSymbol { + font-weight: bold; + color: #425E97; + font-family: Arial, Verdana, sans-serif; + text-decoration: none; + outline: none; +} 
+ +a.SRScope { + display: block; + color: #425E97; + font-family: Arial, Verdana, sans-serif; + text-decoration: none; + outline: none; +} + +a.SRSymbol:focus, a.SRSymbol:active, +a.SRScope:focus, a.SRScope:active { + text-decoration: underline; +} + +span.SRScope { + padding-left: 4px; +} + +.SRPage .SRStatus { + padding: 2px 5px; + font-size: 8pt; + font-style: italic; +} + +.SRResult { + display: none; +} + +DIV.searchresults { + margin-left: 10px; + margin-right: 10px; +} + +/*---------------- External search page results */ + +.searchresult { + background-color: #F0F3F8; +} + +.pages b { + color: white; + padding: 5px 5px 3px 5px; + background-image: url("../tab_a.png"); + background-repeat: repeat-x; + text-shadow: 0 1px 1px #000000; +} + +.pages { + line-height: 17px; + margin-left: 4px; + text-decoration: none; +} + +.hl { + font-weight: bold; +} + +#searchresults { + margin-bottom: 20px; +} + +.searchpages { + margin-top: 10px; +} + diff --git a/docs/docs/doxygen/html/search/search.js b/docs/docs/doxygen/html/search/search.js new file mode 100644 index 00000000..a554ab9c --- /dev/null +++ b/docs/docs/doxygen/html/search/search.js @@ -0,0 +1,814 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. + + Copyright (C) 1997-2017 by Dimitri van Heesch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +function convertToId(search) +{ + var result = ''; + for (i=0;i do a search + { + this.Search(); + } + } + + this.OnSearchSelectKey = function(evt) + { + var e = (evt) ? evt : window.event; // for IE + if (e.keyCode==40 && this.searchIndex0) // Up + { + this.searchIndex--; + this.OnSelectItem(this.searchIndex); + } + else if (e.keyCode==13 || e.keyCode==27) + { + this.OnSelectItem(this.searchIndex); + this.CloseSelectionWindow(); + this.DOMSearchField().focus(); + } + return false; + } + + // --------- Actions + + // Closes the results window. + this.CloseResultsWindow = function() + { + this.DOMPopupSearchResultsWindow().style.display = 'none'; + this.DOMSearchClose().style.display = 'none'; + this.Activate(false); + } + + this.CloseSelectionWindow = function() + { + this.DOMSearchSelectWindow().style.display = 'none'; + } + + // Performs a search. 
+ this.Search = function() + { + this.keyTimeout = 0; + + // strip leading whitespace + var searchValue = this.DOMSearchField().value.replace(/^ +/, ""); + + var code = searchValue.toLowerCase().charCodeAt(0); + var idxChar = searchValue.substr(0, 1).toLowerCase(); + if ( 0xD800 <= code && code <= 0xDBFF && searchValue > 1) // surrogate pair + { + idxChar = searchValue.substr(0, 2); + } + + var resultsPage; + var resultsPageWithSearch; + var hasResultsPage; + + var idx = indexSectionsWithContent[this.searchIndex].indexOf(idxChar); + if (idx!=-1) + { + var hexCode=idx.toString(16); + resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html'; + resultsPageWithSearch = resultsPage+'?'+escape(searchValue); + hasResultsPage = true; + } + else // nothing available for this search term + { + resultsPage = this.resultsPath + '/nomatches.html'; + resultsPageWithSearch = resultsPage; + hasResultsPage = false; + } + + window.frames.MSearchResults.location = resultsPageWithSearch; + var domPopupSearchResultsWindow = this.DOMPopupSearchResultsWindow(); + + if (domPopupSearchResultsWindow.style.display!='block') + { + var domSearchBox = this.DOMSearchBox(); + this.DOMSearchClose().style.display = 'inline'; + if (this.insideFrame) + { + var domPopupSearchResults = this.DOMPopupSearchResults(); + domPopupSearchResultsWindow.style.position = 'relative'; + domPopupSearchResultsWindow.style.display = 'block'; + var width = document.body.clientWidth - 8; // the -8 is for IE :-( + domPopupSearchResultsWindow.style.width = width + 'px'; + domPopupSearchResults.style.width = width + 'px'; + } + else + { + var domPopupSearchResults = this.DOMPopupSearchResults(); + var left = getXPos(domSearchBox) + 150; // domSearchBox.offsetWidth; + var top = getYPos(domSearchBox) + 20; // domSearchBox.offsetHeight + 1; + domPopupSearchResultsWindow.style.display = 'block'; + left -= domPopupSearchResults.offsetWidth; + domPopupSearchResultsWindow.style.top = 
top + 'px'; + domPopupSearchResultsWindow.style.left = left + 'px'; + } + } + + this.lastSearchValue = searchValue; + this.lastResultsPage = resultsPage; + } + + // -------- Activation Functions + + // Activates or deactivates the search panel, resetting things to + // their default values if necessary. + this.Activate = function(isActive) + { + if (isActive || // open it + this.DOMPopupSearchResultsWindow().style.display == 'block' + ) + { + this.DOMSearchBox().className = 'MSearchBoxActive'; + + var searchField = this.DOMSearchField(); + + if (searchField.value == this.searchLabel) // clear "Search" term upon entry + { + searchField.value = ''; + this.searchActive = true; + } + } + else if (!isActive) // directly remove the panel + { + this.DOMSearchBox().className = 'MSearchBoxInactive'; + this.DOMSearchField().value = this.searchLabel; + this.searchActive = false; + this.lastSearchValue = '' + this.lastResultsPage = ''; + } + } +} + +// ----------------------------------------------------------------------- + +// The class that handles everything on the search results page. +function SearchResults(name) +{ + // The number of matches from the last run of . + this.lastMatchCount = 0; + this.lastKey = 0; + this.repeatOn = false; + + // Toggles the visibility of the passed element ID. 
+ this.FindChildElement = function(id) + { + var parentElement = document.getElementById(id); + var element = parentElement.firstChild; + + while (element && element!=parentElement) + { + if (element.nodeName == 'DIV' && element.className == 'SRChildren') + { + return element; + } + + if (element.nodeName == 'DIV' && element.hasChildNodes()) + { + element = element.firstChild; + } + else if (element.nextSibling) + { + element = element.nextSibling; + } + else + { + do + { + element = element.parentNode; + } + while (element && element!=parentElement && !element.nextSibling); + + if (element && element!=parentElement) + { + element = element.nextSibling; + } + } + } + } + + this.Toggle = function(id) + { + var element = this.FindChildElement(id); + if (element) + { + if (element.style.display == 'block') + { + element.style.display = 'none'; + } + else + { + element.style.display = 'block'; + } + } + } + + // Searches for the passed string. If there is no parameter, + // it takes it from the URL query. + // + // Always returns true, since other documents may try to call it + // and that may or may not be possible. + this.Search = function(search) + { + if (!search) // get search word from URL + { + search = window.location.search; + search = search.substring(1); // Remove the leading '?' 
+ search = unescape(search); + } + + search = search.replace(/^ +/, ""); // strip leading spaces + search = search.replace(/ +$/, ""); // strip trailing spaces + search = search.toLowerCase(); + search = convertToId(search); + + var resultRows = document.getElementsByTagName("div"); + var matches = 0; + + var i = 0; + while (i < resultRows.length) + { + var row = resultRows.item(i); + if (row.className == "SRResult") + { + var rowMatchName = row.id.toLowerCase(); + rowMatchName = rowMatchName.replace(/^sr\d*_/, ''); // strip 'sr123_' + + if (search.length<=rowMatchName.length && + rowMatchName.substr(0, search.length)==search) + { + row.style.display = 'block'; + matches++; + } + else + { + row.style.display = 'none'; + } + } + i++; + } + document.getElementById("Searching").style.display='none'; + if (matches == 0) // no results + { + document.getElementById("NoMatches").style.display='block'; + } + else // at least one result + { + document.getElementById("NoMatches").style.display='none'; + } + this.lastMatchCount = matches; + return true; + } + + // return the first item with index index or higher that is visible + this.NavNext = function(index) + { + var focusItem; + while (1) + { + var focusName = 'Item'+index; + focusItem = document.getElementById(focusName); + if (focusItem && focusItem.parentNode.parentNode.style.display=='block') + { + break; + } + else if (!focusItem) // last element + { + break; + } + focusItem=null; + index++; + } + return focusItem; + } + + this.NavPrev = function(index) + { + var focusItem; + while (1) + { + var focusName = 'Item'+index; + focusItem = document.getElementById(focusName); + if (focusItem && focusItem.parentNode.parentNode.style.display=='block') + { + break; + } + else if (!focusItem) // last element + { + break; + } + focusItem=null; + index--; + } + return focusItem; + } + + this.ProcessKeys = function(e) + { + if (e.type == "keydown") + { + this.repeatOn = false; + this.lastKey = e.keyCode; + } + else if (e.type == 
"keypress") + { + if (!this.repeatOn) + { + if (this.lastKey) this.repeatOn = true; + return false; // ignore first keypress after keydown + } + } + else if (e.type == "keyup") + { + this.lastKey = 0; + this.repeatOn = false; + } + return this.lastKey!=0; + } + + this.Nav = function(evt,itemIndex) + { + var e = (evt) ? evt : window.event; // for IE + if (e.keyCode==13) return true; + if (!this.ProcessKeys(e)) return false; + + if (this.lastKey==38) // Up + { + var newIndex = itemIndex-1; + var focusItem = this.NavPrev(newIndex); + if (focusItem) + { + var child = this.FindChildElement(focusItem.parentNode.parentNode.id); + if (child && child.style.display == 'block') // children visible + { + var n=0; + var tmpElem; + while (1) // search for last child + { + tmpElem = document.getElementById('Item'+newIndex+'_c'+n); + if (tmpElem) + { + focusItem = tmpElem; + } + else // found it! + { + break; + } + n++; + } + } + } + if (focusItem) + { + focusItem.focus(); + } + else // return focus to search field + { + parent.document.getElementById("MSearchField").focus(); + } + } + else if (this.lastKey==40) // Down + { + var newIndex = itemIndex+1; + var focusItem; + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem && elem.style.display == 'block') // children visible + { + focusItem = document.getElementById('Item'+itemIndex+'_c0'); + } + if (!focusItem) focusItem = this.NavNext(newIndex); + if (focusItem) focusItem.focus(); + } + else if (this.lastKey==39) // Right + { + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem) elem.style.display = 'block'; + } + else if (this.lastKey==37) // Left + { + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem) elem.style.display = 'none'; + } + else if (this.lastKey==27) // Escape + { + 
parent.searchBox.CloseResultsWindow(); + parent.document.getElementById("MSearchField").focus(); + } + else if (this.lastKey==13) // Enter + { + return true; + } + return false; + } + + this.NavChild = function(evt,itemIndex,childIndex) + { + var e = (evt) ? evt : window.event; // for IE + if (e.keyCode==13) return true; + if (!this.ProcessKeys(e)) return false; + + if (this.lastKey==38) // Up + { + if (childIndex>0) + { + var newIndex = childIndex-1; + document.getElementById('Item'+itemIndex+'_c'+newIndex).focus(); + } + else // already at first child, jump to parent + { + document.getElementById('Item'+itemIndex).focus(); + } + } + else if (this.lastKey==40) // Down + { + var newIndex = childIndex+1; + var elem = document.getElementById('Item'+itemIndex+'_c'+newIndex); + if (!elem) // last child, jump to parent next parent + { + elem = this.NavNext(itemIndex+1); + } + if (elem) + { + elem.focus(); + } + } + else if (this.lastKey==27) // Escape + { + parent.searchBox.CloseResultsWindow(); + parent.document.getElementById("MSearchField").focus(); + } + else if (this.lastKey==13) // Enter + { + return true; + } + return false; + } +} + +function setKeyActions(elem,action) +{ + elem.setAttribute('onkeydown',action); + elem.setAttribute('onkeypress',action); + elem.setAttribute('onkeyup',action); +} + +function setClassAttr(elem,attr) +{ + elem.setAttribute('class',attr); + elem.setAttribute('className',attr); +} + +function createResults() +{ + var results = document.getElementById("SRResults"); + for (var e=0; eli>h1,.sm>li>h2,.sm>li>h3,.sm>li>h4,.sm>li>h5,.sm>li>h6{margin:0;padding:0}.sm ul{display:none}.sm li,.sm a{position:relative}.sm a{display:block}.sm a.disabled{cursor:not-allowed}.sm:after{content:"\00a0";display:block;height:0;font:0/0 serif;clear:both;visibility:hidden;overflow:hidden}.sm,.sm *,.sm *:before,.sm *:after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}.sm-dox{background-image:url("tab_b.png")}.sm-dox a,.sm-dox 
a:focus,.sm-dox a:hover,.sm-dox a:active{padding:0 12px;padding-right:43px;font-family:"Lucida Grande","Geneva","Helvetica",Arial,sans-serif;font-size:13px;font-weight:bold;line-height:36px;text-decoration:none;text-shadow:0 1px 1px rgba(255,255,255,0.9);color:#283a5d;outline:0}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a.current{color:#d23600}.sm-dox a.disabled{color:#bbb}.sm-dox a span.sub-arrow{position:absolute;top:50%;margin-top:-14px;left:auto;right:3px;width:28px;height:28px;overflow:hidden;font:bold 12px/28px monospace!important;text-align:center;text-shadow:none;background:rgba(255,255,255,0.5);-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox a.highlighted span.sub-arrow:before{display:block;content:'-'}.sm-dox>li:first-child>a,.sm-dox>li:first-child>:not(ul) a{-moz-border-radius:5px 5px 0 0;-webkit-border-radius:5px;border-radius:5px 5px 0 0}.sm-dox>li:last-child>a,.sm-dox>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul{-moz-border-radius:0 0 5px 5px;-webkit-border-radius:0;border-radius:0 0 5px 
5px}.sm-dox>li:last-child>a.highlighted,.sm-dox>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted{-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox ul{background:rgba(162,162,162,0.1)}.sm-dox ul a,.sm-dox ul a:focus,.sm-dox ul a:hover,.sm-dox ul a:active{font-size:12px;border-left:8px solid transparent;line-height:36px;text-shadow:none;background-color:white;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul ul a,.sm-dox ul ul a:hover,.sm-dox ul ul a:focus,.sm-dox ul ul a:active{border-left:16px solid transparent}.sm-dox ul ul ul a,.sm-dox ul ul ul a:hover,.sm-dox ul ul ul a:focus,.sm-dox ul ul ul a:active{border-left:24px solid transparent}.sm-dox ul ul ul ul a,.sm-dox ul ul ul ul a:hover,.sm-dox ul ul ul ul a:focus,.sm-dox ul ul ul ul a:active{border-left:32px solid transparent}.sm-dox ul ul ul ul ul a,.sm-dox ul ul ul ul ul a:hover,.sm-dox ul ul ul ul ul a:focus,.sm-dox ul ul ul ul ul a:active{border-left:40px solid transparent}@media(min-width:768px){.sm-dox ul{position:absolute;width:12em}.sm-dox li{float:left}.sm-dox.sm-rtl li{float:right}.sm-dox ul li,.sm-dox.sm-rtl ul li,.sm-dox.sm-vertical li{float:none}.sm-dox a{white-space:nowrap}.sm-dox ul a,.sm-dox.sm-vertical a{white-space:normal}.sm-dox 
.sm-nowrap>li>a,.sm-dox .sm-nowrap>li>:not(ul) a{white-space:nowrap}.sm-dox{padding:0 10px;background-image:url("tab_b.png");line-height:36px}.sm-dox a span.sub-arrow{top:50%;margin-top:-2px;right:12px;width:0;height:0;border-width:4px;border-style:solid dashed dashed dashed;border-color:#283a5d transparent transparent transparent;background:transparent;-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox a,.sm-dox a:focus,.sm-dox a:active,.sm-dox a:hover,.sm-dox a.highlighted{padding:0 12px;background-image:url("tab_s.png");background-repeat:no-repeat;background-position:right;-moz-border-radius:0!important;-webkit-border-radius:0;border-radius:0!important}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a:hover span.sub-arrow{border-color:white transparent transparent transparent}.sm-dox a.has-submenu{padding-right:24px}.sm-dox li{border-top:0}.sm-dox>li>ul:before,.sm-dox>li>ul:after{content:'';position:absolute;top:-18px;left:30px;width:0;height:0;overflow:hidden;border-width:9px;border-style:dashed dashed solid dashed;border-color:transparent transparent #bbb transparent}.sm-dox>li>ul:after{top:-16px;left:31px;border-width:8px;border-color:transparent transparent #fff transparent}.sm-dox ul{border:1px solid #bbb;padding:5px 0;background:#fff;-moz-border-radius:5px!important;-webkit-border-radius:5px;border-radius:5px!important;-moz-box-shadow:0 5px 9px rgba(0,0,0,0.2);-webkit-box-shadow:0 5px 9px rgba(0,0,0,0.2);box-shadow:0 5px 9px rgba(0,0,0,0.2)}.sm-dox ul a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-color:transparent transparent transparent #555;border-style:dashed dashed dashed solid}.sm-dox ul a,.sm-dox ul a:hover,.sm-dox ul a:focus,.sm-dox ul a:active,.sm-dox ul a.highlighted{color:#555;background-image:none;border:0!important;color:#555;background-image:none}.sm-dox ul 
a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul a:hover span.sub-arrow{border-color:transparent transparent transparent white}.sm-dox span.scroll-up,.sm-dox span.scroll-down{position:absolute;display:none;visibility:hidden;overflow:hidden;background:#fff;height:36px}.sm-dox span.scroll-up:hover,.sm-dox span.scroll-down:hover{background:#eee}.sm-dox span.scroll-up:hover span.scroll-up-arrow,.sm-dox span.scroll-up:hover span.scroll-down-arrow{border-color:transparent transparent #d23600 transparent}.sm-dox span.scroll-down:hover span.scroll-down-arrow{border-color:#d23600 transparent transparent transparent}.sm-dox span.scroll-up-arrow,.sm-dox span.scroll-down-arrow{position:absolute;top:0;left:50%;margin-left:-6px;width:0;height:0;overflow:hidden;border-width:6px;border-style:dashed dashed solid dashed;border-color:transparent transparent #555 transparent}.sm-dox span.scroll-down-arrow{top:8px;border-style:solid dashed dashed dashed;border-color:#555 transparent transparent transparent}.sm-dox.sm-rtl a.has-submenu{padding-right:12px;padding-left:24px}.sm-dox.sm-rtl a span.sub-arrow{right:auto;left:12px}.sm-dox.sm-rtl.sm-vertical a.has-submenu{padding:10px 20px}.sm-dox.sm-rtl.sm-vertical a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-rtl>li>ul:before{left:auto;right:30px}.sm-dox.sm-rtl>li>ul:after{left:auto;right:31px}.sm-dox.sm-rtl ul a.has-submenu{padding:10px 20px!important}.sm-dox.sm-rtl ul a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-vertical{padding:10px 0;-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox.sm-vertical a{padding:10px 20px}.sm-dox.sm-vertical a:hover,.sm-dox.sm-vertical a:focus,.sm-dox.sm-vertical a:active,.sm-dox.sm-vertical 
a.highlighted{background:#fff}.sm-dox.sm-vertical a.disabled{background-image:url("tab_b.png")}.sm-dox.sm-vertical a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-style:dashed dashed dashed solid;border-color:transparent transparent transparent #555}.sm-dox.sm-vertical>li>ul:before,.sm-dox.sm-vertical>li>ul:after{display:none}.sm-dox.sm-vertical ul a{padding:10px 20px}.sm-dox.sm-vertical ul a:hover,.sm-dox.sm-vertical ul a:focus,.sm-dox.sm-vertical ul a:active,.sm-dox.sm-vertical ul a.highlighted{background:#eee}.sm-dox.sm-vertical ul a.disabled{background:#fff}} \ No newline at end of file diff --git a/docs/docs/doxygen/html/util_8hpp.html b/docs/docs/doxygen/html/util_8hpp.html new file mode 100644 index 00000000..1a4c8c28 --- /dev/null +++ b/docs/docs/doxygen/html/util_8hpp.html @@ -0,0 +1,125 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/util/util.hpp File Reference + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    util.hpp File Reference
    +
    +
    + +

    Go to the source code of this file.

    + + + + + + +

    +Namespaces

     intel
     
     intel::hexl
     
    + + + + +

    +Enumerations

    enum  intel::hexl::CMPINT {
    +  intel::hexl::CMPINT::EQ = 0, +intel::hexl::CMPINT::LT = 1, +intel::hexl::CMPINT::LE = 2, +intel::hexl::CMPINT::FALSE = 3, +
    +  intel::hexl::CMPINT::NE = 4, +intel::hexl::CMPINT::NLT = 5, +intel::hexl::CMPINT::NLE = 6, +intel::hexl::CMPINT::TRUE = 7 +
    + }
     Represents binary operations between two boolean values. More...
     
    + + + + +

    +Functions

    CMPINT intel::hexl::Not (CMPINT cmp)
     Returns the logical negation of a binary operation. More...
     
    +
    + + + + diff --git a/docs/docs/doxygen/html/util_8hpp_source.html b/docs/docs/doxygen/html/util_8hpp_source.html new file mode 100644 index 00000000..384a937a --- /dev/null +++ b/docs/docs/doxygen/html/util_8hpp_source.html @@ -0,0 +1,146 @@ + + + + + + + +Intel HEXL: /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/util/util.hpp Source File + + + + + + + + + + + +
    +
    + + + + + + +
    +
    Intel HEXL +
    +
    Intel Homomorphic Encryption Acceleration Library, accelerating the modular arithmetic operations used in homomorphic encryption.
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    util.hpp
    +
    +
    +Go to the documentation of this file.
    1 // Copyright (C) 2020-2021 Intel Corporation
    +
    2 // SPDX-License-Identifier: Apache-2.0
    +
    3 
    +
    4 #pragma once
    +
    5 
    +
    6 namespace intel {
    +
    7 namespace hexl {
    +
    8 
    +
    9 #undef TRUE // MSVC defines TRUE
    +
    10 #undef FALSE // MSVC defines FALSE
    +
    11 
    +
    14 enum class CMPINT {
    +
    15  EQ = 0,
    +
    16  LT = 1,
    +
    17  LE = 2,
    +
    18  FALSE = 3,
    +
    19  NE = 4,
    +
    20  NLT = 5,
    +
    21  NLE = 6,
    +
    22  TRUE = 7
    +
    23 };
    +
    24 
    +
    27 inline CMPINT Not(CMPINT cmp) {
    +
    28  switch (cmp) {
    +
    29  case CMPINT::EQ:
    +
    30  return CMPINT::NE;
    +
    31  case CMPINT::LT:
    +
    32  return CMPINT::NLT;
    +
    33  case CMPINT::LE:
    +
    34  return CMPINT::NLE;
    +
    35  case CMPINT::FALSE:
    +
    36  return CMPINT::TRUE;
    +
    37  case CMPINT::NE:
    +
    38  return CMPINT::EQ;
    +
    39  case CMPINT::NLT:
    +
    40  return CMPINT::LT;
    +
    41  case CMPINT::NLE:
    +
    42  return CMPINT::LE;
    +
    43  case CMPINT::TRUE:
    +
    44  return CMPINT::FALSE;
    +
    45  default:
    +
    46  return CMPINT::FALSE;
    +
    47  }
    +
    48 }
    +
    49 
    +
    50 } // namespace hexl
    +
    51 } // namespace intel
    +
    + +
    @ NLT
    Not less than.
    +
    @ NLE
    Not less than or equal.
    + +
    CMPINT
    Represents binary operations between two boolean values.
    Definition: util.hpp:14
    +
    CMPINT Not(CMPINT cmp)
    Returns the logical negation of a binary operation.
    Definition: util.hpp:27
    +
    @ LE
    Less than or equal.
    +
    Definition: eltwise-add-mod.hpp:8
    + + + + + + + diff --git a/docs/docs/doxygen/latex/Makefile b/docs/docs/doxygen/latex/Makefile new file mode 100644 index 00000000..877c9ccc --- /dev/null +++ b/docs/docs/doxygen/latex/Makefile @@ -0,0 +1,23 @@ +LATEX_CMD=pdflatex + +all: refman.pdf + +pdf: refman.pdf + +refman.pdf: clean refman.tex + $(LATEX_CMD) refman + makeindex refman.idx + $(LATEX_CMD) refman + latex_count=8 ; \ + while egrep -s 'Rerun (LaTeX|to get cross-references right)' refman.log && [ $$latex_count -gt 0 ] ;\ + do \ + echo "Rerunning latex...." ;\ + $(LATEX_CMD) refman ;\ + latex_count=`expr $$latex_count - 1` ;\ + done + makeindex refman.idx + $(LATEX_CMD) refman + + +clean: + rm -f *.ps *.dvi *.aux *.toc *.idx *.ind *.ilg *.log *.out *.brf *.blg *.bbl refman.pdf diff --git a/docs/docs/doxygen/latex/_r_e_a_d_m_e_8md.tex b/docs/docs/doxygen/latex/_r_e_a_d_m_e_8md.tex new file mode 100644 index 00000000..678bf0a2 --- /dev/null +++ b/docs/docs/doxygen/latex/_r_e_a_d_m_e_8md.tex @@ -0,0 +1,2 @@ +\hypertarget{_r_e_a_d_m_e_8md}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/\+R\+E\+A\+D\+ME.md File Reference} +\label{_r_e_a_d_m_e_8md}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/README.md@{/Users/fboemer/repos/DBIO/intel-\/hexl/README.md}} diff --git a/docs/docs/doxygen/latex/annotated.tex b/docs/docs/doxygen/latex/annotated.tex new file mode 100644 index 00000000..0ef2f4af --- /dev/null +++ b/docs/docs/doxygen/latex/annotated.tex @@ -0,0 +1,4 @@ +\doxysection{Class List} +Here are the classes, structs, unions and interfaces with brief descriptions\+:\begin{DoxyCompactList} +\item\contentsline{section}{\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{intel\+::hexl\+::\+N\+TT}} \\*Performs negacyclic forward and inverse number-\/theoretic transform (\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}}), commonly used in R\+L\+WE cryptography }{\pageref{classintel_1_1hexl_1_1_n_t_t}}{} +\end{DoxyCompactList} diff --git 
a/docs/docs/doxygen/latex/classintel_1_1hexl_1_1_n_t_t.tex b/docs/docs/doxygen/latex/classintel_1_1hexl_1_1_n_t_t.tex new file mode 100644 index 00000000..1205db4c --- /dev/null +++ b/docs/docs/doxygen/latex/classintel_1_1hexl_1_1_n_t_t.tex @@ -0,0 +1,136 @@ +\hypertarget{classintel_1_1hexl_1_1_n_t_t}{}\doxysection{intel\+::hexl\+::N\+TT Class Reference} +\label{classintel_1_1hexl_1_1_n_t_t}\index{intel::hexl::NTT@{intel::hexl::NTT}} + + +Performs negacyclic forward and inverse number-\/theoretic transform (\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}}), commonly used in R\+L\+WE cryptography. + + + + +{\ttfamily \#include $<$ntt.\+hpp$>$} + +\doxysubsection*{Public Member Functions} +\begin{DoxyCompactItemize} +\item +\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t_ade0447617b50232d2a076f99e672d15c}{N\+TT}} () +\begin{DoxyCompactList}\small\item\em Initializes an empty \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} object. \end{DoxyCompactList}\item +\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t_ab6fca1753db0834c692232e8897c725f}{$\sim$\+N\+TT}} () +\begin{DoxyCompactList}\small\item\em Destructs the \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} object. \end{DoxyCompactList}\item +\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t_a3f44d9ead9be66540b9732d5c98bb000}{N\+TT}} (uint64\+\_\+t degree, uint64\+\_\+t p) +\begin{DoxyCompactList}\small\item\em Performs pre-\/computation necessary for forward and inverse transforms. \end{DoxyCompactList}\item +\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t_a63eea17916855a9ec73a2cc8c57b2a63}{N\+TT}} (uint64\+\_\+t degree, uint64\+\_\+t p, uint64\+\_\+t root\+\_\+of\+\_\+unity) +\begin{DoxyCompactList}\small\item\em Initializes an \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} object with degree {\ttfamily degree} and modulus {\ttfamily p}. 
\end{DoxyCompactList}\item +void \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t_a7f8dac5ff3fc117d3e7259762a716140}{Compute\+Forward}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand, uint64\+\_\+t input\+\_\+mod\+\_\+factor, uint64\+\_\+t output\+\_\+mod\+\_\+factor) +\begin{DoxyCompactList}\small\item\em Compute forward \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}}. Results are bit-\/reversed. \end{DoxyCompactList}\item +void \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t_a31e78375dcafd5df85cb1259a9156a9a}{Compute\+Inverse}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand, uint64\+\_\+t input\+\_\+mod\+\_\+factor, uint64\+\_\+t output\+\_\+mod\+\_\+factor) +\end{DoxyCompactItemize} + + +\doxysubsection{Detailed Description} +Performs negacyclic forward and inverse number-\/theoretic transform (\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}}), commonly used in R\+L\+WE cryptography. + +The number-\/theoretic transform (\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}}) specializes the discrete Fourier transform (D\+FT) to the quotient ring $ \mathbb{Z}_p[X] / (X^N + 1) $. + +\doxysubsection{Constructor \& Destructor Documentation} +\mbox{\Hypertarget{classintel_1_1hexl_1_1_n_t_t_ade0447617b50232d2a076f99e672d15c}\label{classintel_1_1hexl_1_1_n_t_t_ade0447617b50232d2a076f99e672d15c}} +\index{intel::hexl::NTT@{intel::hexl::NTT}!NTT@{NTT}} +\index{NTT@{NTT}!intel::hexl::NTT@{intel::hexl::NTT}} +\doxysubsubsection{\texorpdfstring{NTT()}{NTT()}\hspace{0.1cm}{\footnotesize\ttfamily [1/3]}} +{\footnotesize\ttfamily intel\+::hexl\+::\+N\+T\+T\+::\+N\+TT (\begin{DoxyParamCaption}{ }\end{DoxyParamCaption})} + + + +Initializes an empty \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} object. 
+ +\mbox{\Hypertarget{classintel_1_1hexl_1_1_n_t_t_ab6fca1753db0834c692232e8897c725f}\label{classintel_1_1hexl_1_1_n_t_t_ab6fca1753db0834c692232e8897c725f}} +\index{intel::hexl::NTT@{intel::hexl::NTT}!````~NTT@{$\sim$NTT}} +\index{````~NTT@{$\sim$NTT}!intel::hexl::NTT@{intel::hexl::NTT}} +\doxysubsubsection{\texorpdfstring{$\sim$NTT()}{~NTT()}} +{\footnotesize\ttfamily intel\+::hexl\+::\+N\+T\+T\+::$\sim$\+N\+TT (\begin{DoxyParamCaption}{ }\end{DoxyParamCaption})} + + + +Destructs the \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} object. + +\mbox{\Hypertarget{classintel_1_1hexl_1_1_n_t_t_a3f44d9ead9be66540b9732d5c98bb000}\label{classintel_1_1hexl_1_1_n_t_t_a3f44d9ead9be66540b9732d5c98bb000}} +\index{intel::hexl::NTT@{intel::hexl::NTT}!NTT@{NTT}} +\index{NTT@{NTT}!intel::hexl::NTT@{intel::hexl::NTT}} +\doxysubsubsection{\texorpdfstring{NTT()}{NTT()}\hspace{0.1cm}{\footnotesize\ttfamily [2/3]}} +{\footnotesize\ttfamily intel\+::hexl\+::\+N\+T\+T\+::\+N\+TT (\begin{DoxyParamCaption}\item[{uint64\+\_\+t}]{degree, }\item[{uint64\+\_\+t}]{p }\end{DoxyParamCaption})} + + + +Performs pre-\/computation necessary for forward and inverse transforms. + +Initializes an \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} object with degree {\ttfamily degree} and modulus {\ttfamily p}. +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ in}} & {\em degree} & a.\+k.\+a. N. Size of the \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} transform. Must be a power of 2 \\ +\hline +\mbox{\texttt{ in}} & {\em p} & Prime modulus. 
Must satisfy $ p == 1 \mod 2N $ \\ +\hline +\end{DoxyParams} +\mbox{\Hypertarget{classintel_1_1hexl_1_1_n_t_t_a63eea17916855a9ec73a2cc8c57b2a63}\label{classintel_1_1hexl_1_1_n_t_t_a63eea17916855a9ec73a2cc8c57b2a63}} +\index{intel::hexl::NTT@{intel::hexl::NTT}!NTT@{NTT}} +\index{NTT@{NTT}!intel::hexl::NTT@{intel::hexl::NTT}} +\doxysubsubsection{\texorpdfstring{NTT()}{NTT()}\hspace{0.1cm}{\footnotesize\ttfamily [3/3]}} +{\footnotesize\ttfamily intel\+::hexl\+::\+N\+T\+T\+::\+N\+TT (\begin{DoxyParamCaption}\item[{uint64\+\_\+t}]{degree, }\item[{uint64\+\_\+t}]{p, }\item[{uint64\+\_\+t}]{root\+\_\+of\+\_\+unity }\end{DoxyParamCaption})} + + + +Initializes an \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} object with degree {\ttfamily degree} and modulus {\ttfamily p}. + + +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ in}} & {\em degree} & a.\+k.\+a. N. Size of the \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} transform. Must be a power of 2 \\ +\hline +\mbox{\texttt{ in}} & {\em p} & Prime modulus. 
Must satisfy $ p == 1 \mod 2N $ \\ +\hline +\mbox{\texttt{ in}} & {\em root\+\_\+of\+\_\+unity} & 2N\textquotesingle{}th root of unity in $ \mathbb{Z}_p $.\\ +\hline +\end{DoxyParams} +Performs pre-\/computation necessary for forward and inverse transforms. + +\doxysubsection{Member Function Documentation} +\mbox{\Hypertarget{classintel_1_1hexl_1_1_n_t_t_a7f8dac5ff3fc117d3e7259762a716140}\label{classintel_1_1hexl_1_1_n_t_t_a7f8dac5ff3fc117d3e7259762a716140}} +\index{intel::hexl::NTT@{intel::hexl::NTT}!ComputeForward@{ComputeForward}} +\index{ComputeForward@{ComputeForward}!intel::hexl::NTT@{intel::hexl::NTT}} +\doxysubsubsection{\texorpdfstring{ComputeForward()}{ComputeForward()}} +{\footnotesize\ttfamily void intel\+::hexl\+::\+N\+T\+T\+::\+Compute\+Forward (\begin{DoxyParamCaption}\item[{uint64\+\_\+t $\ast$}]{result, }\item[{const uint64\+\_\+t $\ast$}]{operand, }\item[{uint64\+\_\+t}]{input\+\_\+mod\+\_\+factor, }\item[{uint64\+\_\+t}]{output\+\_\+mod\+\_\+factor }\end{DoxyParamCaption})} + + + +Compute forward \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}}. Results are bit-\/reversed. + + +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ out}} & {\em result} & Stores the result \\ +\hline +\mbox{\texttt{ in}} & {\em operand} & Data on which to compute the \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} \\ +\hline +\mbox{\texttt{ in}} & {\em input\+\_\+mod\+\_\+factor} & Assume input {\ttfamily operand} are in \mbox{[}0, input\+\_\+mod\+\_\+factor $\ast$ p). Must be 1, 2 or 4. \\ +\hline +\mbox{\texttt{ in}} & {\em output\+\_\+mod\+\_\+factor} & Returns output {\ttfamily result} in \mbox{[}0, output\+\_\+mod\+\_\+factor $\ast$ p). Must be 1 or 4. 
\\ +\hline +\end{DoxyParams} +\mbox{\Hypertarget{classintel_1_1hexl_1_1_n_t_t_a31e78375dcafd5df85cb1259a9156a9a}\label{classintel_1_1hexl_1_1_n_t_t_a31e78375dcafd5df85cb1259a9156a9a}} +\index{intel::hexl::NTT@{intel::hexl::NTT}!ComputeInverse@{ComputeInverse}} +\index{ComputeInverse@{ComputeInverse}!intel::hexl::NTT@{intel::hexl::NTT}} +\doxysubsubsection{\texorpdfstring{ComputeInverse()}{ComputeInverse()}} +{\footnotesize\ttfamily void intel\+::hexl\+::\+N\+T\+T\+::\+Compute\+Inverse (\begin{DoxyParamCaption}\item[{uint64\+\_\+t $\ast$}]{result, }\item[{const uint64\+\_\+t $\ast$}]{operand, }\item[{uint64\+\_\+t}]{input\+\_\+mod\+\_\+factor, }\item[{uint64\+\_\+t}]{output\+\_\+mod\+\_\+factor }\end{DoxyParamCaption})} + +Compute inverse \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}}. Results are bit-\/reversed. +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ out}} & {\em result} & Stores the result \\ +\hline +\mbox{\texttt{ in}} & {\em operand} & Data on which to compute the inverse \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} \\ +\hline +\mbox{\texttt{ in}} & {\em input\+\_\+mod\+\_\+factor} & Assume input {\ttfamily operand} are in \mbox{[}0, input\+\_\+mod\+\_\+factor $\ast$ p). Must be 1 or 2. \\ +\hline +\mbox{\texttt{ in}} & {\em output\+\_\+mod\+\_\+factor} & Returns output {\ttfamily result} in \mbox{[}0, output\+\_\+mod\+\_\+factor $\ast$ p). Must be 1 or 2. 
\\ +\hline +\end{DoxyParams} + + +The documentation for this class was generated from the following file\+:\begin{DoxyCompactItemize} +\item +/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/ntt/\mbox{\hyperlink{ntt_8hpp}{ntt.\+hpp}}\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/dir_60e7388d20bffeeed71217422ae2faa2.tex b/docs/docs/doxygen/latex/dir_60e7388d20bffeeed71217422ae2faa2.tex new file mode 100644 index 00000000..5ad4aa22 --- /dev/null +++ b/docs/docs/doxygen/latex/dir_60e7388d20bffeeed71217422ae2faa2.tex @@ -0,0 +1,7 @@ +\hypertarget{dir_60e7388d20bffeeed71217422ae2faa2}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/util Directory Reference} +\label{dir_60e7388d20bffeeed71217422ae2faa2}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/util Directory Reference@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/util Directory Reference}} +\doxysubsection*{Files} +\begin{DoxyCompactItemize} +\item +file \mbox{\hyperlink{util_8hpp}{util.\+hpp}} +\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/dir_7759c3a881395f02ed4947f5a1aa5b9c.tex b/docs/docs/doxygen/latex/dir_7759c3a881395f02ed4947f5a1aa5b9c.tex new file mode 100644 index 00000000..6d3cc7e7 --- /dev/null +++ b/docs/docs/doxygen/latex/dir_7759c3a881395f02ed4947f5a1aa5b9c.tex @@ -0,0 +1,16 @@ +\hypertarget{dir_7759c3a881395f02ed4947f5a1aa5b9c}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl Directory Reference} +\label{dir_7759c3a881395f02ed4947f5a1aa5b9c}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl Directory Reference@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl Directory Reference}} +\doxysubsection*{Directories} +\begin{DoxyCompactItemize} +\item +directory \mbox{\hyperlink{dir_b327ef8739a3d23275834e47dda5cef1}{eltwise}} +\item 
+directory \mbox{\hyperlink{dir_b272e9f08317806cfbaee27c029c625d}{ntt}} +\item +directory \mbox{\hyperlink{dir_60e7388d20bffeeed71217422ae2faa2}{util}} +\end{DoxyCompactItemize} +\doxysubsection*{Files} +\begin{DoxyCompactItemize} +\item +file \mbox{\hyperlink{intel-hexl_8hpp}{intel-\/hexl.\+hpp}} +\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/dir_8420cd92772e9af80a76f3e30148eb70.tex b/docs/docs/doxygen/latex/dir_8420cd92772e9af80a76f3e30148eb70.tex new file mode 100644 index 00000000..7c2fa7e2 --- /dev/null +++ b/docs/docs/doxygen/latex/dir_8420cd92772e9af80a76f3e30148eb70.tex @@ -0,0 +1,7 @@ +\hypertarget{dir_8420cd92772e9af80a76f3e30148eb70}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl Directory Reference} +\label{dir_8420cd92772e9af80a76f3e30148eb70}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl Directory Reference@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl Directory Reference}} +\doxysubsection*{Directories} +\begin{DoxyCompactItemize} +\item +directory \mbox{\hyperlink{dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa}{include}} +\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa.tex b/docs/docs/doxygen/latex/dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa.tex new file mode 100644 index 00000000..e76d9a54 --- /dev/null +++ b/docs/docs/doxygen/latex/dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa.tex @@ -0,0 +1,7 @@ +\hypertarget{dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include Directory Reference} +\label{dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include Directory Reference@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include Directory Reference}} +\doxysubsection*{Directories} +\begin{DoxyCompactItemize} +\item +directory \mbox{\hyperlink{dir_7759c3a881395f02ed4947f5a1aa5b9c}{intel-\/hexl}} +\end{DoxyCompactItemize} diff --git 
a/docs/docs/doxygen/latex/dir_b272e9f08317806cfbaee27c029c625d.tex b/docs/docs/doxygen/latex/dir_b272e9f08317806cfbaee27c029c625d.tex new file mode 100644 index 00000000..1d18801e --- /dev/null +++ b/docs/docs/doxygen/latex/dir_b272e9f08317806cfbaee27c029c625d.tex @@ -0,0 +1,7 @@ +\hypertarget{dir_b272e9f08317806cfbaee27c029c625d}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/ntt Directory Reference} +\label{dir_b272e9f08317806cfbaee27c029c625d}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/ntt Directory Reference@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/ntt Directory Reference}} +\doxysubsection*{Files} +\begin{DoxyCompactItemize} +\item +file \mbox{\hyperlink{ntt_8hpp}{ntt.\+hpp}} +\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/dir_b327ef8739a3d23275834e47dda5cef1.tex b/docs/docs/doxygen/latex/dir_b327ef8739a3d23275834e47dda5cef1.tex new file mode 100644 index 00000000..df5c133c --- /dev/null +++ b/docs/docs/doxygen/latex/dir_b327ef8739a3d23275834e47dda5cef1.tex @@ -0,0 +1,17 @@ +\hypertarget{dir_b327ef8739a3d23275834e47dda5cef1}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise Directory Reference} +\label{dir_b327ef8739a3d23275834e47dda5cef1}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise Directory Reference@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise Directory Reference}} +\doxysubsection*{Files} +\begin{DoxyCompactItemize} +\item +file \mbox{\hyperlink{eltwise-add-mod_8hpp}{eltwise-\/add-\/mod.\+hpp}} +\item +file \mbox{\hyperlink{eltwise-cmp-add_8hpp}{eltwise-\/cmp-\/add.\+hpp}} +\item +file \mbox{\hyperlink{eltwise-cmp-sub-mod_8hpp}{eltwise-\/cmp-\/sub-\/mod.\+hpp}} +\item +file \mbox{\hyperlink{eltwise-fma-mod_8hpp}{eltwise-\/fma-\/mod.\+hpp}} +\item +file 
\mbox{\hyperlink{eltwise-mult-mod_8hpp}{eltwise-\/mult-\/mod.\+hpp}} +\item +file \mbox{\hyperlink{eltwise-reduce-mod_8hpp}{eltwise-\/reduce-\/mod.\+hpp}} +\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/doxygen.sty b/docs/docs/doxygen/latex/doxygen.sty new file mode 100644 index 00000000..78a52546 --- /dev/null +++ b/docs/docs/doxygen/latex/doxygen.sty @@ -0,0 +1,576 @@ +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{doxygen} + +% Packages used by this style file +\RequirePackage{alltt} +%%\RequirePackage{array} %% moved to refman.tex due to workaround for LaTex 2019 version and unmaintained tabu package +\RequirePackage{calc} +\RequirePackage{float} +%%\RequirePackage{ifthen} %% moved to refman.tex due to workaround for LaTex 2019 version and unmaintained tabu package +\RequirePackage{verbatim} +\RequirePackage[table]{xcolor} +\RequirePackage{longtable_doxygen} +\RequirePackage{tabu_doxygen} +\RequirePackage{fancyvrb} +\RequirePackage{tabularx} +\RequirePackage{multirow} +\RequirePackage{hanging} +\RequirePackage{ifpdf} +\RequirePackage{adjustbox} +\RequirePackage{amssymb} +\RequirePackage{stackengine} +\RequirePackage[normalem]{ulem} % for strikeout, but don't modify emphasis + +%---------- Internal commands used in this style file ---------------- + +\newcommand{\ensurespace}[1]{% + \begingroup% + \setlength{\dimen@}{#1}% + \vskip\z@\@plus\dimen@% + \penalty -100\vskip\z@\@plus -\dimen@% + \vskip\dimen@% + \penalty 9999% + \vskip -\dimen@% + \vskip\z@skip% hide the previous |\vskip| from |\addvspace| + \endgroup% +} + +\newcommand{\DoxyHorRuler}[1]{% + \setlength{\parskip}{0ex plus 0ex minus 0ex}% + \ifthenelse{#1=0}% + {% + \hrule% + }% + {% + \hrulefilll% + }% +} +\newcommand{\DoxyLabelFont}{} +\newcommand{\entrylabel}[1]{% + {% + \parbox[b]{\labelwidth-4pt}{% + \makebox[0pt][l]{\DoxyLabelFont#1}% + \vspace{1.5\baselineskip}% + }% + }% +} + +\newenvironment{DoxyDesc}[1]{% + \ensurespace{4\baselineskip}% + \begin{list}{}{% + 
\settowidth{\labelwidth}{20pt}% + %\setlength{\parsep}{0pt}% + \setlength{\itemsep}{0pt}% + \setlength{\leftmargin}{\labelwidth+\labelsep}% + \renewcommand{\makelabel}{\entrylabel}% + }% + \item[#1]% +}{% + \end{list}% +} + +\newsavebox{\xrefbox} +\newlength{\xreflength} +\newcommand{\xreflabel}[1]{% + \sbox{\xrefbox}{#1}% + \setlength{\xreflength}{\wd\xrefbox}% + \ifthenelse{\xreflength>\labelwidth}{% + \begin{minipage}{\textwidth}% + \setlength{\parindent}{0pt}% + \hangindent=15pt\bfseries #1\vspace{1.2\itemsep}% + \end{minipage}% + }{% + \parbox[b]{\labelwidth}{\makebox[0pt][l]{\textbf{#1}}}% + }% +} + +%---------- Commands used by doxygen LaTeX output generator ---------- + +% Used by
     ... 
    +\newenvironment{DoxyPre}{% + \small% + \begin{alltt}% +}{% + \end{alltt}% + \normalsize% +} +% Necessary for redefining not defined characters, i.e. "Replacement Character" in tex output. +\newlength{\CodeWidthChar} +\newlength{\CodeHeightChar} +\settowidth{\CodeWidthChar}{?} +\settoheight{\CodeHeightChar}{?} +% Necessary for hanging indent +\newlength{\DoxyCodeWidth} + +\newcommand\DoxyCodeLine[1]{\hangpara{\DoxyCodeWidth}{1}{#1}\par} + +\newcommand\NiceSpace{% + \discretionary{}{\kern\fontdimen2\font}{\kern\fontdimen2\font}% +} + +% Used by @code ... @endcode +\newenvironment{DoxyCode}[1]{% + \par% + \scriptsize% + \normalfont\ttfamily% + \rightskip0pt plus 1fil% + \settowidth{\DoxyCodeWidth}{000000}% + \settowidth{\CodeWidthChar}{?}% + \settoheight{\CodeHeightChar}{?}% + \setlength{\parskip}{0ex plus 0ex minus 0ex}% + \ifthenelse{\equal{#1}{0}} + { + {\lccode`~32 \lowercase{\global\let~}\NiceSpace}\obeyspaces% + } + { + {\lccode`~32 \lowercase{\global\let~}}\obeyspaces% + } + +}{% + \normalfont% + \normalsize% + \settowidth{\CodeWidthChar}{?}% + \settoheight{\CodeHeightChar}{?}% +} + +% Redefining not defined characters, i.e. "Replacement Character" in tex output. +\def\ucr{\adjustbox{width=\CodeWidthChar,height=\CodeHeightChar}{\stackinset{c}{}{c}{-.2pt}{% + \textcolor{white}{\sffamily\bfseries\small ?}}{% + \rotatebox{45}{$\blacksquare$}}}} + +% Used by @example, @include, @includelineno and @dontinclude +\newenvironment{DoxyCodeInclude}[1]{% + \DoxyCode{#1}% +}{% + \endDoxyCode% +} + +% Used by @verbatim ... @endverbatim +\newenvironment{DoxyVerb}{% + \footnotesize% + \verbatim% +}{% + \endverbatim% + \normalsize% +} + +% Used by @verbinclude +\newenvironment{DoxyVerbInclude}{% + \DoxyVerb% +}{% + \endDoxyVerb% +} + +% Used by numbered lists (using '-#' or
      ...
    ) +\newenvironment{DoxyEnumerate}{% + \enumerate% +}{% + \endenumerate% +} + +% Used by bullet lists (using '-', @li, @arg, or
      ...
    ) +\newenvironment{DoxyItemize}{% + \itemize% +}{% + \enditemize% +} + +% Used by description lists (using
    ...
    ) +\newenvironment{DoxyDescription}{% + \description% +}{% + \enddescription% +} + +% Used by @image, @dotfile, @dot ... @enddot, and @msc ... @endmsc +% (only if caption is specified) +\newenvironment{DoxyImage}{% + \begin{figure}[H]% + \begin{center}% +}{% + \end{center}% + \end{figure}% +} + +% Used by @image, @dotfile, @dot ... @enddot, and @msc ... @endmsc +% (only if no caption is specified) +\newenvironment{DoxyImageNoCaption}{% + \begin{center}% +}{% + \end{center}% +} + +% Used by @image +% (only if inline is specified) +\newenvironment{DoxyInlineImage}{% +}{% +} + +% Used by @attention +\newenvironment{DoxyAttention}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @author and @authors +\newenvironment{DoxyAuthor}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @date +\newenvironment{DoxyDate}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @invariant +\newenvironment{DoxyInvariant}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @note +\newenvironment{DoxyNote}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @post +\newenvironment{DoxyPostcond}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @pre +\newenvironment{DoxyPrecond}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @copyright +\newenvironment{DoxyCopyright}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @remark +\newenvironment{DoxyRemark}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @return and @returns +\newenvironment{DoxyReturn}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @since +\newenvironment{DoxySince}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @see +\newenvironment{DoxySeeAlso}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @version +\newenvironment{DoxyVersion}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by 
@warning +\newenvironment{DoxyWarning}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by @internal +\newenvironment{DoxyInternal}[1]{% + \paragraph*{#1}% +}{% +} + +% Used by @par and @paragraph +\newenvironment{DoxyParagraph}[1]{% + \begin{DoxyDesc}{#1}% +}{% + \end{DoxyDesc}% +} + +% Used by parameter lists +\newenvironment{DoxyParams}[2][]{% + \tabulinesep=1mm% + \par% + \ifthenelse{\equal{#1}{}}% + {\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|}}% name + description + {\ifthenelse{\equal{#1}{1}}% + {\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|X[-1,l]|}}% in/out + name + desc + {\begin{longtabu*}spread 0pt [l]{|X[-1,l]|X[-1,l]|X[-1,l]|X[-1,l]|}}% in/out + type + name + desc + } + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2}\\[1ex]% + \hline% + \endfirsthead% + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #2}\\[1ex]% + \hline% + \endhead% +}{% + \end{longtabu*}% + \vspace{6pt}% +} + +% Used for fields of simple structs +\newenvironment{DoxyFields}[1]{% + \tabulinesep=1mm% + \par% + \begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|X[-1,l]|}% + \multicolumn{3}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endfirsthead% + \multicolumn{3}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endhead% +}{% + \end{longtabu*}% + \vspace{6pt}% +} + +% Used for fields simple class style enums +\newenvironment{DoxyEnumFields}[1]{% + \tabulinesep=1mm% + \par% + \begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}% + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endfirsthead% + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endhead% +}{% + \end{longtabu*}% + \vspace{6pt}% +} + +% Used for parameters within a detailed function description 
+\newenvironment{DoxyParamCaption}{% + \renewcommand{\item}[2][]{\\ \hspace*{2.0cm} ##1 {\em ##2}}% +}{% +} + +% Used by return value lists +\newenvironment{DoxyRetVals}[1]{% + \tabulinesep=1mm% + \par% + \begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}% + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endfirsthead% + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endhead% +}{% + \end{longtabu*}% + \vspace{6pt}% +} + +% Used by exception lists +\newenvironment{DoxyExceptions}[1]{% + \tabulinesep=1mm% + \par% + \begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}% + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endfirsthead% + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endhead% +}{% + \end{longtabu*}% + \vspace{6pt}% +} + +% Used by template parameter lists +\newenvironment{DoxyTemplParams}[1]{% + \tabulinesep=1mm% + \par% + \begin{longtabu*}spread 0pt [l]{|X[-1,r]|X[-1,l]|}% + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endfirsthead% + \multicolumn{2}{l}{\hspace{-6pt}\bfseries\fontseries{bc}\selectfont\color{darkgray} #1}\\[1ex]% + \hline% + \endhead% +}{% + \end{longtabu*}% + \vspace{6pt}% +} + +% Used for member lists +\newenvironment{DoxyCompactItemize}{% + \begin{itemize}% + \setlength{\itemsep}{-3pt}% + \setlength{\parsep}{0pt}% + \setlength{\topsep}{0pt}% + \setlength{\partopsep}{0pt}% +}{% + \end{itemize}% +} + +% Used for member descriptions +\newenvironment{DoxyCompactList}{% + \begin{list}{}{% + \setlength{\leftmargin}{0.5cm}% + \setlength{\itemsep}{0pt}% + \setlength{\parsep}{0pt}% + \setlength{\topsep}{0pt}% + \renewcommand{\makelabel}{\hfill}% + }% +}{% + \end{list}% +} + +% Used for reference lists (@bug, @deprecated, @todo, etc.) 
+\newenvironment{DoxyRefList}{% + \begin{list}{}{% + \setlength{\labelwidth}{10pt}% + \setlength{\leftmargin}{\labelwidth}% + \addtolength{\leftmargin}{\labelsep}% + \renewcommand{\makelabel}{\xreflabel}% + }% +}{% + \end{list}% +} + +% Used by @bug, @deprecated, @todo, etc. +\newenvironment{DoxyRefDesc}[1]{% + \begin{list}{}{% + \renewcommand\makelabel[1]{\textbf{##1}}% + \settowidth\labelwidth{\makelabel{#1}}% + \setlength\leftmargin{\labelwidth+\labelsep}% + }% +}{% + \end{list}% +} + +% Used by parameter lists and simple sections +\newenvironment{Desc} +{\begin{list}{}{% + \settowidth{\labelwidth}{20pt}% + \setlength{\parsep}{0pt}% + \setlength{\itemsep}{0pt}% + \setlength{\leftmargin}{\labelwidth+\labelsep}% + \renewcommand{\makelabel}{\entrylabel}% + } +}{% + \end{list}% +} + +% Used by tables +\newcommand{\PBS}[1]{\let\temp=\\#1\let\\=\temp}% +\newenvironment{TabularC}[1]% +{\tabulinesep=1mm +\begin{longtabu*}spread 0pt [c]{*#1{|X[-1]}|}}% +{\end{longtabu*}\par}% + +\newenvironment{TabularNC}[1]% +{\begin{tabu}spread 0pt [l]{*#1{|X[-1]}|}}% +{\end{tabu}\par}% + +% Used for member group headers +\newenvironment{Indent}{% + \begin{list}{}{% + \setlength{\leftmargin}{0.5cm}% + }% + \item[]\ignorespaces% +}{% + \unskip% + \end{list}% +} + +% Used when hyperlinks are turned off +\newcommand{\doxyref}[3]{% + \textbf{#1} (\textnormal{#2}\,\pageref{#3})% +} + +% Used to link to a table when hyperlinks are turned on +\newcommand{\doxytablelink}[2]{% + \ref{#1}% +} + +% Used to link to a table when hyperlinks are turned off +\newcommand{\doxytableref}[3]{% + \ref{#3}% +} + +% Used by @addindex +\newcommand{\lcurly}{\{} +\newcommand{\rcurly}{\}} + +% Colors used for syntax highlighting +\definecolor{comment}{rgb}{0.5,0.0,0.0} +\definecolor{keyword}{rgb}{0.0,0.5,0.0} +\definecolor{keywordtype}{rgb}{0.38,0.25,0.125} +\definecolor{keywordflow}{rgb}{0.88,0.5,0.0} +\definecolor{preprocessor}{rgb}{0.5,0.38,0.125} +\definecolor{stringliteral}{rgb}{0.0,0.125,0.25} 
+\definecolor{charliteral}{rgb}{0.0,0.5,0.5} +\definecolor{vhdldigit}{rgb}{1.0,0.0,1.0} +\definecolor{vhdlkeyword}{rgb}{0.43,0.0,0.43} +\definecolor{vhdllogic}{rgb}{1.0,0.0,0.0} +\definecolor{vhdlchar}{rgb}{0.0,0.0,0.0} + +% Color used for table heading +\newcommand{\tableheadbgcolor}{lightgray}% + +% Version of hypertarget with correct landing location +\newcommand{\Hypertarget}[1]{\Hy@raisedlink{\hypertarget{#1}{}}} + +% possibility to have sections etc. be within the margins +% unfortunately had to copy part of book.cls and add \raggedright +\makeatletter +\newcommand\doxysection{\@startsection {section}{1}{\z@}% + {-3.5ex \@plus -1ex \@minus -.2ex}% + {2.3ex \@plus.2ex}% + {\raggedright\normalfont\Large\bfseries}} +\newcommand\doxysubsection{\@startsection{subsection}{2}{\z@}% + {-3.25ex\@plus -1ex \@minus -.2ex}% + {1.5ex \@plus .2ex}% + {\raggedright\normalfont\large\bfseries}} +\newcommand\doxysubsubsection{\@startsection{subsubsection}{3}{\z@}% + {-3.25ex\@plus -1ex \@minus -.2ex}% + {1.5ex \@plus .2ex}% + {\raggedright\normalfont\normalsize\bfseries}} +\newcommand\doxyparagraph{\@startsection{paragraph}{4}{\z@}% + {3.25ex \@plus1ex \@minus.2ex}% + {-1em}% + {\raggedright\normalfont\normalsize\bfseries}} +\newcommand\doxysubparagraph{\@startsection{subparagraph}{5}{\parindent}% + {3.25ex \@plus1ex \@minus .2ex}% + {-1em}% + {\raggedright\normalfont\normalsize\bfseries}} +\makeatother +% Define caption that is also suitable in a table +\makeatletter +\def\doxyfigcaption{% +\refstepcounter{figure}% +\@dblarg{\@caption{figure}}} +\makeatother diff --git a/docs/docs/doxygen/latex/eltwise-add-mod_8hpp.tex b/docs/docs/doxygen/latex/eltwise-add-mod_8hpp.tex new file mode 100644 index 00000000..d124025c --- /dev/null +++ b/docs/docs/doxygen/latex/eltwise-add-mod_8hpp.tex @@ -0,0 +1,15 @@ +\hypertarget{eltwise-add-mod_8hpp}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/add-\/mod.hpp File Reference} 
+\label{eltwise-add-mod_8hpp}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/add-\/mod.hpp@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/add-\/mod.hpp}} +{\ttfamily \#include $<$stdint.\+h$>$}\newline +\doxysubsection*{Namespaces} +\begin{DoxyCompactItemize} +\item + \mbox{\hyperlink{namespaceintel}{intel}} +\item + \mbox{\hyperlink{namespaceintel_1_1hexl}{intel\+::hexl}} +\end{DoxyCompactItemize} +\doxysubsection*{Functions} +\begin{DoxyCompactItemize} +\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_a319244a133f57825ba7e593ad5c71709}{intel\+::hexl\+::\+Eltwise\+Add\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand1, const uint64\+\_\+t $\ast$operand2, uint64\+\_\+t n, uint64\+\_\+t modulus) +\begin{DoxyCompactList}\small\item\em Adds two vectors elementwise with modular reduction. \end{DoxyCompactList}\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/eltwise-cmp-add_8hpp.tex b/docs/docs/doxygen/latex/eltwise-cmp-add_8hpp.tex new file mode 100644 index 00000000..accaa69b --- /dev/null +++ b/docs/docs/doxygen/latex/eltwise-cmp-add_8hpp.tex @@ -0,0 +1,16 @@ +\hypertarget{eltwise-cmp-add_8hpp}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/cmp-\/add.hpp File Reference} +\label{eltwise-cmp-add_8hpp}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/cmp-\/add.hpp@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/cmp-\/add.hpp}} +{\ttfamily \#include $<$stdint.\+h$>$}\newline +{\ttfamily \#include \char`\"{}intel-\/hexl/util/util.\+hpp\char`\"{}}\newline +\doxysubsection*{Namespaces} +\begin{DoxyCompactItemize} +\item + \mbox{\hyperlink{namespaceintel}{intel}} +\item + \mbox{\hyperlink{namespaceintel_1_1hexl}{intel\+::hexl}} +\end{DoxyCompactItemize} +\doxysubsection*{Functions} 
+\begin{DoxyCompactItemize} +\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_ad09f0d71efdfbde79c2a8ed92d7da811}{intel\+::hexl\+::\+Eltwise\+Cmp\+Add}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand1, C\+M\+P\+I\+NT cmp, uint64\+\_\+t bound, uint64\+\_\+t diff, uint64\+\_\+t n) +\begin{DoxyCompactList}\small\item\em Computes element-\/wise conditional addition. \end{DoxyCompactList}\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/eltwise-cmp-sub-mod_8hpp.tex b/docs/docs/doxygen/latex/eltwise-cmp-sub-mod_8hpp.tex new file mode 100644 index 00000000..c1098a21 --- /dev/null +++ b/docs/docs/doxygen/latex/eltwise-cmp-sub-mod_8hpp.tex @@ -0,0 +1,16 @@ +\hypertarget{eltwise-cmp-sub-mod_8hpp}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/cmp-\/sub-\/mod.hpp File Reference} +\label{eltwise-cmp-sub-mod_8hpp}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/cmp-\/sub-\/mod.hpp@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/cmp-\/sub-\/mod.hpp}} +{\ttfamily \#include $<$stdint.\+h$>$}\newline +{\ttfamily \#include \char`\"{}intel-\/hexl/util/util.\+hpp\char`\"{}}\newline +\doxysubsection*{Namespaces} +\begin{DoxyCompactItemize} +\item + \mbox{\hyperlink{namespaceintel}{intel}} +\item + \mbox{\hyperlink{namespaceintel_1_1hexl}{intel\+::hexl}} +\end{DoxyCompactItemize} +\doxysubsection*{Functions} +\begin{DoxyCompactItemize} +\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_aa06f039b71cf61990911e753595f1f78}{intel\+::hexl\+::\+Eltwise\+Cmp\+Sub\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand1, C\+M\+P\+I\+NT cmp, uint64\+\_\+t bound, uint64\+\_\+t diff, uint64\+\_\+t modulus, uint64\+\_\+t n) +\begin{DoxyCompactList}\small\item\em Computes element-\/wise conditional modular subtraction. 
\end{DoxyCompactList}\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/eltwise-fma-mod_8hpp.tex b/docs/docs/doxygen/latex/eltwise-fma-mod_8hpp.tex new file mode 100644 index 00000000..24027bd4 --- /dev/null +++ b/docs/docs/doxygen/latex/eltwise-fma-mod_8hpp.tex @@ -0,0 +1,15 @@ +\hypertarget{eltwise-fma-mod_8hpp}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/fma-\/mod.hpp File Reference} +\label{eltwise-fma-mod_8hpp}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/fma-\/mod.hpp@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/fma-\/mod.hpp}} +{\ttfamily \#include $<$stdint.\+h$>$}\newline +\doxysubsection*{Namespaces} +\begin{DoxyCompactItemize} +\item + \mbox{\hyperlink{namespaceintel}{intel}} +\item + \mbox{\hyperlink{namespaceintel_1_1hexl}{intel\+::hexl}} +\end{DoxyCompactItemize} +\doxysubsection*{Functions} +\begin{DoxyCompactItemize} +\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_a5b65d563391b4a1a5041633aeb118aa5}{intel\+::hexl\+::\+Eltwise\+F\+M\+A\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$arg1, uint64\+\_\+t arg2, const uint64\+\_\+t $\ast$arg3, uint64\+\_\+t n, uint64\+\_\+t modulus, uint64\+\_\+t input\+\_\+mod\+\_\+factor) +\begin{DoxyCompactList}\small\item\em Computes fused multiply-\/add ({\ttfamily arg1} $\ast$ {\ttfamily arg2} + {\ttfamily arg3}) mod {\ttfamily modulus} element-\/wise, broadcasting scalars to vectors. 
\end{DoxyCompactList}\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/eltwise-mult-mod_8hpp.tex b/docs/docs/doxygen/latex/eltwise-mult-mod_8hpp.tex new file mode 100644 index 00000000..e9f53f08 --- /dev/null +++ b/docs/docs/doxygen/latex/eltwise-mult-mod_8hpp.tex @@ -0,0 +1,15 @@ +\hypertarget{eltwise-mult-mod_8hpp}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/mult-\/mod.hpp File Reference} +\label{eltwise-mult-mod_8hpp}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/mult-\/mod.hpp@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/mult-\/mod.hpp}} +{\ttfamily \#include $<$stdint.\+h$>$}\newline +\doxysubsection*{Namespaces} +\begin{DoxyCompactItemize} +\item + \mbox{\hyperlink{namespaceintel}{intel}} +\item + \mbox{\hyperlink{namespaceintel_1_1hexl}{intel\+::hexl}} +\end{DoxyCompactItemize} +\doxysubsection*{Functions} +\begin{DoxyCompactItemize} +\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_a705bc0321d937ae4d1f8d50279e3cff1}{intel\+::hexl\+::\+Eltwise\+Mult\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand1, const uint64\+\_\+t $\ast$operand2, uint64\+\_\+t n, uint64\+\_\+t modulus, uint64\+\_\+t input\+\_\+mod\+\_\+factor) +\begin{DoxyCompactList}\small\item\em Multiplies two vectors elementwise with modular reduction. 
\end{DoxyCompactList}\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/eltwise-reduce-mod_8hpp.tex b/docs/docs/doxygen/latex/eltwise-reduce-mod_8hpp.tex new file mode 100644 index 00000000..d78e7c74 --- /dev/null +++ b/docs/docs/doxygen/latex/eltwise-reduce-mod_8hpp.tex @@ -0,0 +1,15 @@ +\hypertarget{eltwise-reduce-mod_8hpp}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/reduce-\/mod.hpp File Reference} +\label{eltwise-reduce-mod_8hpp}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/reduce-\/mod.hpp@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/eltwise-\/reduce-\/mod.hpp}} +{\ttfamily \#include $<$stdint.\+h$>$}\newline +\doxysubsection*{Namespaces} +\begin{DoxyCompactItemize} +\item + \mbox{\hyperlink{namespaceintel}{intel}} +\item + \mbox{\hyperlink{namespaceintel_1_1hexl}{intel\+::hexl}} +\end{DoxyCompactItemize} +\doxysubsection*{Functions} +\begin{DoxyCompactItemize} +\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_af7e59b130824f9cd4ad0aa54c52ad50c}{intel\+::hexl\+::\+Eltwise\+Reduce\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand, uint64\+\_\+t modulus, uint64\+\_\+t n, uint64\+\_\+t input\+\_\+mod\+\_\+factor, uint64\+\_\+t output\+\_\+mod\+\_\+factor) +\begin{DoxyCompactList}\small\item\em Performs elementwise modular reduction. 
\end{DoxyCompactList}\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/files.tex b/docs/docs/doxygen/latex/files.tex new file mode 100644 index 00000000..b9270a6e --- /dev/null +++ b/docs/docs/doxygen/latex/files.tex @@ -0,0 +1,12 @@ +\doxysection{File List} +Here is a list of all files with brief descriptions\+:\begin{DoxyCompactList} +\item\contentsline{section}{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/\mbox{\hyperlink{intel-hexl_8hpp}{intel-\/hexl.\+hpp}} }{\pageref{intel-hexl_8hpp}}{} +\item\contentsline{section}{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/\mbox{\hyperlink{eltwise-add-mod_8hpp}{eltwise-\/add-\/mod.\+hpp}} }{\pageref{eltwise-add-mod_8hpp}}{} +\item\contentsline{section}{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/\mbox{\hyperlink{eltwise-cmp-add_8hpp}{eltwise-\/cmp-\/add.\+hpp}} }{\pageref{eltwise-cmp-add_8hpp}}{} +\item\contentsline{section}{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/\mbox{\hyperlink{eltwise-cmp-sub-mod_8hpp}{eltwise-\/cmp-\/sub-\/mod.\+hpp}} }{\pageref{eltwise-cmp-sub-mod_8hpp}}{} +\item\contentsline{section}{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/\mbox{\hyperlink{eltwise-fma-mod_8hpp}{eltwise-\/fma-\/mod.\+hpp}} }{\pageref{eltwise-fma-mod_8hpp}}{} +\item\contentsline{section}{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/\mbox{\hyperlink{eltwise-mult-mod_8hpp}{eltwise-\/mult-\/mod.\+hpp}} }{\pageref{eltwise-mult-mod_8hpp}}{} +\item\contentsline{section}{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/eltwise/\mbox{\hyperlink{eltwise-reduce-mod_8hpp}{eltwise-\/reduce-\/mod.\+hpp}} }{\pageref{eltwise-reduce-mod_8hpp}}{} 
+\item\contentsline{section}{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/ntt/\mbox{\hyperlink{ntt_8hpp}{ntt.\+hpp}} }{\pageref{ntt_8hpp}}{} +\item\contentsline{section}{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/util/\mbox{\hyperlink{util_8hpp}{util.\+hpp}} }{\pageref{util_8hpp}}{} +\end{DoxyCompactList} diff --git a/docs/docs/doxygen/latex/index.tex b/docs/docs/doxygen/latex/index.tex new file mode 100644 index 00000000..f142a7e4 --- /dev/null +++ b/docs/docs/doxygen/latex/index.tex @@ -0,0 +1,186 @@ +Intel H\+E\+XL is an open-\/source library which provides efficient implementations of integer arithmetic on Galois fields. Such arithmetic is prevalent in cryptography, particularly in homomorphic encryption (HE) schemes. Intel H\+E\+XL targets integer arithmetic with word-\/sized primes, typically 40-\/60 bits. Intel H\+E\+XL provides an A\+PI for 64-\/bit unsigned integers and targets Intel C\+P\+Us.\hypertarget{index_autotoc_md1}{}\doxysection{Contents}\label{index_autotoc_md1} + +\begin{DoxyItemize} +\item \href{\#intel-homomorphic-encryption-acceleration-library-hexl}{\texttt{ Intel Homomorphic Encryption Acceleration Library (H\+E\+XL)}} +\begin{DoxyItemize} +\item \href{\#contents}{\texttt{ Contents}} +\item \href{\#introduction}{\texttt{ Introduction}} +\item \href{\#building-intel-hexl}{\texttt{ Building Intel H\+E\+XL}} +\begin{DoxyItemize} +\item \href{\#dependencies}{\texttt{ Dependencies}} +\item \href{\#compile-time-options}{\texttt{ Compile-\/time options}} +\item \href{\#compiling-intel-hexl}{\texttt{ Compiling Intel H\+E\+XL}} +\end{DoxyItemize} +\item \href{\#testing-intel-hexl}{\texttt{ Testing Intel H\+E\+XL}} +\item \href{\#benchmarking-intel-hexl}{\texttt{ Benchmarking Intel H\+E\+XL}} +\item \href{\#using-intel-hexl}{\texttt{ Using Intel H\+E\+XL}} +\item \href{\#debugging}{\texttt{ Debugging}} +\item \href{\#thread-safety}{\texttt{ Thread-\/safety}} 
+\end{DoxyItemize} +\item \href{\#documentation}{\texttt{ Documentation}} +\begin{DoxyItemize} +\item \href{\#doxygen}{\texttt{ Doxygen}} +\item \href{\#sphinx}{\texttt{ Sphinx}} +\end{DoxyItemize} +\item \href{\#contributing}{\texttt{ Contributing}} +\begin{DoxyItemize} +\item \href{\#repository-layout}{\texttt{ Repository layout}} +\end{DoxyItemize} +\end{DoxyItemize}\hypertarget{index_autotoc_md2}{}\doxysection{Introduction}\label{index_autotoc_md2} +Many cryptographic applications, particularly homomorphic encryption (HE), rely on integer polynomial arithmetic in a finite field. HE, which enables computation on encrypted data, typically uses polynomials with degree {\ttfamily N} a power of two roughly in the range {\ttfamily N=\mbox{[}2$^\wedge$\{10\}, 2$^\wedge$\{17\}\mbox{]}}. The coefficients of these polynomials are in a finite field with a word-\/sized prime, {\ttfamily p}, up to {\ttfamily p}$\sim$62 bits. More precisely, the polynomials live in the ring {\ttfamily Z\+\_\+p\mbox{[}X\mbox{]}/(X$^\wedge$N + 1)}. That is, when adding or multiplying two polynomials, each coefficient of the result is reduced by the prime modulus {\ttfamily p}. When multiplying two polynomials, the resulting polynomial of degree {\ttfamily 2N} is additionally reduced by taking the remainder when dividing by {\ttfamily X$^\wedge$\+N+1}. + +The primary bottleneck in many HE applications is polynomial-\/polynomial multiplication in {\ttfamily Z\+\_\+p\mbox{[}X\mbox{]}/(X$^\wedge$N + 1)}. For efficient implementation, Intel H\+E\+XL implements the negacyclic number-\/theoretic transform (N\+TT). To multiply two polynomials, {\ttfamily p\+\_\+1(x), p\+\_\+2(x)} using the N\+TT, we perform the Fwd\+N\+TT on the two input polynomials, then perform an element-\/wise modular multiplication, and perform the Inv\+N\+TT on the result. 
+ +Intel H\+E\+XL implements the following functions\+: +\begin{DoxyItemize} +\item The forward and inverse negacyclic number-\/theoretic transform (N\+TT) +\item Element-\/wise vector-\/vector modular multiplication +\item Element-\/wise vector-\/scalar modular multiplication with optional addition +\item Element-\/wise modular multiplication +\end{DoxyItemize} + +For each function, the library implements one or several Intel(\+R) A\+V\+X-\/512 implementations, as well as a less performant, more readable native C++ implementation. Intel H\+E\+XL will automatically choose the best implementation for the given C\+PU Intel(\+R) A\+V\+X-\/512 feature set. In particular, when the modulus {\ttfamily p} is less than {\ttfamily 2$^\wedge$\{50\}}, the A\+V\+X512\+I\+F\+MA instruction set available on Intel Ice\+Lake server and Ice\+Lake client will provide a more efficient implementation. + +For additional functionality, see the public headers, located in {\ttfamily include/intel-\/hexl} \hypertarget{index_autotoc_md3}{}\doxysection{Building Intel H\+E\+XL}\label{index_autotoc_md3} +\hypertarget{index_autotoc_md4}{}\doxysubsection{Dependencies}\label{index_autotoc_md4} +We have tested Intel H\+E\+XL on the following operating systems\+: +\begin{DoxyItemize} +\item Ubuntu 18.\+04 +\item mac\+OS 10.\+15 +\item Microsoft Windows 10 +\end{DoxyItemize} + +Intel H\+E\+XL requires the following dependencies\+: + +\tabulinesep=1mm +\begin{longtabu}spread 0pt [c]{*{2}{|X[-1]}|} +\hline +\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ Dependency }&\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ Version }\\\cline{1-2} +\endfirsthead +\hline +\endfoot +\hline +\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ Dependency }&\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ Version }\\\cline{1-2} +\endhead +C\+Make &$>$= 3.\+5.\+1 \\\cline{1-2} +Compiler &gcc $>$= 7.\+0, clang++ $>$= 5.\+0, M\+S\+VC $>$= 2019 \\\cline{1-2} +\end{longtabu} + + +For best performance, we 
recommend using a processor with A\+V\+X512-\/\+I\+F\+M\+A52 support, and a recent compiler (gcc $>$= 8.\+0, clang++ $>$= 6.\+0). To determine if your processor supports A\+V\+X512-\/\+I\+F\+M\+A52, simply look for {\ttfamily H\+E\+X\+L\+\_\+\+H\+A\+S\+\_\+\+A\+V\+X512\+I\+F\+MA} during the configure step (see \href{\#compiling-intel-hexl}{\texttt{ Compiling Intel H\+E\+XL}}).\hypertarget{index_autotoc_md5}{}\doxysubsection{Compile-\/time options}\label{index_autotoc_md5} +In addition to the standard C\+Make build options, Intel H\+E\+XL supports several compile-\/time flags to configure the build. For convenience, they are listed below\+: + +\tabulinesep=1mm +\begin{longtabu}spread 0pt [c]{*{3}{|X[-1]}|} +\hline +\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ C\+Make option }&\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ Values }&\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ }\\\cline{1-3} +\endfirsthead +\hline +\endfoot +\hline +\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ C\+Make option }&\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ Values }&\PBS\centering \cellcolor{\tableheadbgcolor}\textbf{ }\\\cline{1-3} +\endhead +H\+E\+X\+L\+\_\+\+B\+E\+N\+C\+H\+M\+A\+RK &ON / O\+FF (default ON) &Set to ON to enable benchmark suite via Google benchmark \\\cline{1-3} +H\+E\+X\+L\+\_\+\+C\+O\+V\+E\+R\+A\+GE &ON / O\+FF (default O\+FF) &Set to ON to enable coverage report of unit-\/tests \\\cline{1-3} +H\+E\+X\+L\+\_\+\+D\+E\+B\+UG &ON / O\+FF (default O\+FF) &Set to ON to enable debugging at large runtime penalty \\\cline{1-3} +H\+E\+X\+L\+\_\+\+D\+O\+CS &ON / O\+FF (default O\+FF) &Set to ON to enable building of documentation \\\cline{1-3} +H\+E\+X\+L\+\_\+\+E\+N\+A\+B\+L\+E\+\_\+\+A\+D\+D\+R\+E\+S\+S\+\_\+\+S\+A\+N\+I\+T\+I\+Z\+ER &ON / O\+FF (default O\+FF) &Set to ON to enable building with address sanitizer (A\+San) \\\cline{1-3} +H\+E\+X\+L\+\_\+\+E\+N\+A\+B\+L\+E\+\_\+\+T\+H\+R\+E\+A\+D\+\_\+\+S\+A\+N\+I\+T\+I\+Z\+ER &ON / O\+FF 
(default O\+FF) &Set to ON to enable building with thread sanitizer (T\+San) \\\cline{1-3} +H\+E\+X\+L\+\_\+\+E\+N\+A\+B\+L\+E\+\_\+\+U\+B\+\_\+\+S\+A\+N\+I\+T\+I\+Z\+ER &ON / O\+FF (default O\+FF) &Set to ON to enable building with undefined behavior sanitizer (U\+B\+San) \\\cline{1-3} +H\+E\+X\+L\+\_\+\+E\+X\+P\+O\+RT &ON / O\+FF (default O\+FF) &Set to ON to enable export of Intel H\+E\+XL for use in 3rd-\/party project \\\cline{1-3} +H\+E\+X\+L\+\_\+\+S\+H\+A\+R\+E\+D\+\_\+\+L\+IB &ON / O\+FF (default O\+FF) &Set to ON to enable building shared library \\\cline{1-3} +H\+E\+X\+L\+\_\+\+T\+E\+S\+T\+I\+NG &ON / O\+FF (default ON) &Set to ON to enable building of unit-\/tests \\\cline{1-3} +\end{longtabu} +\hypertarget{index_autotoc_md6}{}\doxysubsection{Compiling Intel H\+E\+XL}\label{index_autotoc_md6} +The instructions to build Intel H\+E\+XL are common between Linux, Mac\+OS, and Windows. + +To compile Intel H\+E\+XL from source code, first clone the repository into your current directory. Then, to configure the build, call +\begin{DoxyCode}{0} +\DoxyCodeLine{cmake -\/S . -\/B build} +\end{DoxyCode} + + +adding the desired compile-\/time options with a {\ttfamily -\/D} flag. For instance, to build Intel H\+E\+XL with debugging capabilities, call +\begin{DoxyCode}{0} +\DoxyCodeLine{cmake -\/S . -\/B build -\/DHEXL\_DEBUG=ON} +\end{DoxyCode} + + +Then, to build Intel H\+E\+XL, call +\begin{DoxyCode}{0} +\DoxyCodeLine{cmake -\/-\/build build} +\end{DoxyCode} + + +This will build the Intel H\+E\+XL library in the {\ttfamily build/intel-\/hexl/lib/} directory. + +To install Intel H\+E\+XL to the installation directory, run +\begin{DoxyCode}{0} +\DoxyCodeLine{cmake -\/-\/install build} +\end{DoxyCode} + + +To use a non-\/standard installation directory, configure the build with +\begin{DoxyCode}{0} +\DoxyCodeLine{cmake -\/S . 
-\/B build -\/DCMAKE\_INSTALL\_PREFIX=/path/to/install} +\end{DoxyCode} +\hypertarget{index_autotoc_md7}{}\doxysection{Testing Intel H\+E\+XL}\label{index_autotoc_md7} +To run a set of unit tests via Googletest, configure and build Intel H\+E\+XL with {\ttfamily -\/D\+H\+E\+X\+L\+\_\+\+T\+E\+S\+T\+I\+NG=ON} (see \href{\#compile-time-options}{\texttt{ Compile-\/time options}}). Then, run +\begin{DoxyCode}{0} +\DoxyCodeLine{cmake -\/-\/build build -\/-\/target unittest} +\end{DoxyCode} + + +The unit-\/test executable itself is located at {\ttfamily build/test/unit-\/test} \hypertarget{index_autotoc_md8}{}\doxysection{Benchmarking Intel H\+E\+XL}\label{index_autotoc_md8} +To run a set of benchmarks via Google benchmark, configure and build Intel H\+E\+XL with {\ttfamily -\/D\+H\+E\+X\+L\+\_\+\+B\+E\+N\+C\+H\+M\+A\+RK=ON} (see \href{\#compile-time-options}{\texttt{ Compile-\/time options}}). Then, run +\begin{DoxyCode}{0} +\DoxyCodeLine{cmake -\/-\/build build -\/-\/target bench} +\end{DoxyCode} + + +The benchmark executable itself is located at {\ttfamily build/benchmark/bench\+\_\+hexl}\hypertarget{index_autotoc_md9}{}\doxysection{Using Intel H\+E\+XL}\label{index_autotoc_md9} +The {\ttfamily example} folder has an example of using Intel H\+E\+XL in a third-\/party project.\hypertarget{index_autotoc_md10}{}\doxysection{Debugging}\label{index_autotoc_md10} +For optimal performance, Intel H\+E\+XL does not perform input validation. In many cases the time required for the validation would be longer than the execution of the function itself. To debug Intel H\+E\+XL, configure and build Intel H\+E\+XL with {\ttfamily -\/D\+H\+E\+X\+L\+\_\+\+D\+E\+B\+UG=ON} (see \href{\#compile-time-options}{\texttt{ Compile-\/time options}}). This will generate a debug version of the library, e.\+g. {\ttfamily libintel\+\_\+hexl\+\_\+debug.\+a}, that can be used to debug the execution. 
+ +{\bfseries{Note}}, enabling {\ttfamily H\+E\+X\+L\+\_\+\+D\+E\+B\+UG=ON} will result in a significant runtime overhead. \hypertarget{index_autotoc_md11}{}\doxysection{Thread-\/safety}\label{index_autotoc_md11} +Intel H\+E\+XL is single-\/threaded and thread-\/safe.\hypertarget{index_autotoc_md12}{}\doxysection{Documentation}\label{index_autotoc_md12} +Intel H\+E\+XL supports documentation via Doxygen and sphinx. To build documentation, first install {\ttfamily doxygen} and {\ttfamily graphviz}, e.\+g. +\begin{DoxyCode}{0} +\DoxyCodeLine{sudo apt-\/get install doxygen graphviz} +\end{DoxyCode} + + +Then, configure Intel H\+E\+XL with {\ttfamily -\/D\+H\+E\+X\+L\+\_\+\+D\+O\+CS=ON} (see \href{\#compile-time-options}{\texttt{ Compile-\/time options}}). \hypertarget{index_autotoc_md13}{}\doxysubsubsection{Doxygen}\label{index_autotoc_md13} +To build Doxygen documentation, after configuring Intel H\+E\+XL with {\ttfamily -\/D\+H\+E\+X\+L\+\_\+\+D\+O\+CS=ON}, run +\begin{DoxyCode}{0} +\DoxyCodeLine{cmake -\/-\/build build -\/-\/target doxygen} +\end{DoxyCode} + + +To view the generated Doxygen documentation, open the generated {\ttfamily build/docs/doxygen/html/index.\+html} file in a web browser.\hypertarget{index_autotoc_md14}{}\doxysubsubsection{Sphinx}\label{index_autotoc_md14} +To build the sphinx documentation, install {\ttfamily sphinx} and required dependencies {\ttfamily breathe, m2r2}, e.\+g. 
+\begin{DoxyCode}{0} +\DoxyCodeLine{sudo apt-\/get install python3-\/sphinx} +\DoxyCodeLine{pip3 install breathe m2r2} +\end{DoxyCode} + + +Then, after configuring Intel H\+E\+XL with {\ttfamily -\/D\+H\+E\+X\+L\+\_\+\+D\+O\+CS=ON}, run +\begin{DoxyCode}{0} +\DoxyCodeLine{cmake -\/-\/build build -\/-\/target docs} +\end{DoxyCode} + + +To view the generated Sphinx documentation, open the generated {\ttfamily build/docs/sphinx/html/index.\+html} file in a web browser.\hypertarget{index_autotoc_md15}{}\doxysection{Contributing}\label{index_autotoc_md15} +At this time, Intel H\+E\+XL does not accept external contributions. Feel free to discuss via issues. + +For Intel developers, use \href{https://pre-commit.com/}{\texttt{ pre-\/commit}} to validate the formatting of the code. + +Before contributing, please run +\begin{DoxyCode}{0} +\DoxyCodeLine{make check} +\end{DoxyCode} + + +and make sure all unit tests and pre-\/commit checks pass.\hypertarget{index_autotoc_md16}{}\doxysubsection{Repository layout}\label{index_autotoc_md16} +Public headers reside in the {\ttfamily intel-\/hexl/include} folder. Private headers, e.\+g. those containing Intel(\+R) A\+V\+X-\/512 code should not be put in this folder. 
\ No newline at end of file diff --git a/docs/docs/doxygen/latex/intel-hexl_8hpp.tex b/docs/docs/doxygen/latex/intel-hexl_8hpp.tex new file mode 100644 index 00000000..bf688533 --- /dev/null +++ b/docs/docs/doxygen/latex/intel-hexl_8hpp.tex @@ -0,0 +1,10 @@ +\hypertarget{intel-hexl_8hpp}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/intel-\/hexl.hpp File Reference} +\label{intel-hexl_8hpp}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/intel-\/hexl.hpp@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/intel-\/hexl.hpp}} +{\ttfamily \#include \char`\"{}intel-\/hexl/eltwise/eltwise-\/add-\/mod.\+hpp\char`\"{}}\newline +{\ttfamily \#include \char`\"{}intel-\/hexl/eltwise/eltwise-\/cmp-\/add.\+hpp\char`\"{}}\newline +{\ttfamily \#include \char`\"{}intel-\/hexl/eltwise/eltwise-\/cmp-\/sub-\/mod.\+hpp\char`\"{}}\newline +{\ttfamily \#include \char`\"{}intel-\/hexl/eltwise/eltwise-\/fma-\/mod.\+hpp\char`\"{}}\newline +{\ttfamily \#include \char`\"{}intel-\/hexl/eltwise/eltwise-\/mult-\/mod.\+hpp\char`\"{}}\newline +{\ttfamily \#include \char`\"{}intel-\/hexl/eltwise/eltwise-\/reduce-\/mod.\+hpp\char`\"{}}\newline +{\ttfamily \#include \char`\"{}intel-\/hexl/ntt/ntt.\+hpp\char`\"{}}\newline +{\ttfamily \#include \char`\"{}intel-\/hexl/util/util.\+hpp\char`\"{}}\newline diff --git a/docs/docs/doxygen/latex/longtable_doxygen.sty b/docs/docs/doxygen/latex/longtable_doxygen.sty new file mode 100644 index 00000000..a0eb314f --- /dev/null +++ b/docs/docs/doxygen/latex/longtable_doxygen.sty @@ -0,0 +1,448 @@ +%% +%% This is file `longtable.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% longtable.dtx (with options: `package') +%% +%% This is a generated file. 
+%% +%% The source is maintained by the LaTeX Project team and bug +%% reports for it can be opened at http://latex-project.org/bugs.html +%% (but please observe conditions on bug reports sent to that address!) +%% +%% Copyright 1993-2016 +%% The LaTeX3 Project and any individual authors listed elsewhere +%% in this file. +%% +%% This file was generated from file(s) of the Standard LaTeX `Tools Bundle'. +%% -------------------------------------------------------------------------- +%% +%% It may be distributed and/or modified under the +%% conditions of the LaTeX Project Public License, either version 1.3c +%% of this license or (at your option) any later version. +%% The latest version of this license is in +%% http://www.latex-project.org/lppl.txt +%% and version 1.3c or later is part of all distributions of LaTeX +%% version 2005/12/01 or later. +%% +%% This file may only be distributed together with a copy of the LaTeX +%% `Tools Bundle'. You may however distribute the LaTeX `Tools Bundle' +%% without such generated files. +%% +%% The list of all files belonging to the LaTeX `Tools Bundle' is +%% given in the file `manifest.txt'. +%% +%% File: longtable.dtx Copyright (C) 1990-2001 David Carlisle +\NeedsTeXFormat{LaTeX2e}[1995/06/01] +\ProvidesPackage{longtable_doxygen} + [2014/10/28 v4.11 Multi-page Table package (DPC) - frozen version for doxygen] +\def\LT@err{\PackageError{longtable}} +\def\LT@warn{\PackageWarning{longtable}} +\def\LT@final@warn{% + \AtEndDocument{% + \LT@warn{Table \@width s have changed. 
Rerun LaTeX.\@gobbletwo}}% + \global\let\LT@final@warn\relax} +\DeclareOption{errorshow}{% + \def\LT@warn{\PackageInfo{longtable}}} +\DeclareOption{pausing}{% + \def\LT@warn#1{% + \LT@err{#1}{This is not really an error}}} +\DeclareOption{set}{} +\DeclareOption{final}{} +\ProcessOptions +\newskip\LTleft \LTleft=\fill +\newskip\LTright \LTright=\fill +\newskip\LTpre \LTpre=\bigskipamount +\newskip\LTpost \LTpost=\bigskipamount +\newcount\LTchunksize \LTchunksize=20 +\let\c@LTchunksize\LTchunksize +\newdimen\LTcapwidth \LTcapwidth=4in +\newbox\LT@head +\newbox\LT@firsthead +\newbox\LT@foot +\newbox\LT@lastfoot +\newcount\LT@cols +\newcount\LT@rows +\newcounter{LT@tables} +\newcounter{LT@chunks}[LT@tables] +\ifx\c@table\undefined + \newcounter{table} + \def\fnum@table{\tablename~\thetable} +\fi +\ifx\tablename\undefined + \def\tablename{Table} +\fi +\newtoks\LT@p@ftn +\mathchardef\LT@end@pen=30000 +\def\longtable{% + \par + \ifx\multicols\@undefined + \else + \ifnum\col@number>\@ne + \@twocolumntrue + \fi + \fi + \if@twocolumn + \LT@err{longtable not in 1-column mode}\@ehc + \fi + \begingroup + \@ifnextchar[\LT@array{\LT@array[x]}} +\def\LT@array[#1]#2{% + \refstepcounter{table}\stepcounter{LT@tables}% + \if l#1% + \LTleft\z@ \LTright\fill + \else\if r#1% + \LTleft\fill \LTright\z@ + \else\if c#1% + \LTleft\fill \LTright\fill + \fi\fi\fi + \let\LT@mcol\multicolumn + \let\LT@@tabarray\@tabarray + \let\LT@@hl\hline + \def\@tabarray{% + \let\hline\LT@@hl + \LT@@tabarray}% + \let\\\LT@tabularcr\let\tabularnewline\\% + \def\newpage{\noalign{\break}}% + \def\pagebreak{\noalign{\ifnum`}=0\fi\@testopt{\LT@no@pgbk-}4}% + \def\nopagebreak{\noalign{\ifnum`}=0\fi\@testopt\LT@no@pgbk4}% + \let\hline\LT@hline \let\kill\LT@kill\let\caption\LT@caption + \@tempdima\ht\strutbox + \let\@endpbox\LT@endpbox + \ifx\extrarowheight\@undefined + \let\@acol\@tabacol + \let\@classz\@tabclassz \let\@classiv\@tabclassiv + \def\@startpbox{\vtop\LT@startpbox}% + \let\@@startpbox\@startpbox + 
\let\@@endpbox\@endpbox + \let\LT@LL@FM@cr\@tabularcr + \else + \advance\@tempdima\extrarowheight + \col@sep\tabcolsep + \let\@startpbox\LT@startpbox\let\LT@LL@FM@cr\@arraycr + \fi + \setbox\@arstrutbox\hbox{\vrule + \@height \arraystretch \@tempdima + \@depth \arraystretch \dp \strutbox + \@width \z@}% + \let\@sharp##\let\protect\relax + \begingroup + \@mkpream{#2}% + \xdef\LT@bchunk{% + \global\advance\c@LT@chunks\@ne + \global\LT@rows\z@\setbox\z@\vbox\bgroup + \LT@setprevdepth + \tabskip\LTleft \noexpand\halign to\hsize\bgroup + \tabskip\z@ \@arstrut \@preamble \tabskip\LTright \cr}% + \endgroup + \expandafter\LT@nofcols\LT@bchunk&\LT@nofcols + \LT@make@row + \m@th\let\par\@empty + \everycr{}\lineskip\z@\baselineskip\z@ + \LT@bchunk} +\def\LT@no@pgbk#1[#2]{\penalty #1\@getpen{#2}\ifnum`{=0\fi}} +\def\LT@start{% + \let\LT@start\endgraf + \endgraf\penalty\z@\vskip\LTpre + \dimen@\pagetotal + \advance\dimen@ \ht\ifvoid\LT@firsthead\LT@head\else\LT@firsthead\fi + \advance\dimen@ \dp\ifvoid\LT@firsthead\LT@head\else\LT@firsthead\fi + \advance\dimen@ \ht\LT@foot + \dimen@ii\vfuzz + \vfuzz\maxdimen + \setbox\tw@\copy\z@ + \setbox\tw@\vsplit\tw@ to \ht\@arstrutbox + \setbox\tw@\vbox{\unvbox\tw@}% + \vfuzz\dimen@ii + \advance\dimen@ \ht + \ifdim\ht\@arstrutbox>\ht\tw@\@arstrutbox\else\tw@\fi + \advance\dimen@\dp + \ifdim\dp\@arstrutbox>\dp\tw@\@arstrutbox\else\tw@\fi + \advance\dimen@ -\pagegoal + \ifdim \dimen@>\z@\vfil\break\fi + \global\@colroom\@colht + \ifvoid\LT@foot\else + \advance\vsize-\ht\LT@foot + \global\advance\@colroom-\ht\LT@foot + \dimen@\pagegoal\advance\dimen@-\ht\LT@foot\pagegoal\dimen@ + \maxdepth\z@ + \fi + \ifvoid\LT@firsthead\copy\LT@head\else\box\LT@firsthead\fi\nobreak + \output{\LT@output}} +\def\endlongtable{% + \crcr + \noalign{% + \let\LT@entry\LT@entry@chop + \xdef\LT@save@row{\LT@save@row}}% + \LT@echunk + \LT@start + \unvbox\z@ + \LT@get@widths + \if@filesw + {\let\LT@entry\LT@entry@write\immediate\write\@auxout{% + 
\gdef\expandafter\noexpand + \csname LT@\romannumeral\c@LT@tables\endcsname + {\LT@save@row}}}% + \fi + \ifx\LT@save@row\LT@@save@row + \else + \LT@warn{Column \@width s have changed\MessageBreak + in table \thetable}% + \LT@final@warn + \fi + \endgraf\penalty -\LT@end@pen + \endgroup + \global\@mparbottom\z@ + \pagegoal\vsize + \endgraf\penalty\z@\addvspace\LTpost + \ifvoid\footins\else\insert\footins{}\fi} +\def\LT@nofcols#1&{% + \futurelet\@let@token\LT@n@fcols} +\def\LT@n@fcols{% + \advance\LT@cols\@ne + \ifx\@let@token\LT@nofcols + \expandafter\@gobble + \else + \expandafter\LT@nofcols + \fi} +\def\LT@tabularcr{% + \relax\iffalse{\fi\ifnum0=`}\fi + \@ifstar + {\def\crcr{\LT@crcr\noalign{\nobreak}}\let\cr\crcr + \LT@t@bularcr}% + {\LT@t@bularcr}} +\let\LT@crcr\crcr +\let\LT@setprevdepth\relax +\def\LT@t@bularcr{% + \global\advance\LT@rows\@ne + \ifnum\LT@rows=\LTchunksize + \gdef\LT@setprevdepth{% + \prevdepth\z@\global + \global\let\LT@setprevdepth\relax}% + \expandafter\LT@xtabularcr + \else + \ifnum0=`{}\fi + \expandafter\LT@LL@FM@cr + \fi} +\def\LT@xtabularcr{% + \@ifnextchar[\LT@argtabularcr\LT@ntabularcr} +\def\LT@ntabularcr{% + \ifnum0=`{}\fi + \LT@echunk + \LT@start + \unvbox\z@ + \LT@get@widths + \LT@bchunk} +\def\LT@argtabularcr[#1]{% + \ifnum0=`{}\fi + \ifdim #1>\z@ + \unskip\@xargarraycr{#1}% + \else + \@yargarraycr{#1}% + \fi + \LT@echunk + \LT@start + \unvbox\z@ + \LT@get@widths + \LT@bchunk} +\def\LT@echunk{% + \crcr\LT@save@row\cr\egroup + \global\setbox\@ne\lastbox + \unskip + \egroup} +\def\LT@entry#1#2{% + \ifhmode\@firstofone{&}\fi\omit + \ifnum#1=\c@LT@chunks + \else + \kern#2\relax + \fi} +\def\LT@entry@chop#1#2{% + \noexpand\LT@entry + {\ifnum#1>\c@LT@chunks + 1}{0pt% + \else + #1}{#2% + \fi}} +\def\LT@entry@write{% + \noexpand\LT@entry^^J% + \@spaces} +\def\LT@kill{% + \LT@echunk + \LT@get@widths + \expandafter\LT@rebox\LT@bchunk} +\def\LT@rebox#1\bgroup{% + #1\bgroup + \unvbox\z@ + \unskip + \setbox\z@\lastbox} +\def\LT@blank@row{% + 
\xdef\LT@save@row{\expandafter\LT@build@blank + \romannumeral\number\LT@cols 001 }} +\def\LT@build@blank#1{% + \if#1m% + \noexpand\LT@entry{1}{0pt}% + \expandafter\LT@build@blank + \fi} +\def\LT@make@row{% + \global\expandafter\let\expandafter\LT@save@row + \csname LT@\romannumeral\c@LT@tables\endcsname + \ifx\LT@save@row\relax + \LT@blank@row + \else + {\let\LT@entry\or + \if!% + \ifcase\expandafter\expandafter\expandafter\LT@cols + \expandafter\@gobble\LT@save@row + \or + \else + \relax + \fi + !% + \else + \aftergroup\LT@blank@row + \fi}% + \fi} +\let\setlongtables\relax +\def\LT@get@widths{% + \setbox\tw@\hbox{% + \unhbox\@ne + \let\LT@old@row\LT@save@row + \global\let\LT@save@row\@empty + \count@\LT@cols + \loop + \unskip + \setbox\tw@\lastbox + \ifhbox\tw@ + \LT@def@row + \advance\count@\m@ne + \repeat}% + \ifx\LT@@save@row\@undefined + \let\LT@@save@row\LT@save@row + \fi} +\def\LT@def@row{% + \let\LT@entry\or + \edef\@tempa{% + \ifcase\expandafter\count@\LT@old@row + \else + {1}{0pt}% + \fi}% + \let\LT@entry\relax + \xdef\LT@save@row{% + \LT@entry + \expandafter\LT@max@sel\@tempa + \LT@save@row}} +\def\LT@max@sel#1#2{% + {\ifdim#2=\wd\tw@ + #1% + \else + \number\c@LT@chunks + \fi}% + {\the\wd\tw@}} +\def\LT@hline{% + \noalign{\ifnum0=`}\fi + \penalty\@M + \futurelet\@let@token\LT@@hline} +\def\LT@@hline{% + \ifx\@let@token\hline + \global\let\@gtempa\@gobble + \gdef\LT@sep{\penalty-\@medpenalty\vskip\doublerulesep}% + \else + \global\let\@gtempa\@empty + \gdef\LT@sep{\penalty-\@lowpenalty\vskip-\arrayrulewidth}% + \fi + \ifnum0=`{\fi}% + \multispan\LT@cols + \unskip\leaders\hrule\@height\arrayrulewidth\hfill\cr + \noalign{\LT@sep}% + \multispan\LT@cols + \unskip\leaders\hrule\@height\arrayrulewidth\hfill\cr + \noalign{\penalty\@M}% + \@gtempa} +\def\LT@caption{% + \noalign\bgroup + \@ifnextchar[{\egroup\LT@c@ption\@firstofone}\LT@capti@n} +\def\LT@c@ption#1[#2]#3{% + \LT@makecaption#1\fnum@table{#3}% + \def\@tempa{#2}% + \ifx\@tempa\@empty\else + 
{\let\\\space + \addcontentsline{lot}{table}{\protect\numberline{\thetable}{#2}}}% + \fi} +\def\LT@capti@n{% + \@ifstar + {\egroup\LT@c@ption\@gobble[]}% + {\egroup\@xdblarg{\LT@c@ption\@firstofone}}} +\def\LT@makecaption#1#2#3{% + \LT@mcol\LT@cols c{\hbox to\z@{\hss\parbox[t]\LTcapwidth{% + \sbox\@tempboxa{#1{#2: }#3}% + \ifdim\wd\@tempboxa>\hsize + #1{#2: }#3% + \else + \hbox to\hsize{\hfil\box\@tempboxa\hfil}% + \fi + \endgraf\vskip\baselineskip}% + \hss}}} +\def\LT@output{% + \ifnum\outputpenalty <-\@Mi + \ifnum\outputpenalty > -\LT@end@pen + \LT@err{floats and marginpars not allowed in a longtable}\@ehc + \else + \setbox\z@\vbox{\unvbox\@cclv}% + \ifdim \ht\LT@lastfoot>\ht\LT@foot + \dimen@\pagegoal + \advance\dimen@-\ht\LT@lastfoot + \ifdim\dimen@<\ht\z@ + \setbox\@cclv\vbox{\unvbox\z@\copy\LT@foot\vss}% + \@makecol + \@outputpage + \setbox\z@\vbox{\box\LT@head}% + \fi + \fi + \global\@colroom\@colht + \global\vsize\@colht + \vbox + {\unvbox\z@\box\ifvoid\LT@lastfoot\LT@foot\else\LT@lastfoot\fi}% + \fi + \else + \setbox\@cclv\vbox{\unvbox\@cclv\copy\LT@foot\vss}% + \@makecol + \@outputpage + \global\vsize\@colroom + \copy\LT@head\nobreak + \fi} +\def\LT@end@hd@ft#1{% + \LT@echunk + \ifx\LT@start\endgraf + \LT@err + {Longtable head or foot not at start of table}% + {Increase LTchunksize}% + \fi + \setbox#1\box\z@ + \LT@get@widths + \LT@bchunk} +\def\endfirsthead{\LT@end@hd@ft\LT@firsthead} +\def\endhead{\LT@end@hd@ft\LT@head} +\def\endfoot{\LT@end@hd@ft\LT@foot} +\def\endlastfoot{\LT@end@hd@ft\LT@lastfoot} +\def\LT@startpbox#1{% + \bgroup + \let\@footnotetext\LT@p@ftntext + \setlength\hsize{#1}% + \@arrayparboxrestore + \vrule \@height \ht\@arstrutbox \@width \z@} +\def\LT@endpbox{% + \@finalstrut\@arstrutbox + \egroup + \the\LT@p@ftn + \global\LT@p@ftn{}% + \hfil} +\def\LT@p@ftntext#1{% + \edef\@tempa{\the\LT@p@ftn\noexpand\footnotetext[\the\c@footnote]}% + \global\LT@p@ftn\expandafter{\@tempa{#1}}}% + +\@namedef{ver@longtable.sty}{2014/10/28 v4.11 Multi-page 
Table package (DPC) - frozen version for doxygen} +\endinput +%% +%% End of file `longtable.sty'. diff --git a/docs/docs/doxygen/latex/namespaceintel.tex b/docs/docs/doxygen/latex/namespaceintel.tex new file mode 100644 index 00000000..2dab7da0 --- /dev/null +++ b/docs/docs/doxygen/latex/namespaceintel.tex @@ -0,0 +1,7 @@ +\hypertarget{namespaceintel}{}\doxysection{intel Namespace Reference} +\label{namespaceintel}\index{intel@{intel}} +\doxysubsection*{Namespaces} +\begin{DoxyCompactItemize} +\item + \mbox{\hyperlink{namespaceintel_1_1hexl}{hexl}} +\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/namespaceintel_1_1hexl.tex b/docs/docs/doxygen/latex/namespaceintel_1_1hexl.tex new file mode 100644 index 00000000..e877166c --- /dev/null +++ b/docs/docs/doxygen/latex/namespaceintel_1_1hexl.tex @@ -0,0 +1,259 @@ +\hypertarget{namespaceintel_1_1hexl}{}\doxysection{intel\+::hexl Namespace Reference} +\label{namespaceintel_1_1hexl}\index{intel::hexl@{intel::hexl}} +\doxysubsection*{Classes} +\begin{DoxyCompactItemize} +\item +class \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}} +\begin{DoxyCompactList}\small\item\em Performs negacyclic forward and inverse number-\/theoretic transform (\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}}), commonly used in R\+L\+WE cryptography. 
\end{DoxyCompactList}\end{DoxyCompactItemize} +\doxysubsection*{Enumerations} +\begin{DoxyCompactItemize} +\item +enum \mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{C\+M\+P\+I\+NT}} \{ \newline +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006a2dcbad7477fd40561e8b8198f173bd47}{C\+M\+P\+I\+N\+T\+::\+EQ}} = 0, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ac562607189d77eb9dfb707464c1e7b0b}{C\+M\+P\+I\+N\+T\+::\+LT}} = 1, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006acfe6055d2e0503be378bb63449ec7ba6}{C\+M\+P\+I\+N\+T\+::\+LE}} = 2, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006a946003f97ccc52d5d3b54ac0ec31bbfc}{C\+M\+P\+I\+N\+T\+::\+F\+A\+L\+SE}} = 3, +\newline +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006adc33066c3993e0d50896e533fd692ce0}{C\+M\+P\+I\+N\+T\+::\+NE}} = 4, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ad7d6a13c7b311ec8a3c9fcfb1919a2f8}{C\+M\+P\+I\+N\+T\+::\+N\+LT}} = 5, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006aacd748f300c5d189c47807e2a9d6ea57}{C\+M\+P\+I\+N\+T\+::\+N\+LE}} = 6, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ac0d83f0b82a6b30de8811e69e6d95c61}{C\+M\+P\+I\+N\+T\+::\+T\+R\+UE}} = 7 + \} +\begin{DoxyCompactList}\small\item\em Represents binary operations between two boolean values. \end{DoxyCompactList}\end{DoxyCompactItemize} +\doxysubsection*{Functions} +\begin{DoxyCompactItemize} +\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_a319244a133f57825ba7e593ad5c71709}{Eltwise\+Add\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand1, const uint64\+\_\+t $\ast$operand2, uint64\+\_\+t n, uint64\+\_\+t modulus) +\begin{DoxyCompactList}\small\item\em Adds two vectors elementwise with modular reduction. 
\end{DoxyCompactList}\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_ad09f0d71efdfbde79c2a8ed92d7da811}{Eltwise\+Cmp\+Add}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand1, \mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{C\+M\+P\+I\+NT}} cmp, uint64\+\_\+t bound, uint64\+\_\+t diff, uint64\+\_\+t n) +\begin{DoxyCompactList}\small\item\em Computes element-\/wise conditional addition. \end{DoxyCompactList}\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_aa06f039b71cf61990911e753595f1f78}{Eltwise\+Cmp\+Sub\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand1, \mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{C\+M\+P\+I\+NT}} cmp, uint64\+\_\+t bound, uint64\+\_\+t diff, uint64\+\_\+t modulus, uint64\+\_\+t n) +\begin{DoxyCompactList}\small\item\em Computes element-\/wise conditional modular subtraction. \end{DoxyCompactList}\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_a5b65d563391b4a1a5041633aeb118aa5}{Eltwise\+F\+M\+A\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$arg1, uint64\+\_\+t arg2, const uint64\+\_\+t $\ast$arg3, uint64\+\_\+t n, uint64\+\_\+t modulus, uint64\+\_\+t input\+\_\+mod\+\_\+factor) +\begin{DoxyCompactList}\small\item\em Computes fused multiply-\/add ({\ttfamily arg1} $\ast$ {\ttfamily arg2} + {\ttfamily arg3}) mod {\ttfamily modulus} element-\/wise, broadcasting scalars to vectors. \end{DoxyCompactList}\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_a705bc0321d937ae4d1f8d50279e3cff1}{Eltwise\+Mult\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand1, const uint64\+\_\+t $\ast$operand2, uint64\+\_\+t n, uint64\+\_\+t modulus, uint64\+\_\+t input\+\_\+mod\+\_\+factor) +\begin{DoxyCompactList}\small\item\em Multiplies two vectors elementwise with modular reduction. 
\end{DoxyCompactList}\item +void \mbox{\hyperlink{namespaceintel_1_1hexl_af7e59b130824f9cd4ad0aa54c52ad50c}{Eltwise\+Reduce\+Mod}} (uint64\+\_\+t $\ast$result, const uint64\+\_\+t $\ast$operand, uint64\+\_\+t modulus, uint64\+\_\+t n, uint64\+\_\+t input\+\_\+mod\+\_\+factor, uint64\+\_\+t output\+\_\+mod\+\_\+factor) +\begin{DoxyCompactList}\small\item\em Performs elementwise modular reduction. \end{DoxyCompactList}\item +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{C\+M\+P\+I\+NT}} \mbox{\hyperlink{namespaceintel_1_1hexl_a8c654502a5e7fe2cfdd198f0fd920f2a}{Not}} (\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{C\+M\+P\+I\+NT}} cmp) +\begin{DoxyCompactList}\small\item\em Returns the logical negation of a binary operation. \end{DoxyCompactList}\end{DoxyCompactItemize} + + +\doxysubsection{Enumeration Type Documentation} +\mbox{\Hypertarget{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}\label{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}} +\index{intel::hexl@{intel::hexl}!CMPINT@{CMPINT}} +\index{CMPINT@{CMPINT}!intel::hexl@{intel::hexl}} +\doxysubsubsection{\texorpdfstring{CMPINT}{CMPINT}} +{\footnotesize\ttfamily enum \mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{intel\+::hexl\+::\+C\+M\+P\+I\+NT}}\hspace{0.3cm}{\ttfamily [strong]}} + + + +Represents binary operations between two boolean values. + +\begin{DoxyEnumFields}{Enumerator} +\raisebox{\heightof{T}}[0pt][0pt]{\index{EQ@{EQ}!intel::hexl@{intel::hexl}}\index{intel::hexl@{intel::hexl}!EQ@{EQ}}}\mbox{\Hypertarget{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006a2dcbad7477fd40561e8b8198f173bd47}\label{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006a2dcbad7477fd40561e8b8198f173bd47}} +EQ&Equal. 
\\ +\hline + +\raisebox{\heightof{T}}[0pt][0pt]{\index{LT@{LT}!intel::hexl@{intel::hexl}}\index{intel::hexl@{intel::hexl}!LT@{LT}}}\mbox{\Hypertarget{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ac562607189d77eb9dfb707464c1e7b0b}\label{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ac562607189d77eb9dfb707464c1e7b0b}} +LT&Less than. \\ +\hline + +\raisebox{\heightof{T}}[0pt][0pt]{\index{LE@{LE}!intel::hexl@{intel::hexl}}\index{intel::hexl@{intel::hexl}!LE@{LE}}}\mbox{\Hypertarget{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006acfe6055d2e0503be378bb63449ec7ba6}\label{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006acfe6055d2e0503be378bb63449ec7ba6}} +LE&Less than or equal. \\ +\hline + +\raisebox{\heightof{T}}[0pt][0pt]{\index{FALSE@{FALSE}!intel::hexl@{intel::hexl}}\index{intel::hexl@{intel::hexl}!FALSE@{FALSE}}}\mbox{\Hypertarget{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006a946003f97ccc52d5d3b54ac0ec31bbfc}\label{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006a946003f97ccc52d5d3b54ac0ec31bbfc}} +F\+A\+L\+SE&False. \\ +\hline + +\raisebox{\heightof{T}}[0pt][0pt]{\index{NE@{NE}!intel::hexl@{intel::hexl}}\index{intel::hexl@{intel::hexl}!NE@{NE}}}\mbox{\Hypertarget{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006adc33066c3993e0d50896e533fd692ce0}\label{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006adc33066c3993e0d50896e533fd692ce0}} +NE&Not equal. \\ +\hline + +\raisebox{\heightof{T}}[0pt][0pt]{\index{NLT@{NLT}!intel::hexl@{intel::hexl}}\index{intel::hexl@{intel::hexl}!NLT@{NLT}}}\mbox{\Hypertarget{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ad7d6a13c7b311ec8a3c9fcfb1919a2f8}\label{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ad7d6a13c7b311ec8a3c9fcfb1919a2f8}} +N\+LT&Not less than. 
\\ +\hline + +\raisebox{\heightof{T}}[0pt][0pt]{\index{NLE@{NLE}!intel::hexl@{intel::hexl}}\index{intel::hexl@{intel::hexl}!NLE@{NLE}}}\mbox{\Hypertarget{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006aacd748f300c5d189c47807e2a9d6ea57}\label{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006aacd748f300c5d189c47807e2a9d6ea57}} +N\+LE&Not less than or equal. \\ +\hline + +\raisebox{\heightof{T}}[0pt][0pt]{\index{TRUE@{TRUE}!intel::hexl@{intel::hexl}}\index{intel::hexl@{intel::hexl}!TRUE@{TRUE}}}\mbox{\Hypertarget{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ac0d83f0b82a6b30de8811e69e6d95c61}\label{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ac0d83f0b82a6b30de8811e69e6d95c61}} +T\+R\+UE&True. \\ +\hline + +\end{DoxyEnumFields} + + +\doxysubsection{Function Documentation} +\mbox{\Hypertarget{namespaceintel_1_1hexl_a319244a133f57825ba7e593ad5c71709}\label{namespaceintel_1_1hexl_a319244a133f57825ba7e593ad5c71709}} +\index{intel::hexl@{intel::hexl}!EltwiseAddMod@{EltwiseAddMod}} +\index{EltwiseAddMod@{EltwiseAddMod}!intel::hexl@{intel::hexl}} +\doxysubsubsection{\texorpdfstring{EltwiseAddMod()}{EltwiseAddMod()}} +{\footnotesize\ttfamily void intel\+::hexl\+::\+Eltwise\+Add\+Mod (\begin{DoxyParamCaption}\item[{uint64\+\_\+t $\ast$}]{result, }\item[{const uint64\+\_\+t $\ast$}]{operand1, }\item[{const uint64\+\_\+t $\ast$}]{operand2, }\item[{uint64\+\_\+t}]{n, }\item[{uint64\+\_\+t}]{modulus }\end{DoxyParamCaption})} + + + +Adds two vectors elementwise with modular reduction. + + +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ out}} & {\em result} & Stores result \\ +\hline +\mbox{\texttt{ in}} & {\em operand1} & Vector of elements to add. Each element must be less than the modulus \\ +\hline +\mbox{\texttt{ in}} & {\em operand2} & Vector of elements to add. 
Each element must be less than the modulus \\ +\hline +\mbox{\texttt{ in}} & {\em n} & Number of elements in each vector \\ +\hline +\mbox{\texttt{ in}} & {\em modulus} & Modulus with which to perform modular reduction. Must be in the range $[2, 2^{63} - 1]$\\ +\hline +\end{DoxyParams} +Computes $ result[i] = (operand1[i] + operand2[i]) \mod modulus $ for $ i=0, ..., n-1$. \mbox{\Hypertarget{namespaceintel_1_1hexl_ad09f0d71efdfbde79c2a8ed92d7da811}\label{namespaceintel_1_1hexl_ad09f0d71efdfbde79c2a8ed92d7da811}} +\index{intel::hexl@{intel::hexl}!EltwiseCmpAdd@{EltwiseCmpAdd}} +\index{EltwiseCmpAdd@{EltwiseCmpAdd}!intel::hexl@{intel::hexl}} +\doxysubsubsection{\texorpdfstring{EltwiseCmpAdd()}{EltwiseCmpAdd()}} +{\footnotesize\ttfamily void intel\+::hexl\+::\+Eltwise\+Cmp\+Add (\begin{DoxyParamCaption}\item[{uint64\+\_\+t $\ast$}]{result, }\item[{const uint64\+\_\+t $\ast$}]{operand1, }\item[{\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{C\+M\+P\+I\+NT}}}]{cmp, }\item[{uint64\+\_\+t}]{bound, }\item[{uint64\+\_\+t}]{diff, }\item[{uint64\+\_\+t}]{n }\end{DoxyParamCaption})} + + + +Computes element-\/wise conditional addition. + + +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ out}} & {\em result} & Stores the result \\ +\hline +\mbox{\texttt{ in}} & {\em operand1} & Vector of elements to compare \\ +\hline +\mbox{\texttt{ in}} & {\em cmp} & Comparison operation \\ +\hline +\mbox{\texttt{ in}} & {\em bound} & Scalar to compare against \\ +\hline +\mbox{\texttt{ in}} & {\em diff} & Scalar to conditionally add \\ +\hline +\mbox{\texttt{ in}} & {\em n} & Number of elements in {\ttfamily operand1} \\ +\hline +\end{DoxyParams} +Computes result\mbox{[}i\mbox{]} = cmp(operand1\mbox{[}i\mbox{]}, bound) ? operand1\mbox{[}i\mbox{]} + diff \+: operand1\mbox{[}i\mbox{]} for all $i=0, ..., n-1$. 
\mbox{\Hypertarget{namespaceintel_1_1hexl_aa06f039b71cf61990911e753595f1f78}\label{namespaceintel_1_1hexl_aa06f039b71cf61990911e753595f1f78}} +\index{intel::hexl@{intel::hexl}!EltwiseCmpSubMod@{EltwiseCmpSubMod}} +\index{EltwiseCmpSubMod@{EltwiseCmpSubMod}!intel::hexl@{intel::hexl}} +\doxysubsubsection{\texorpdfstring{EltwiseCmpSubMod()}{EltwiseCmpSubMod()}} +{\footnotesize\ttfamily void intel\+::hexl\+::\+Eltwise\+Cmp\+Sub\+Mod (\begin{DoxyParamCaption}\item[{uint64\+\_\+t $\ast$}]{result, }\item[{const uint64\+\_\+t $\ast$}]{operand1, }\item[{\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{C\+M\+P\+I\+NT}}}]{cmp, }\item[{uint64\+\_\+t}]{bound, }\item[{uint64\+\_\+t}]{diff, }\item[{uint64\+\_\+t}]{modulus, }\item[{uint64\+\_\+t}]{n }\end{DoxyParamCaption})} + + + +Computes element-\/wise conditional modular subtraction. + + +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ out}} & {\em result} & Stores the result \\ +\hline +\mbox{\texttt{ in}} & {\em operand1} & Vector of elements to compare \\ +\hline +\mbox{\texttt{ in}} & {\em cmp} & Comparison function \\ +\hline +\mbox{\texttt{ in}} & {\em bound} & Scalar to compare against \\ +\hline +\mbox{\texttt{ in}} & {\em diff} & Scalar to subtract by \\ +\hline +\mbox{\texttt{ in}} & {\em modulus} & Modulus to reduce by \\ +\hline +\mbox{\texttt{ in}} & {\em n} & Number of elements in {\ttfamily operand1} \\ +\hline +\end{DoxyParams} +Computes {\ttfamily result}\mbox{[}i\mbox{]} = ({\ttfamily cmp}({\ttfamily operand1}, {\ttfamily bound})) ? 
({\ttfamily operand1} -\/ {\ttfamily diff}) mod {\ttfamily modulus} \+: {\ttfamily operand1} for all i=0, ..., n-\/1 \mbox{\Hypertarget{namespaceintel_1_1hexl_a5b65d563391b4a1a5041633aeb118aa5}\label{namespaceintel_1_1hexl_a5b65d563391b4a1a5041633aeb118aa5}} +\index{intel::hexl@{intel::hexl}!EltwiseFMAMod@{EltwiseFMAMod}} +\index{EltwiseFMAMod@{EltwiseFMAMod}!intel::hexl@{intel::hexl}} +\doxysubsubsection{\texorpdfstring{EltwiseFMAMod()}{EltwiseFMAMod()}} +{\footnotesize\ttfamily void intel\+::hexl\+::\+Eltwise\+F\+M\+A\+Mod (\begin{DoxyParamCaption}\item[{uint64\+\_\+t $\ast$}]{result, }\item[{const uint64\+\_\+t $\ast$}]{arg1, }\item[{uint64\+\_\+t}]{arg2, }\item[{const uint64\+\_\+t $\ast$}]{arg3, }\item[{uint64\+\_\+t}]{n, }\item[{uint64\+\_\+t}]{modulus, }\item[{uint64\+\_\+t}]{input\+\_\+mod\+\_\+factor }\end{DoxyParamCaption})} + + + +Computes fused multiply-\/add ({\ttfamily arg1} $\ast$ {\ttfamily arg2} + {\ttfamily arg3}) mod {\ttfamily modulus} element-\/wise, broadcasting scalars to vectors. + + +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ out}} & {\em result} & Stores the result \\ +\hline +\mbox{\texttt{ in}} & {\em arg1} & Vector to multiply \\ +\hline +\mbox{\texttt{ in}} & {\em arg2} & Scalar to multiply \\ +\hline +\mbox{\texttt{ in}} & {\em arg3} & Vector to add. Will not add if {\ttfamily arg3} == nullptr \\ +\hline +\mbox{\texttt{ in}} & {\em n} & Number of elements in each vector \\ +\hline +\mbox{\texttt{ in}} & {\em modulus} & Modulus with which to perform modular reduction. Must be in the range $ [2, 2^{61} - 1]$ \\ +\hline +\mbox{\texttt{ in}} & {\em input\+\_\+mod\+\_\+factor} & Assumes input elements are in \mbox{[}0, input\+\_\+mod\+\_\+factor $\ast$ p). Must be 1, 2, 4, or 8. 
\\ +\hline +\end{DoxyParams} +\mbox{\Hypertarget{namespaceintel_1_1hexl_a705bc0321d937ae4d1f8d50279e3cff1}\label{namespaceintel_1_1hexl_a705bc0321d937ae4d1f8d50279e3cff1}} +\index{intel::hexl@{intel::hexl}!EltwiseMultMod@{EltwiseMultMod}} +\index{EltwiseMultMod@{EltwiseMultMod}!intel::hexl@{intel::hexl}} +\doxysubsubsection{\texorpdfstring{EltwiseMultMod()}{EltwiseMultMod()}} +{\footnotesize\ttfamily void intel\+::hexl\+::\+Eltwise\+Mult\+Mod (\begin{DoxyParamCaption}\item[{uint64\+\_\+t $\ast$}]{result, }\item[{const uint64\+\_\+t $\ast$}]{operand1, }\item[{const uint64\+\_\+t $\ast$}]{operand2, }\item[{uint64\+\_\+t}]{n, }\item[{uint64\+\_\+t}]{modulus, }\item[{uint64\+\_\+t}]{input\+\_\+mod\+\_\+factor }\end{DoxyParamCaption})} + + + +Multiplies two vectors elementwise with modular reduction. + + +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ out}} & {\em result} & Stores the result of element-\/wise multiplication \\ +\hline +\mbox{\texttt{ in}} & {\em operand1} & Vector of elements to multiply. Each element must be less than the modulus. \\ +\hline +\mbox{\texttt{ in}} & {\em operand2} & Vector of elements to multiply. Each element must be less than the modulus. 
\\ +\hline +\mbox{\texttt{ in}} & {\em n} & Number of elements in each vector \\ +\hline +\mbox{\texttt{ in}} & {\em modulus} & Modulus with which to perform modular reduction \\ +\hline +\mbox{\texttt{ in}} & {\em input\+\_\+mod\+\_\+factor} & Assumes input elements are in \mbox{[}0, input\+\_\+mod\+\_\+factor $\ast$ p). Must be 1, 2 or 4.\\ +\hline +\end{DoxyParams} +Computes {\ttfamily result}\mbox{[}i\mbox{]} = ({\ttfamily operand1}\mbox{[}i\mbox{]} $\ast$ {\ttfamily operand2}\mbox{[}i\mbox{]}) mod {\ttfamily modulus} for i=0, ..., {\ttfamily n} -\/ 1 \mbox{\Hypertarget{namespaceintel_1_1hexl_af7e59b130824f9cd4ad0aa54c52ad50c}\label{namespaceintel_1_1hexl_af7e59b130824f9cd4ad0aa54c52ad50c}} +\index{intel::hexl@{intel::hexl}!EltwiseReduceMod@{EltwiseReduceMod}} +\index{EltwiseReduceMod@{EltwiseReduceMod}!intel::hexl@{intel::hexl}} +\doxysubsubsection{\texorpdfstring{EltwiseReduceMod()}{EltwiseReduceMod()}} +{\footnotesize\ttfamily void intel\+::hexl\+::\+Eltwise\+Reduce\+Mod (\begin{DoxyParamCaption}\item[{uint64\+\_\+t $\ast$}]{result, }\item[{const uint64\+\_\+t $\ast$}]{operand, }\item[{uint64\+\_\+t}]{modulus, }\item[{uint64\+\_\+t}]{n, }\item[{uint64\+\_\+t}]{input\+\_\+mod\+\_\+factor, }\item[{uint64\+\_\+t}]{output\+\_\+mod\+\_\+factor }\end{DoxyParamCaption})} + + + +Performs elementwise modular reduction. + + +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ out}} & {\em result} & Stores the result \\ +\hline +\mbox{\texttt{ in}} & {\em operand} & Vector of elements to reduce \\ +\hline +\mbox{\texttt{ in}} & {\em n} & Number of elements in operand \\ +\hline +\mbox{\texttt{ in}} & {\em modulus} & Modulus with which to perform modular reduction \\ +\hline +\mbox{\texttt{ in}} & {\em input\+\_\+mod\+\_\+factor} & Assumes input elements are in \mbox{[}0, input\+\_\+mod\+\_\+factor $\ast$ p). Must be 0, 1, 2 or 4. input\+\_\+mod\+\_\+factor=0 means no knowledge of the input range. Barrett reduction will be used in this case. 
Requires input\+\_\+mod\+\_\+factor $>$= output\+\_\+mod\+\_\+factor unless input\+\_\+mod\+\_\+factor == 0. \\ +\hline +\mbox{\texttt{ in}} & {\em output\+\_\+mod\+\_\+factor} & Output elements will be in \mbox{[}0, output\+\_\+mod\+\_\+factor +$\ast$ modulus). +Must be 1 or 2. For input\+\_\+mod\+\_\+factor=0, output\+\_\+mod\+\_\+factor will be set to 1. +\\ +\hline +\end{DoxyParams} +\mbox{\Hypertarget{namespaceintel_1_1hexl_a8c654502a5e7fe2cfdd198f0fd920f2a}\label{namespaceintel_1_1hexl_a8c654502a5e7fe2cfdd198f0fd920f2a}} +\index{intel::hexl@{intel::hexl}!Not@{Not}} +\index{Not@{Not}!intel::hexl@{intel::hexl}} +\doxysubsubsection{\texorpdfstring{Not()}{Not()}} +{\footnotesize\ttfamily \mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{C\+M\+P\+I\+NT}} intel\+::hexl\+::\+Not (\begin{DoxyParamCaption}\item[{\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{C\+M\+P\+I\+NT}}}]{cmp }\end{DoxyParamCaption})\hspace{0.3cm}{\ttfamily [inline]}} + + + +Returns the logical negation of a binary operation. 
+ + +\begin{DoxyParams}[1]{Parameters} +\mbox{\texttt{ in}} & {\em cmp} & The binary operation to negate \\ +\hline +\end{DoxyParams} diff --git a/docs/docs/doxygen/latex/namespaces.tex b/docs/docs/doxygen/latex/namespaces.tex new file mode 100644 index 00000000..590eb218 --- /dev/null +++ b/docs/docs/doxygen/latex/namespaces.tex @@ -0,0 +1,5 @@ +\doxysection{Namespace List} +Here is a list of all namespaces with brief descriptions\+:\begin{DoxyCompactList} +\item\contentsline{section}{\mbox{\hyperlink{namespaceintel}{intel}} }{\pageref{namespaceintel}}{} +\item\contentsline{section}{\mbox{\hyperlink{namespaceintel_1_1hexl}{intel\+::hexl}} }{\pageref{namespaceintel_1_1hexl}}{} +\end{DoxyCompactList} diff --git a/docs/docs/doxygen/latex/ntt_8hpp.tex b/docs/docs/doxygen/latex/ntt_8hpp.tex new file mode 100644 index 00000000..8ce8c422 --- /dev/null +++ b/docs/docs/doxygen/latex/ntt_8hpp.tex @@ -0,0 +1,17 @@ +\hypertarget{ntt_8hpp}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/ntt/ntt.hpp File Reference} +\label{ntt_8hpp}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/ntt/ntt.hpp@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/ntt/ntt.hpp}} +{\ttfamily \#include $<$stdint.\+h$>$}\newline +{\ttfamily \#include $<$memory$>$}\newline +{\ttfamily \#include $<$vector$>$}\newline +\doxysubsection*{Classes} +\begin{DoxyCompactItemize} +\item +class \mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{intel\+::hexl\+::\+N\+TT}} +\begin{DoxyCompactList}\small\item\em Performs negacyclic forward and inverse number-\/theoretic transform (\mbox{\hyperlink{classintel_1_1hexl_1_1_n_t_t}{N\+TT}}), commonly used in R\+L\+WE cryptography. 
\end{DoxyCompactList}\end{DoxyCompactItemize} +\doxysubsection*{Namespaces} +\begin{DoxyCompactItemize} +\item + \mbox{\hyperlink{namespaceintel}{intel}} +\item + \mbox{\hyperlink{namespaceintel_1_1hexl}{intel\+::hexl}} +\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/latex/refman.tex b/docs/docs/doxygen/latex/refman.tex new file mode 100644 index 00000000..e4ab293a --- /dev/null +++ b/docs/docs/doxygen/latex/refman.tex @@ -0,0 +1,208 @@ +\let\mypdfximage\pdfximage\def\pdfximage{\immediate\mypdfximage}\documentclass[twoside]{book} + +%% moved from doxygen.sty due to workaround for LaTex 2019 version and unmaintained tabu package +\usepackage{ifthen} +\ifx\requestedLaTeXdate\undefined +\usepackage{array} +\else +\usepackage{array}[=2016-10-06] +\fi +%% +% Packages required by doxygen +\usepackage{fixltx2e} +\usepackage{calc} +\usepackage{doxygen} +\usepackage{graphicx} +\usepackage[utf8]{inputenc} +\usepackage{makeidx} +\usepackage{multicol} +\usepackage{multirow} +\PassOptionsToPackage{warn}{textcomp} +\usepackage{textcomp} +\usepackage[nointegrals]{wasysym} +\usepackage[table]{xcolor} +\usepackage{ifpdf,ifxetex} + +% Font selection +\usepackage[T1]{fontenc} +\usepackage[scaled=.90]{helvet} +\usepackage{courier} +\usepackage{amssymb} +\usepackage{sectsty} +\renewcommand{\familydefault}{\sfdefault} +\allsectionsfont{% + \fontseries{bc}\selectfont% + \color{darkgray}% +} +\renewcommand{\DoxyLabelFont}{% + \fontseries{bc}\selectfont% + \color{darkgray}% +} +\newcommand{\+}{\discretionary{\mbox{\scriptsize$\hookleftarrow$}}{}{}} + +% Arguments of doxygenemoji: +% 1) '::' form of the emoji, already "LaTeX"-escaped +% 2) file with the name of the emoji without the .png extension +% in case image exist use this otherwise use the '::' form +\newcommand{\doxygenemoji}[2]{% + \IfFileExists{./#2.png}{\raisebox{-0.1em}{\includegraphics[height=0.9em]{./#2.png}}}{#1}% +} +% Page & text layout +\usepackage{geometry} +\geometry{% + a4paper,% + top=2.5cm,% + bottom=2.5cm,% 
+ left=2.5cm,% + right=2.5cm% +} +\tolerance=750 +\hfuzz=15pt +\hbadness=750 +\setlength{\emergencystretch}{15pt} +\setlength{\parindent}{0cm} +\newcommand{\doxynormalparskip}{\setlength{\parskip}{3ex plus 2ex minus 2ex}} +\newcommand{\doxytocparskip}{\setlength{\parskip}{1ex plus 0ex minus 0ex}} +\doxynormalparskip +\makeatletter +\renewcommand{\paragraph}{% + \@startsection{paragraph}{4}{0ex}{-1.0ex}{1.0ex}{% + \normalfont\normalsize\bfseries\SS@parafont% + }% +} +\renewcommand{\subparagraph}{% + \@startsection{subparagraph}{5}{0ex}{-1.0ex}{1.0ex}{% + \normalfont\normalsize\bfseries\SS@subparafont% + }% +} +\makeatother + +\makeatletter +\newcommand\hrulefilll{\leavevmode\leaders\hrule\hskip 0pt plus 1filll\kern\z@} +\makeatother + +% Headers & footers +\usepackage{fancyhdr} +\pagestyle{fancyplain} +\fancyhead[LE]{\fancyplain{}{\bfseries\thepage}} +\fancyhead[CE]{\fancyplain{}{}} +\fancyhead[RE]{\fancyplain{}{\bfseries\leftmark}} +\fancyhead[LO]{\fancyplain{}{\bfseries\rightmark}} +\fancyhead[CO]{\fancyplain{}{}} +\fancyhead[RO]{\fancyplain{}{\bfseries\thepage}} +\fancyfoot[LE]{\fancyplain{}{}} +\fancyfoot[CE]{\fancyplain{}{}} +\fancyfoot[RE]{\fancyplain{}{\bfseries\scriptsize Generated by Doxygen }} +\fancyfoot[LO]{\fancyplain{}{\bfseries\scriptsize Generated by Doxygen }} +\fancyfoot[CO]{\fancyplain{}{}} +\fancyfoot[RO]{\fancyplain{}{}} +\renewcommand{\footrulewidth}{0.4pt} +\renewcommand{\chaptermark}[1]{% + \markboth{#1}{}% +} +\renewcommand{\sectionmark}[1]{% + \markright{\thesection\ #1}% +} + +% Indices & bibliography +\usepackage{natbib} +\usepackage[titles]{tocloft} +\setcounter{tocdepth}{3} +\setcounter{secnumdepth}{5} +\makeindex + +\usepackage{newunicodechar} + \newunicodechar{⁻}{${}^{-}$}% Superscript minus + \newunicodechar{²}{${}^{2}$}% Superscript two + \newunicodechar{³}{${}^{3}$}% Superscript three + +% Hyperlinks (required, but should be loaded last) +\ifpdf + \usepackage[pdftex,pagebackref=true]{hyperref} +\else + \ifxetex + 
\usepackage[pagebackref=true]{hyperref} + \else + \usepackage[ps2pdf,pagebackref=true]{hyperref} + \fi +\fi + +\hypersetup{% + colorlinks=true,% + linkcolor=blue,% + citecolor=blue,% + unicode% +} + +% Custom commands +\newcommand{\clearemptydoublepage}{% + \newpage{\pagestyle{empty}\cleardoublepage}% +} + +\usepackage{caption} +\captionsetup{labelsep=space,justification=centering,font={bf},singlelinecheck=off,skip=4pt,position=top} + +\usepackage{etoc} +\etocsettocstyle{\doxytocparskip}{\doxynormalparskip} +\renewcommand{\numberline}[1]{#1~} +%===== C O N T E N T S ===== + +\begin{document} + +% Titlepage & ToC +\hypersetup{pageanchor=false, + bookmarksnumbered=true, + pdfencoding=unicode + } +\pagenumbering{alph} +\begin{titlepage} +\vspace*{7cm} +\begin{center}% +{\Large Intel H\+E\+XL }\\ +\vspace*{1cm} +{\large Generated by Doxygen 1.8.17}\\ +\end{center} +\end{titlepage} +\clearemptydoublepage +\pagenumbering{roman} +\tableofcontents +\clearemptydoublepage +\pagenumbering{arabic} +\hypersetup{pageanchor=true} + +%--- Begin generated contents --- +\chapter{Intel Homomorphic Encryption Acceleration Library (H\+E\+XL)} +\label{index}\hypertarget{index}{}\input{index} +\chapter{Namespace Index} +\input{namespaces} +\chapter{Class Index} +\input{annotated} +\chapter{File Index} +\input{files} +\chapter{Namespace Documentation} +\input{namespaceintel} +\input{namespaceintel_1_1hexl} +\chapter{Class Documentation} +\input{classintel_1_1hexl_1_1_n_t_t} +\chapter{File Documentation} +\input{eltwise-add-mod_8hpp} +\input{eltwise-cmp-add_8hpp} +\input{eltwise-cmp-sub-mod_8hpp} +\input{eltwise-fma-mod_8hpp} +\input{eltwise-mult-mod_8hpp} +\input{eltwise-reduce-mod_8hpp} +\input{intel-hexl_8hpp} +\input{ntt_8hpp} +\input{util_8hpp} +\input{_r_e_a_d_m_e_8md} +%--- End generated contents --- + +% Index +\backmatter +\newpage +\phantomsection +\clearemptydoublepage +\addcontentsline{toc}{chapter}{\indexname} +\printindex + +\end{document} diff --git 
a/docs/docs/doxygen/latex/tabu_doxygen.sty b/docs/docs/doxygen/latex/tabu_doxygen.sty new file mode 100644 index 00000000..60fd7e8d --- /dev/null +++ b/docs/docs/doxygen/latex/tabu_doxygen.sty @@ -0,0 +1,2557 @@ +%% +%% This is file `tabu.sty', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% tabu.dtx (with options: `package') +%% +%% This is a generated file. +%% Copyright (FC) 2010-2011 - lppl +%% +%% tabu : 2011/02/26 v2.8 - tabu : Flexible LaTeX tabulars +%% +%% ********************************************************************************************** +%% \begin{tabu} { preamble } => default target: \linewidth or \linegoal +%% \begin{tabu} to { preamble } => target specified +%% \begin{tabu} spread { preamble } => target relative to the ``natural width'' +%% +%% tabu works in text and in math modes. +%% +%% X columns: automatic width adjustment + horizontal and vertical alignment +%% \begin{tabu} { X[4c] X[1c] X[-2ml] } +%% +%% Horizontal lines and / or leaders: +%% \hline\hline => double horizontal line +%% \firsthline\hline => for nested tabulars +%% \lasthline\hline => for nested tabulars +%% \tabucline[line spec]{column-column} => ``funny'' lines (dash/leader) +%% Automatic lines / leaders : +%% \everyrow{\hline\hline} +%% +%% Vertical lines and / or leaders: +%% \begin{tabu} { |[3pt red] X[4c] X[1c] X[-2ml] |[3pt blue] } +%% \begin{tabu} { |[3pt red] X[4c] X[1c] X[-2ml] |[3pt on 2pt off 4pt blue] } +%% +%% Fixed vertical spacing adjustment: +%% \extrarowheight= \extrarowdepth= +%% or: \extrarowsep= => may be prefixed by \global +%% +%% Dynamic vertical spacing adjustment: +%% \abovetabulinesep= \belowtabulinesep= +%% or: \tabulinesep= => may be prefixed by \global +%% +%% delarray.sty shortcuts: in math and text modes +%% \begin{tabu} .... 
\({ preamble }\) +%% +%% Algorithms reports: +%% \tracingtabu=1 \tracingtabu=2 +%% +%% ********************************************************************************************** +%% +%% This work may be distributed and/or modified under the +%% conditions of the LaTeX Project Public License, either +%% version 1.3 of this license or (at your option) any later +%% version. The latest version of this license is in +%% http://www.latex-project.org/lppl.txt +%% +%% This work consists of the main source file tabu.dtx +%% and the derived files +%% tabu.sty, tabu.pdf, tabu.ins +%% +%% tabu : Flexible LaTeX tabulars +%% lppl copyright 2010-2011 by FC +%% + +\NeedsTeXFormat{LaTeX2e}[2005/12/01] +\ProvidesPackage{tabu_doxygen}[2011/02/26 v2.8 - flexible LaTeX tabulars (FC), frozen version for doxygen] +\RequirePackage{array}[2008/09/09] +\RequirePackage{varwidth}[2009/03/30] +\AtEndOfPackage{\tabu@AtEnd \let\tabu@AtEnd \@undefined} +\let\tabu@AtEnd\@empty +\def\TMP@EnsureCode#1={% + \edef\tabu@AtEnd{\tabu@AtEnd + \catcode#1 \the\catcode#1}% + \catcode#1=% +}% \TMP@EnsureCode +\TMP@EnsureCode 33 = 12 % ! 
+\TMP@EnsureCode 58 = 12 % : (for siunitx) +\TMP@EnsureCode124 = 12 % | +\TMP@EnsureCode 36 = 3 % $ = math shift +\TMP@EnsureCode 38 = 4 % & = tab alignment character +\TMP@EnsureCode 32 = 10 % space +\TMP@EnsureCode 94 = 7 % ^ +\TMP@EnsureCode 95 = 8 % _ +%% Constants -------------------------------------------------------- +\newcount \c@taburow \def\thetaburow {\number\c@taburow} +\newcount \tabu@nbcols +\newcount \tabu@cnt +\newcount \tabu@Xcol +\let\tabu@start \@tempcnta +\let\tabu@stop \@tempcntb +\newcount \tabu@alloc \tabu@alloc=\m@ne +\newcount \tabu@nested +\def\tabu@alloc@{\global\advance\tabu@alloc \@ne \tabu@nested\tabu@alloc} +\newdimen \tabu@target +\newdimen \tabu@spreadtarget +\newdimen \tabu@naturalX +\newdimen \tabucolX +\let\tabu@DELTA \@tempdimc +\let\tabu@thick \@tempdima +\let\tabu@on \@tempdimb +\let\tabu@off \@tempdimc +\newdimen \tabu@Xsum +\newdimen \extrarowdepth +\newdimen \abovetabulinesep +\newdimen \belowtabulinesep +\newdimen \tabustrutrule \tabustrutrule \z@ +\newtoks \tabu@thebody +\newtoks \tabu@footnotes +\newsavebox \tabu@box +\newsavebox \tabu@arstrutbox +\newsavebox \tabu@hleads +\newsavebox \tabu@vleads +\newif \iftabu@colortbl +\newif \iftabu@siunitx +\newif \iftabu@measuring +\newif \iftabu@spread +\newif \iftabu@negcoef +\newif \iftabu@everyrow +\def\tabu@everyrowtrue {\global\let\iftabu@everyrow \iftrue} +\def\tabu@everyrowfalse{\global\let\iftabu@everyrow \iffalse} +\newif \iftabu@long +\newif \iftabuscantokens +\def\tabu@rescan {\tabu@verbatim \scantokens } +%% Utilities (for internal usage) ----------------------------------- +\def\tabu@gobblespace #1 {#1} +\def\tabu@gobbletoken #1#2{#1} +\def\tabu@gobbleX{\futurelet\@let@token \tabu@gobblex} +\def\tabu@gobblex{\if ^^J\noexpand\@let@token \expandafter\@gobble + \else\ifx \@sptoken\@let@token + \expandafter\tabu@gobblespace\expandafter\tabu@gobbleX + \fi\fi +}% \tabu@gobblex +\def\tabu@X{^^J} +{\obeyspaces +\global\let\tabu@spxiii= % saves an active space (for \ifx) 
+\gdef\tabu@@spxiii{ }} +\def\tabu@ifenvir {% only for \multicolumn + \expandafter\tabu@if@nvir\csname\@currenvir\endcsname +}% \tabu@ifenvir +\def\tabu@if@nvir #1{\csname @\ifx\tabu#1first\else + \ifx\longtabu#1first\else + second\fi\fi oftwo\endcsname +}% \tabu@ifenvir +\def\tabu@modulo #1#2{\numexpr\ifnum\numexpr#1=\z@ 0\else #1-(#1-(#2-1)/2)/(#2)*(#2)\fi} +{\catcode`\&=3 +\gdef\tabu@strtrim #1{% #1 = control sequence to trim + \ifodd 1\ifx #1\@empty \else \ifx #1\space \else 0\fi \fi + \let\tabu@c@l@r \@empty \let#1\@empty + \else \expandafter \tabu@trimspaces #1\@nnil + \fi +}% \tabu@strtrim +\gdef\tabu@trimspaces #1\@nnil{\let\tabu@c@l@r=#2\tabu@firstspace .#1& }% +\gdef\tabu@firstspace #1#2#3 &{\tabu@lastspace #2#3&} +\gdef\tabu@lastspace #1{\def #3{#1}% + \ifx #3\tabu@c@l@r \def\tabu@c@l@r{\protect\color{#1}}\expandafter\remove@to@nnil \fi + \tabu@trimspaces #1\@nnil} +}% \catcode +\def\tabu@sanitizearg #1#2{{% + \csname \ifcsname if@safe@actives\endcsname % + @safe@activestrue\else + relax\fi \endcsname + \edef#2{#1}\tabu@strtrim#2\@onelevel@sanitize#2% + \expandafter}\expandafter\def\expandafter#2\expandafter{#2}% +}% \tabu@sanitizearg +\def\tabu@textbar #1{\begingroup \endlinechar\m@ne \scantokens{\def\:{|}}% + \expandafter\endgroup \expandafter#1\:% !!! semi simple group !!! 
+}% \tabu@textbar +\def\tabu@everyrow@bgroup{\iftabu@everyrow \begingroup \else \noalign{\ifnum0=`}\fi \fi} +\def\tabu@everyrow@egroup{% + \iftabu@everyrow \expandafter \endgroup \the\toks@ + \else \ifnum0=`{\fi}% + \fi +}% \tabu@everyrow@egroup +\def\tabu@arstrut {\global\setbox\@arstrutbox \hbox{\vrule + height \arraystretch \dimexpr\ht\strutbox+\extrarowheight + depth \arraystretch \dimexpr\dp\strutbox+\extrarowdepth + width \z@}% +}% \tabu@arstrut +\def\tabu@rearstrut {% + \@tempdima \arraystretch\dimexpr\ht\strutbox+\extrarowheight \relax + \@tempdimb \arraystretch\dimexpr\dp\strutbox+\extrarowdepth \relax + \ifodd 1\ifdim \ht\@arstrutbox=\@tempdima + \ifdim \dp\@arstrutbox=\@tempdimb 0 \fi\fi + \tabu@mkarstrut + \fi +}% \tabu@rearstrut +\def\tabu@@DBG #1{\ifdim\tabustrutrule>\z@ \color{#1}\fi} +\def\tabu@DBG@arstrut {\global\setbox\@arstrutbox + \hbox to\z@{\hbox to\z@{\hss + {\tabu@DBG{cyan}\vrule + height \arraystretch \dimexpr\ht\strutbox+\extrarowheight + depth \z@ + width \tabustrutrule}\kern-\tabustrutrule + {\tabu@DBG{pink}\vrule + height \z@ + depth \arraystretch \dimexpr\dp\strutbox+\extrarowdepth + width \tabustrutrule}}}% +}% \tabu@DBG@arstrut +\def\tabu@save@decl{\toks\count@ \expandafter{\the\toks\expandafter\count@ + \@nextchar}}% +\def\tabu@savedecl{\ifcat$\d@llarend\else + \let\save@decl \tabu@save@decl \fi % no inversion of tokens in text mode +}% \tabu@savedecl +\def\tabu@finalstrut #1{\unskip\ifhmode\nobreak\fi\vrule height\z@ depth\z@ width\z@} +\newcommand*\tabuDisableCommands {\g@addto@macro\tabu@trialh@@k } +\let\tabu@trialh@@k \@empty +\def\tabu@nowrite #1#{{\afterassignment}\toks@} +\let\tabu@write\write +\let\tabu@immediate\immediate +\def\tabu@WRITE{\begingroup + \def\immediate\write{\aftergroup\endgroup + \tabu@immediate\tabu@write}% +}% \tabu@WRITE +\expandafter\def\expandafter\tabu@GenericError\expandafter{% + \expandafter\tabu@WRITE\GenericError} +\def\tabu@warn{\tabu@WRITE\PackageWarning{tabu}} +\def\tabu@noxfootnote 
[#1]{\@gobble} +\def\tabu@nocolor #1#{\@gobble} +\newcommand*\tabu@norowcolor[2][]{} +\def\tabu@maybesiunitx #1{\def\tabu@temp{#1}% + \futurelet\@let@token \tabu@m@ybesiunitx} +\def\tabu@m@ybesiunitx #1{\def\tabu@m@ybesiunitx {% + \ifx #1\@let@token \let\tabu@cellleft \@empty \let\tabu@cellright \@empty \fi + \tabu@temp}% \tabu@m@ybesiunitx +}\expandafter\tabu@m@ybesiunitx \csname siunitx_table_collect_begin:Nn\endcsname +\def\tabu@celllalign@def #1{\def\tabu@celllalign{\tabu@maybesiunitx{#1}}}% +%% Fixed vertical spacing adjustment: \extrarowsep ------------------ +\newcommand*\extrarowsep{\edef\tabu@C@extra{\the\numexpr\tabu@C@extra+1}% + \iftabu@everyrow \aftergroup\tabu@Gextra + \else \aftergroup\tabu@n@Gextra + \fi + \@ifnextchar={\tabu@gobbletoken\tabu@extra} \tabu@extra +}% \extrarowsep +\def\tabu@extra {\@ifnextchar_% + {\tabu@gobbletoken{\tabu@setextra\extrarowheight \extrarowdepth}} + {\ifx ^\@let@token \def\tabu@temp{% + \tabu@gobbletoken{\tabu@setextra\extrarowdepth \extrarowheight}}% + \else \let\tabu@temp \@empty + \afterassignment \tabu@setextrasep \extrarowdepth + \fi \tabu@temp}% +}% \tabu@extra +\def\tabu@setextra #1#2{\def\tabu@temp{\tabu@extr@#1#2}\afterassignment\tabu@temp#2} +\def\tabu@extr@ #1#2{\@ifnextchar^% + {\tabu@gobbletoken{\tabu@setextra\extrarowdepth \extrarowheight}} + {\ifx _\@let@token \def\tabu@temp{% + \tabu@gobbletoken{\tabu@setextra\extrarowheight \extrarowdepth}}% + \else \let\tabu@temp \@empty + \tabu@Gsave \tabu@G@extra \tabu@C@extra \extrarowheight \extrarowdepth + \fi \tabu@temp}% +}% \tabu@extr@ +\def\tabu@setextrasep {\extrarowheight=\extrarowdepth + \tabu@Gsave \tabu@G@extra \tabu@C@extra \extrarowheight \extrarowdepth +}% \tabu@setextrasep +\def\tabu@Gextra{\ifx \tabu@G@extra\@empty \else {\tabu@Rextra}\fi} +\def\tabu@n@Gextra{\ifx \tabu@G@extra\@empty \else \noalign{\tabu@Rextra}\fi} +\def\tabu@Rextra{\tabu@Grestore \tabu@G@extra \tabu@C@extra} +\let\tabu@C@extra \z@ +\let\tabu@G@extra \@empty +%% Dynamic vertical 
spacing adjustment: \tabulinesep ---------------- +\newcommand*\tabulinesep{\edef\tabu@C@linesep{\the\numexpr\tabu@C@linesep+1}% + \iftabu@everyrow \aftergroup\tabu@Glinesep + \else \aftergroup\tabu@n@Glinesep + \fi + \@ifnextchar={\tabu@gobbletoken\tabu@linesep} \tabu@linesep +}% \tabulinesep +\def\tabu@linesep {\@ifnextchar_% + {\tabu@gobbletoken{\tabu@setsep\abovetabulinesep \belowtabulinesep}} + {\ifx ^\@let@token \def\tabu@temp{% + \tabu@gobbletoken{\tabu@setsep\belowtabulinesep \abovetabulinesep}}% + \else \let\tabu@temp \@empty + \afterassignment \tabu@setlinesep \abovetabulinesep + \fi \tabu@temp}% +}% \tabu@linesep +\def\tabu@setsep #1#2{\def\tabu@temp{\tabu@sets@p#1#2}\afterassignment\tabu@temp#2} +\def\tabu@sets@p #1#2{\@ifnextchar^% + {\tabu@gobbletoken{\tabu@setsep\belowtabulinesep \abovetabulinesep}} + {\ifx _\@let@token \def\tabu@temp{% + \tabu@gobbletoken{\tabu@setsep\abovetabulinesep \belowtabulinesep}}% + \else \let\tabu@temp \@empty + \tabu@Gsave \tabu@G@linesep \tabu@C@linesep \abovetabulinesep \belowtabulinesep + \fi \tabu@temp}% +}% \tabu@sets@p +\def\tabu@setlinesep {\belowtabulinesep=\abovetabulinesep + \tabu@Gsave \tabu@G@linesep \tabu@C@linesep \abovetabulinesep \belowtabulinesep +}% \tabu@setlinesep +\def\tabu@Glinesep{\ifx \tabu@G@linesep\@empty \else {\tabu@Rlinesep}\fi} +\def\tabu@n@Glinesep{\ifx \tabu@G@linesep\@empty \else \noalign{\tabu@Rlinesep}\fi} +\def\tabu@Rlinesep{\tabu@Grestore \tabu@G@linesep \tabu@C@linesep} +\let\tabu@C@linesep \z@ +\let\tabu@G@linesep \@empty +%% \global\extrarowsep and \global\tabulinesep ------------------- +\def\tabu@Gsave #1#2#3#4{\xdef#1{#1% + \toks#2{\toks\the\currentgrouplevel{\global#3\the#3\global#4\the#4}}}% +}% \tabu@Gsave +\def\tabu@Grestore#1#2{% + \toks#2{}#1\toks\currentgrouplevel\expandafter{\expandafter}\the\toks#2\relax + \ifcat$\the\toks\currentgrouplevel$\else + \global\let#1\@empty \global\let#2\z@ + \the\toks\currentgrouplevel + \fi +}% \tabu@Grestore +%% Setting code for every row 
--------------------------------------- +\newcommand*\everyrow{\tabu@everyrow@bgroup + \tabu@start \z@ \tabu@stop \z@ \tabu@evrstartstop +}% \everyrow +\def\tabu@evrstartstop {\@ifnextchar^% + {\afterassignment \tabu@evrstartstop \tabu@stop=}% + {\ifx ^\@let@token + \afterassignment\tabu@evrstartstop \tabu@start=% + \else \afterassignment\tabu@everyr@w \toks@ + \fi}% +}% \tabu@evrstartstop +\def\tabu@everyr@w {% + \xdef\tabu@everyrow{% + \noexpand\tabu@everyrowfalse + \let\noalign \relax + \noexpand\tabu@rowfontreset + \iftabu@colortbl \noexpand\tabu@rc@ \fi % \taburowcolors + \let\noexpand\tabu@docline \noexpand\tabu@docline@evr + \the\toks@ + \noexpand\tabu@evrh@@k + \noexpand\tabu@rearstrut + \global\advance\c@taburow \@ne}% + \iftabu@everyrow \toks@\expandafter + {\expandafter\def\expandafter\tabu@evr@L\expandafter{\the\toks@}\ignorespaces}% + \else \xdef\tabu@evr@G{\the\toks@}% + \fi + \tabu@everyrow@egroup +}% \tabu@everyr@w +\def\tabu@evr {\def\tabu@evrh@@k} % for internal use only +\tabu@evr{} +%% line style and leaders ------------------------------------------- +\newcommand*\newtabulinestyle [1]{% + {\@for \@tempa :=#1\do{\expandafter\tabu@newlinestyle \@tempa==\@nil}}% +}% \newtabulinestyle +\def\tabu@newlinestyle #1=#2=#3\@nil{\tabu@getline {#2}% + \tabu@sanitizearg {#1}\@tempa + \ifodd 1\ifx \@tempa\@empty \ifdefined\tabu@linestyle@ 0 \fi\fi + \global\expandafter\let + \csname tabu@linestyle@\@tempa \endcsname =\tabu@thestyle \fi +}% \tabu@newlinestyle +\newcommand*\tabulinestyle [1]{\tabu@everyrow@bgroup \tabu@getline{#1}% + \iftabu@everyrow + \toks@\expandafter{\expandafter \def \expandafter + \tabu@ls@L\expandafter{\tabu@thestyle}\ignorespaces}% + \gdef\tabu@ls@{\tabu@ls@L}% + \else + \global\let\tabu@ls@G \tabu@thestyle + \gdef\tabu@ls@{\tabu@ls@G}% + \fi + \tabu@everyrow@egroup +}% \tabulinestyle +\newcommand*\taburulecolor{\tabu@everyrow@bgroup \tabu@textbar \tabu@rulecolor} +\def\tabu@rulecolor #1{\toks@{}% + \def\tabu@temp 
#1##1#1{\tabu@ruledrsc{##1}}\@ifnextchar #1% + \tabu@temp + \tabu@rulearc +}% \tabu@rulecolor +\def\tabu@ruledrsc #1{\edef\tabu@temp{#1}\tabu@strtrim\tabu@temp + \ifx \tabu@temp\@empty \def\tabu@temp{\tabu@rule@drsc@ {}{}}% + \else \edef\tabu@temp{\noexpand\tabu@rule@drsc@ {}{\tabu@temp}}% + \fi + \tabu@temp +}% \tabu@ruledrsc@ +\def\tabu@ruledrsc@ #1#{\tabu@rule@drsc@ {#1}} +\def\tabu@rule@drsc@ #1#2{% + \iftabu@everyrow + \ifx \\#1#2\\\toks@{\let\CT@drsc@ \relax}% + \else \toks@{\def\CT@drsc@{\color #1{#2}}}% + \fi + \else + \ifx \\#1#2\\\global\let\CT@drsc@ \relax + \else \gdef\CT@drsc@{\color #1{#2}}% + \fi + \fi + \tabu@rulearc +}% \tabu@rule@drsc@ +\def\tabu@rulearc #1#{\tabu@rule@arc@ {#1}} +\def\tabu@rule@arc@ #1#2{% + \iftabu@everyrow + \ifx \\#1#2\\\toks@\expandafter{\the\toks@ \def\CT@arc@{}}% + \else \toks@\expandafter{\the\toks@ \def\CT@arc@{\color #1{#2}}}% + \fi + \toks@\expandafter{\the\toks@ + \let\tabu@arc@L \CT@arc@ + \let\tabu@drsc@L \CT@drsc@ + \ignorespaces}% + \else + \ifx \\#1#2\\\gdef\CT@arc@{}% + \else \gdef\CT@arc@{\color #1{#2}}% + \fi + \global\let\tabu@arc@G \CT@arc@ + \global\let\tabu@drsc@G \CT@drsc@ + \fi + \tabu@everyrow@egroup +}% \tabu@rule@arc@ +\def\taburowcolors {\tabu@everyrow@bgroup \@testopt \tabu@rowcolors 1} +\def\tabu@rowcolors [#1]#2#{\tabu@rowc@lors{#1}{#2}} +\def\tabu@rowc@lors #1#2#3{% + \toks@{}\@defaultunits \count@ =\number0#2\relax \@nnil + \@defaultunits \tabu@start =\number0#1\relax \@nnil + \ifnum \count@<\tw@ \count@=\tw@ \fi + \advance\tabu@start \m@ne + \ifnum \tabu@start<\z@ \tabu@start \z@ \fi + \tabu@rowcolorseries #3\in@..\in@ \@nnil +}% \tabu@rowcolors +\def\tabu@rowcolorseries #1..#2\in@ #3\@nnil {% + \ifx \in@#1\relax + \iftabu@everyrow \toks@{\def\tabu@rc@{}\let\tabu@rc@L \tabu@rc@}% + \else \gdef\tabu@rc@{}\global\let\tabu@rc@G \tabu@rc@ + \fi + \else + \ifx \\#2\\\tabu@rowcolorserieserror \fi + \tabu@sanitizearg{#1}\tabu@temp + \tabu@sanitizearg{#2}\@tempa + \advance\count@ \m@ne + 
\iftabu@everyrow + \def\tabu@rc@ ##1##2##3##4{\def\tabu@rc@{% + \ifnum ##2=\c@taburow + \definecolorseries{tabu@rcseries@\the\tabu@nested}{rgb}{last}{##3}{##4}\fi + \ifnum \c@taburow<##2 \else + \ifnum \tabu@modulo {\c@taburow-##2}{##1+1}=\z@ + \resetcolorseries[{##1}]{tabu@rcseries@\the\tabu@nested}\fi + \xglobal\colorlet{tabu@rc@\the\tabu@nested}{tabu@rcseries@\the\tabu@nested!!+}% + \rowcolor{tabu@rc@\the\tabu@nested}\fi}% + }\edef\x{\noexpand\tabu@rc@ {\the\count@} + {\the\tabu@start} + {\tabu@temp} + {\@tempa}% + }\x + \toks@\expandafter{\expandafter\def\expandafter\tabu@rc@\expandafter{\tabu@rc@}}% + \toks@\expandafter{\the\toks@ \let\tabu@rc@L \tabu@rc@ \ignorespaces}% + \else % inside \noalign + \definecolorseries{tabu@rcseries@\the\tabu@nested}{rgb}{last}{\tabu@temp}{\@tempa}% + \expandafter\resetcolorseries\expandafter[\the\count@]{tabu@rcseries@\the\tabu@nested}% + \xglobal\colorlet{tabu@rc@\the\tabu@nested}{tabu@rcseries@\the\tabu@nested!!+}% + \let\noalign \relax \rowcolor{tabu@rc@\the\tabu@nested}% + \def\tabu@rc@ ##1##2{\gdef\tabu@rc@{% + \ifnum \tabu@modulo {\c@taburow-##2}{##1+1}=\@ne + \resetcolorseries[{##1}]{tabu@rcseries@\the\tabu@nested}\fi + \xglobal\colorlet{tabu@rc@\the\tabu@nested}{tabu@rcseries@\the\tabu@nested!!+}% + \rowcolor{tabu@rc@\the\tabu@nested}}% + }\edef\x{\noexpand\tabu@rc@{\the\count@}{\the\c@taburow}}\x + \global\let\tabu@rc@G \tabu@rc@ + \fi + \fi + \tabu@everyrow@egroup +}% \tabu@rowcolorseries +\tabuDisableCommands {\let\tabu@rc@ \@empty } +\def\tabu@rowcolorserieserror {\PackageError{tabu} + {Invalid syntax for \string\taburowcolors + \MessageBreak Please look at the documentation!}\@ehd +}% \tabu@rowcolorserieserror +\newcommand*\tabureset {% + \tabulinesep=\z@ \extrarowsep=\z@ \extratabsurround=\z@ + \tabulinestyle{}\everyrow{}\taburulecolor||{}\taburowcolors{}% +}% \tabureset +%% Parsing the line styles ------------------------------------------ +\def\tabu@getline #1{\begingroup + \csname \ifcsname 
if@safe@actives\endcsname % + @safe@activestrue\else + relax\fi \endcsname + \edef\tabu@temp{#1}\tabu@sanitizearg{#1}\@tempa + \let\tabu@thestyle \relax + \ifcsname tabu@linestyle@\@tempa \endcsname + \edef\tabu@thestyle{\endgroup + \def\tabu@thestyle{\expandafter\noexpand + \csname tabu@linestyle@\@tempa\endcsname}% + }\tabu@thestyle + \else \expandafter\tabu@definestyle \tabu@temp \@nil + \fi +}% \tabu@getline +\def\tabu@definestyle #1#2\@nil {\endlinechar \m@ne \makeatletter + \tabu@thick \maxdimen \tabu@on \maxdimen \tabu@off \maxdimen + \let\tabu@c@lon \@undefined \let\tabu@c@loff \@undefined + \ifodd 1\ifcat .#1\else\ifcat\relax #1\else 0\fi\fi % catcode 12 or non expandable cs + \def\tabu@temp{\tabu@getparam{thick}}% + \else \def\tabu@temp{\tabu@getparam{thick}\maxdimen}% + \fi + {% + \let\tabu@ \relax + \def\:{\obeyspaces \tabu@oXIII \tabu@commaXIII \edef\:}% (space active \: happy ;-)) + \scantokens{\:{\tabu@temp #1#2 \tabu@\tabu@}}% + \expandafter}\expandafter + \def\expandafter\:\expandafter{\:}% line spec rewritten now ;-) + \def\;{\def\:}% + \scantokens\expandafter{\expandafter\;\expandafter{\:}}% space is now inactive (catcode 10) + \let\tabu@ \tabu@getcolor \:% all arguments are ready now ;-) + \ifdefined\tabu@c@lon \else \let\tabu@c@lon\@empty \fi + \ifx \tabu@c@lon\@empty \def\tabu@c@lon{\CT@arc@}\fi + \ifdefined\tabu@c@loff \else \let\tabu@c@loff \@empty \fi + \ifdim \tabu@on=\maxdimen \ifdim \tabu@off<\maxdimen + \tabu@on \tabulineon \fi\fi + \ifdim \tabu@off=\maxdimen \ifdim \tabu@on<\maxdimen + \tabu@off \tabulineoff \fi\fi + \ifodd 1\ifdim \tabu@off=\maxdimen \ifdim \tabu@on=\maxdimen 0 \fi\fi + \in@true % + \else \in@false % + \fi + \ifdim\tabu@thick=\maxdimen \def\tabu@thick{\arrayrulewidth}% + \else \edef\tabu@thick{\the\tabu@thick}% + \fi + \edef \tabu@thestyle ##1##2{\endgroup + \def\tabu@thestyle{% + \ifin@ \noexpand\tabu@leadersstyle {\tabu@thick} + {\the\tabu@on}{##1} + {\the\tabu@off}{##2}% + \else \noexpand\tabu@rulesstyle + 
{##1\vrule width \tabu@thick}% + {##1\leaders \hrule height \tabu@thick \hfil}% + \fi}% + }\expandafter \expandafter + \expandafter \tabu@thestyle \expandafter + \expandafter \expandafter + {\expandafter\tabu@c@lon\expandafter}\expandafter{\tabu@c@loff}% +}% \tabu@definestyle +{\catcode`\O=\active \lccode`\O=`\o \catcode`\,=\active + \lowercase{\gdef\tabu@oXIII {\catcode`\o=\active \let O=\tabu@oxiii}} + \gdef\tabu@commaXIII {\catcode`\,=\active \let ,=\space} +}% \catcode +\def\tabu@oxiii #1{% + \ifcase \ifx n#1\z@ \else + \ifx f#1\@ne\else + \tw@ \fi\fi + \expandafter\tabu@onxiii + \or \expandafter\tabu@ofxiii + \else o% + \fi#1}% +\def\tabu@onxiii #1#2{% + \ifcase \ifx !#2\tw@ \else + \ifcat.\noexpand#2\z@ \else + \ifx \tabu@spxiii#2\@ne\else + \tw@ \fi\fi\fi + \tabu@getparam{on}#2\expandafter\@gobble + \or \expandafter\tabu@onxiii % (space is active) + \else o\expandafter\@firstofone + \fi{#1#2}}% +\def\tabu@ofxiii #1#2{% + \ifx #2f\expandafter\tabu@offxiii + \else o\expandafter\@firstofone + \fi{#1#2}} +\def\tabu@offxiii #1#2{% + \ifcase \ifx !#2\tw@ \else + \ifcat.\noexpand#2\z@ \else + \ifx\tabu@spxiii#2\@ne \else + \tw@ \fi\fi\fi + \tabu@getparam{off}#2\expandafter\@gobble + \or \expandafter\tabu@offxiii % (space is active) + \else o\expandafter\@firstofone + \fi{#1#2}} +\def\tabu@getparam #1{\tabu@ \csname tabu@#1\endcsname=} +\def\tabu@getcolor #1{% \tabu@ <- \tabu@getcolor after \edef + \ifx \tabu@#1\else % no more spec + \let\tabu@theparam=#1\afterassignment \tabu@getc@l@r #1\fi +}% \tabu@getcolor +\def\tabu@getc@l@r #1\tabu@ {% + \def\tabu@temp{#1}\tabu@strtrim \tabu@temp + \ifx \tabu@temp\@empty + \else%\ifcsname \string\color@\tabu@temp \endcsname % if the color exists + \ifx \tabu@theparam \tabu@off \let\tabu@c@loff \tabu@c@l@r + \else \let\tabu@c@lon \tabu@c@l@r + \fi + %\else \tabu@warncolour{\tabu@temp}% + \fi%\fi + \tabu@ % next spec +}% \tabu@getc@l@r +\def\tabu@warncolour #1{\PackageWarning{tabu} + {Color #1 is not defined. 
Default color used}% +}% \tabu@warncolour +\def\tabu@leadersstyle #1#2#3#4#5{\def\tabu@leaders{{#1}{#2}{#3}{#4}{#5}}% + \ifx \tabu@leaders\tabu@leaders@G \else + \tabu@LEADERS{#1}{#2}{#3}{#4}{#5}\fi +}% \tabu@leadersstyle +\def\tabu@rulesstyle #1#2{\let\tabu@leaders \@undefined + \gdef\tabu@thevrule{#1}\gdef\tabu@thehrule{#2}% +}% \tabu@rulesstyle +%% The leaders boxes ------------------------------------------------ +\def\tabu@LEADERS #1#2#3#4#5{%% width, dash, dash color, gap, gap color + {\let\color \tabu@color % => during trials -> \color = \tabu@nocolor + {% % but the leaders boxes should have colors ! + \def\@therule{\vrule}\def\@thick{height}\def\@length{width}% + \def\@box{\hbox}\def\@unbox{\unhbox}\def\@elt{\wd}% + \def\@skip{\hskip}\def\@ss{\hss}\def\tabu@leads{\tabu@hleads}% + \tabu@l@@d@rs {#1}{#2}{#3}{#4}{#5}% + \global\let\tabu@thehleaders \tabu@theleaders + }% + {% + \def\@therule{\hrule}\def\@thick{width}\def\@length{height}% + \def\@box{\vbox}\def\@unbox{\unvbox}\def\@elt{\ht}% + \def\@skip{\vskip}\def\@ss{\vss}\def\tabu@leads{\tabu@vleads}% + \tabu@l@@d@rs {#1}{#2}{#3}{#4}{#5}% + \global\let\tabu@thevleaders \tabu@theleaders + }% + \gdef\tabu@leaders@G{{#1}{#2}{#3}{#4}{#5}}% + }% +}% \tabu@LEADERS +\def\tabu@therule #1#2{\@therule \@thick#1\@length\dimexpr#2/2 \@depth\z@} +\def\tabu@l@@d@rs #1#2#3#4#5{%% width, dash, dash color, gap, gap color + \global\setbox \tabu@leads=\@box{% + {#3\tabu@therule{#1}{#2}}% + \ifx\\#5\\\@skip#4\else{#5\tabu@therule{#1}{#4*2}}\fi + {#3\tabu@therule{#1}{#2}}}% + \global\setbox\tabu@leads=\@box to\@elt\tabu@leads{\@ss + {#3\tabu@therule{#1}{#2}}\@unbox\tabu@leads}% + \edef\tabu@theleaders ##1{\def\noexpand\tabu@theleaders {% + {##1\tabu@therule{#1}{#2}}% + \xleaders \copy\tabu@leads \@ss + \tabu@therule{0pt}{-#2}{##1\tabu@therule{#1}{#2}}}% + }\tabu@theleaders{#3}% +}% \tabu@l@@d@rs +%% \tabu \endtabu \tabu* \longtabu \endlongtabu \longtabu* ---------- +\newcommand*\tabu {\tabu@longfalse + \ifmmode \def\tabu@ 
{\array}\def\endtabu {\endarray}% + \else \def\tabu@ {\tabu@tabular}\def\endtabu {\endtabular}\fi + \expandafter\let\csname tabu*\endcsname \tabu + \expandafter\def\csname endtabu*\endcsname{\endtabu}% + \tabu@spreadfalse \tabu@negcoeffalse \tabu@settarget +}% {tabu} +\let\tabu@tabular \tabular % +\expandafter\def\csname tabu*\endcsname{\tabuscantokenstrue \tabu} +\newcommand*\longtabu {\tabu@longtrue + \ifmmode\PackageError{tabu}{longtabu not allowed in math mode}\fi + \def\tabu@{\longtable}\def\endlongtabu{\endlongtable}% + \LTchunksize=\@M + \expandafter\let\csname tabu*\endcsname \tabu + \expandafter\def\csname endlongtabu*\endcsname{\endlongtabu}% + \let\LT@startpbox \tabu@LT@startpbox % \everypar{ array struts } + \tabu@spreadfalse \tabu@negcoeffalse \tabu@settarget +}% {longtabu} +\expandafter\def\csname longtabu*\endcsname{\tabuscantokenstrue \longtabu} +\def\tabu@nolongtabu{\PackageError{tabu} + {longtabu requires the longtable package}\@ehd} +%% Read the target and then : \tabular or \@array ------------------ +\def\tabu@settarget {\futurelet\@let@token \tabu@sett@rget } +\def\tabu@sett@rget {\tabu@target \z@ + \ifcase \ifx \bgroup\@let@token \z@ \else + \ifx \@sptoken\@let@token \@ne \else + \if t\@let@token \tw@ \else + \if s\@let@token \thr@@\else + \z@\fi\fi\fi\fi + \expandafter\tabu@begin + \or \expandafter\tabu@gobblespace\expandafter\tabu@settarget + \or \expandafter\tabu@to + \or \expandafter\tabu@spread + \fi +}% \tabu@sett@rget +\def\tabu@to to{\def\tabu@halignto{to}\tabu@gettarget} +\def\tabu@spread spread{\tabu@spreadtrue\def\tabu@halignto{spread}\tabu@gettarget} +\def\tabu@gettarget {\afterassignment\tabu@linegoaltarget \tabu@target } +\def\tabu@linegoaltarget {\futurelet\tabu@temp \tabu@linegoalt@rget } +\def\tabu@linegoalt@rget {% + \ifx \tabu@temp\LNGL@setlinegoal + \LNGL@setlinegoal \expandafter \@firstoftwo \fi % @gobbles \LNGL@setlinegoal + \tabu@begin +}% \tabu@linegoalt@rget +\def\tabu@begin #1#{% + \iftabu@measuring 
\expandafter\tabu@nestedmeasure \fi + \ifdim \tabu@target=\z@ \let\tabu@halignto \@empty + \else \edef\tabu@halignto{\tabu@halignto\the\tabu@target}% + \fi + \@testopt \tabu@tabu@ \tabu@aligndefault #1\@nil +}% \tabu@begin +\long\def\tabu@tabu@ [#1]#2\@nil #3{\tabu@setup + \def\tabu@align {#1}\def\tabu@savedpream{\NC@find #3}% + \tabu@ [\tabu@align ]#2{#3\tabu@rewritefirst }% +}% \tabu@tabu@ +\def\tabu@nestedmeasure {% + \ifodd 1\iftabu@spread \else \ifdim\tabu@target=\z@ \else 0 \fi\fi\relax + \tabu@spreadtrue + \else \begingroup \iffalse{\fi \ifnum0=`}\fi + \toks@{}\def\tabu@stack{b}% + \expandafter\tabu@collectbody\expandafter\tabu@quickrule + \expandafter\endgroup + \fi +}% \tabu@nestedmeasure +\def\tabu@quickrule {\indent\vrule height\z@ depth\z@ width\tabu@target} +%% \tabu@setup \tabu@init \tabu@indent +\def\tabu@setup{\tabu@alloc@ + \ifcase \tabu@nested + \ifmmode \else \iftabu@spread\else \ifdim\tabu@target=\z@ + \let\tabu@afterendpar \par + \fi\fi\fi + \def\tabu@aligndefault{c}\tabu@init \tabu@indent + \else % + \def\tabu@aligndefault{t}\let\tabudefaulttarget \linewidth + \fi + \let\tabu@thetarget \tabudefaulttarget \let\tabu@restored \@undefined + \edef\tabu@NC@list{\the\NC@list}\NC@list{\NC@do \tabu@rewritefirst}% + \everycr{}\let\@startpbox \tabu@startpbox % for nested tabu inside longtabu... 
+ \let\@endpbox \tabu@endpbox % idem " " " " " " + \let\@tabarray \tabu@tabarray % idem " " " " " " + \tabu@setcleanup \tabu@setreset +}% \tabu@setup +\def\tabu@init{\tabu@starttimer \tabu@measuringfalse + \edef\tabu@hfuzz {\the\dimexpr\hfuzz+1sp}\global\tabu@footnotes{}% + \let\firsthline \tabu@firsthline \let\lasthline \tabu@lasthline + \let\firstline \tabu@firstline \let\lastline \tabu@lastline + \let\hline \tabu@hline \let\@xhline \tabu@xhline + \let\color \tabu@color \let\@arstrutbox \tabu@arstrutbox + \iftabu@colortbl\else\let\LT@@hline \tabu@LT@@hline \fi + \tabu@trivlist % + \let\@footnotetext \tabu@footnotetext \let\@xfootnotetext \tabu@xfootnotetext + \let\@xfootnote \tabu@xfootnote \let\centering \tabu@centering + \let\raggedright \tabu@raggedright \let\raggedleft \tabu@raggedleft + \let\tabudecimal \tabu@tabudecimal \let\Centering \tabu@Centering + \let\RaggedRight \tabu@RaggedRight \let\RaggedLeft \tabu@RaggedLeft + \let\justifying \tabu@justifying \let\rowfont \tabu@rowfont + \let\fbox \tabu@fbox \let\color@b@x \tabu@color@b@x + \let\tabu@@everycr \everycr \let\tabu@@everypar \everypar + \let\tabu@prepnext@tokORI \prepnext@tok\let\prepnext@tok \tabu@prepnext@tok + \let\tabu@multicolumnORI\multicolumn \let\multicolumn \tabu@multicolumn + \let\tabu@startpbox \@startpbox % for nested tabu inside longtabu pfff !!! 
+ \let\tabu@endpbox \@endpbox % idem " " " " " " " + \let\tabu@tabarray \@tabarray % idem " " " " " " " + \tabu@adl@fix \let\endarray \tabu@endarray % colortbl & arydshln (delarray) + \iftabu@colortbl\CT@everycr\expandafter{\expandafter\iftabu@everyrow \the\CT@everycr \fi}\fi +}% \tabu@init +\def\tabu@indent{% correction for indentation + \ifdim \parindent>\z@\ifx \linewidth\tabudefaulttarget + \everypar\expandafter{% + \the\everypar\everypar\expandafter{\the\everypar}% + \setbox\z@=\lastbox + \ifdim\wd\z@>\z@ \edef\tabu@thetarget + {\the\dimexpr -\wd\z@+\tabudefaulttarget}\fi + \box\z@}% + \fi\fi +}% \tabu@indent +\def\tabu@setcleanup {% saves last global assignments + \ifodd 1\ifmmode \else \iftabu@long \else 0\fi\fi\relax + \def\tabu@aftergroupcleanup{% + \def\tabu@aftergroupcleanup{\aftergroup\tabu@cleanup}}% + \else + \def\tabu@aftergroupcleanup{% + \aftergroup\aftergroup\aftergroup\tabu@cleanup + \let\tabu@aftergroupcleanup \relax}% + \fi + \let\tabu@arc@Gsave \tabu@arc@G + \let\tabu@arc@G \tabu@arc@L % + \let\tabu@drsc@Gsave \tabu@drsc@G + \let\tabu@drsc@G \tabu@drsc@L % + \let\tabu@ls@Gsave \tabu@ls@G + \let\tabu@ls@G \tabu@ls@L % + \let\tabu@rc@Gsave \tabu@rc@G + \let\tabu@rc@G \tabu@rc@L % + \let\tabu@evr@Gsave \tabu@evr@G + \let\tabu@evr@G \tabu@evr@L % + \let\tabu@celllalign@save \tabu@celllalign + \let\tabu@cellralign@save \tabu@cellralign + \let\tabu@cellleft@save \tabu@cellleft + \let\tabu@cellright@save \tabu@cellright + \let\tabu@@celllalign@save \tabu@@celllalign + \let\tabu@@cellralign@save \tabu@@cellralign + \let\tabu@@cellleft@save \tabu@@cellleft + \let\tabu@@cellright@save \tabu@@cellright + \let\tabu@rowfontreset@save \tabu@rowfontreset + \let\tabu@@rowfontreset@save\tabu@@rowfontreset + \let\tabu@rowfontreset \@empty + \edef\tabu@alloc@save {\the\tabu@alloc}% restore at \tabu@reset + \edef\c@taburow@save {\the\c@taburow}% + \edef\tabu@naturalX@save {\the\tabu@naturalX}% + \let\tabu@naturalXmin@save \tabu@naturalXmin + 
\let\tabu@naturalXmax@save \tabu@naturalXmax + \let\tabu@mkarstrut@save \tabu@mkarstrut + \edef\tabu@clarstrut{% + \extrarowheight \the\dimexpr \ht\@arstrutbox-\ht\strutbox \relax + \extrarowdepth \the\dimexpr \dp\@arstrutbox-\dp\strutbox \relax + \let\noexpand\@arraystretch \@ne \noexpand\tabu@rearstrut}% +}% \tabu@setcleanup +\def\tabu@cleanup {\begingroup + \globaldefs\@ne \tabu@everyrowtrue + \let\tabu@arc@G \tabu@arc@Gsave + \let\CT@arc@ \tabu@arc@G + \let\tabu@drsc@G \tabu@drsc@Gsave + \let\CT@drsc@ \tabu@drsc@G + \let\tabu@ls@G \tabu@ls@Gsave + \let\tabu@ls@ \tabu@ls@G + \let\tabu@rc@G \tabu@rc@Gsave + \let\tabu@rc@ \tabu@rc@G + \let\CT@do@color \relax + \let\tabu@evr@G \tabu@evr@Gsave + \let\tabu@celllalign \tabu@celllalign@save + \let\tabu@cellralign \tabu@cellralign@save + \let\tabu@cellleft \tabu@cellleft@save + \let\tabu@cellright \tabu@cellright@save + \let\tabu@@celllalign \tabu@@celllalign@save + \let\tabu@@cellralign \tabu@@cellralign@save + \let\tabu@@cellleft \tabu@@cellleft@save + \let\tabu@@cellright \tabu@@cellright@save + \let\tabu@rowfontreset \tabu@rowfontreset@save + \let\tabu@@rowfontreset \tabu@@rowfontreset@save + \tabu@naturalX =\tabu@naturalX@save + \let\tabu@naturalXmax \tabu@naturalXmax@save + \let\tabu@naturalXmin \tabu@naturalXmin@save + \let\tabu@mkarstrut \tabu@mkarstrut@save + \c@taburow =\c@taburow@save + \ifcase \tabu@nested \tabu@alloc \m@ne\fi + \endgroup % + \ifcase \tabu@nested + \the\tabu@footnotes \global\tabu@footnotes{}% + \tabu@afterendpar \tabu@elapsedtime + \fi + \tabu@clarstrut + \everyrow\expandafter {\tabu@evr@G}% +}% \tabu@cleanup +\let\tabu@afterendpar \relax +\def\tabu@setreset {% + \edef\tabu@savedparams {% \relax for \tabu@message@save + \ifmmode \col@sep \the\arraycolsep + \else \col@sep \the\tabcolsep \fi \relax + \arrayrulewidth \the\arrayrulewidth \relax + \doublerulesep \the\doublerulesep \relax + \extratabsurround \the\extratabsurround \relax + \extrarowheight \the\extrarowheight \relax + 
\extrarowdepth \the\extrarowdepth \relax + \abovetabulinesep \the\abovetabulinesep \relax + \belowtabulinesep \the\belowtabulinesep \relax + \def\noexpand\arraystretch{\arraystretch}% + \ifdefined\minrowclearance \minrowclearance\the\minrowclearance\relax\fi}% + \begingroup + \@temptokena\expandafter{\tabu@savedparams}% => only for \savetabu / \usetabu + \ifx \tabu@arc@L\relax \else \tabu@setsave \tabu@arc@L \fi + \ifx \tabu@drsc@L\relax \else \tabu@setsave \tabu@drsc@L \fi + \tabu@setsave \tabu@ls@L \tabu@setsave \tabu@evr@L + \expandafter \endgroup \expandafter + \def\expandafter\tabu@saved@ \expandafter{\the\@temptokena + \let\tabu@arc@G \tabu@arc@L + \let\tabu@drsc@G \tabu@drsc@L + \let\tabu@ls@G \tabu@ls@L + \let\tabu@rc@G \tabu@rc@L + \let\tabu@evr@G \tabu@evr@L}% + \def\tabu@reset{\tabu@savedparams + \tabu@everyrowtrue \c@taburow \z@ + \let\CT@arc@ \tabu@arc@L + \let\CT@drsc@ \tabu@drsc@L + \let\tabu@ls@ \tabu@ls@L + \let\tabu@rc@ \tabu@rc@L + \global\tabu@alloc \tabu@alloc@save + \everyrow\expandafter{\tabu@evr@L}}% +}% \tabu@reset +\def\tabu@setsave #1{\expandafter\tabu@sets@ve #1\@nil{#1}} +\long\def\tabu@sets@ve #1\@nil #2{\@temptokena\expandafter{\the\@temptokena \def#2{#1}}} +%% The Rewriting Process ------------------------------------------- +\def\tabu@newcolumntype #1{% + \expandafter\tabu@new@columntype + \csname NC@find@\string#1\expandafter\endcsname + \csname NC@rewrite@\string#1\endcsname + {#1}% +}% \tabu@newcolumntype +\def\tabu@new@columntype #1#2#3{% + \def#1##1#3{\NC@{##1}}% + \let#2\relax \newcommand*#2% +}% \tabu@new@columntype +\def\tabu@privatecolumntype #1{% + \expandafter\tabu@private@columntype + \csname NC@find@\string#1\expandafter\endcsname + \csname NC@rewrite@\string#1\expandafter\endcsname + \csname tabu@NC@find@\string#1\expandafter\endcsname + \csname tabu@NC@rewrite@\string#1\endcsname + {#1}% +}% \tabu@privatecolumntype +\def\tabu@private@columntype#1#2#3#4{% + \g@addto@macro\tabu@privatecolumns{\let#1#3\let#2#4}% + 
\tabu@new@columntype#3#4% +}% \tabu@private@columntype +\let\tabu@privatecolumns \@empty +\newcommand*\tabucolumn [1]{\expandafter \def \expandafter + \tabu@highprioritycolumns\expandafter{\tabu@highprioritycolumns + \NC@do #1}}% +\let\tabu@highprioritycolumns \@empty +%% The | ``column'' : rewriting process -------------------------- +\tabu@privatecolumntype |{\tabu@rewritevline} +\newcommand*\tabu@rewritevline[1][]{\tabu@vlinearg{#1}% + \expandafter \NC@find \tabu@rewritten} +\def\tabu@lines #1{% + \ifx|#1\else \tabu@privatecolumntype #1{\tabu@rewritevline}\fi + \NC@list\expandafter{\the\NC@list \NC@do #1}% +}% \tabu@lines@ +\def\tabu@vlinearg #1{% + \ifx\\#1\\\def\tabu@thestyle {\tabu@ls@}% + \else\tabu@getline {#1}% + \fi + \def\tabu@rewritten ##1{\def\tabu@rewritten{!{##1\tabu@thevline}}% + }\expandafter\tabu@rewritten\expandafter{\tabu@thestyle}% + \expandafter \tabu@keepls \tabu@thestyle \@nil +}% \tabu@vlinearg +\def\tabu@keepls #1\@nil{% + \ifcat $\@cdr #1\@nil $% + \ifx \relax#1\else + \ifx \tabu@ls@#1\else + \let#1\relax + \xdef\tabu@mkpreambuffer{\tabu@mkpreambuffer + \tabu@savels\noexpand#1}\fi\fi\fi +}% \tabu@keepls +\def\tabu@thevline {\begingroup + \ifdefined\tabu@leaders + \setbox\@tempboxa=\vtop to\dimexpr + \ht\@arstrutbox+\dp\@arstrutbox{{\tabu@thevleaders}}% + \ht\@tempboxa=\ht\@arstrutbox \dp\@tempboxa=\dp\@arstrutbox + \box\@tempboxa + \else + \tabu@thevrule + \fi \endgroup +}% \tabu@thevline +\def\tabu@savels #1{% + \expandafter\let\csname\string#1\endcsname #1% + \expandafter\def\expandafter\tabu@reset\expandafter{\tabu@reset + \tabu@resetls#1}}% +\def\tabu@resetls #1{\expandafter\let\expandafter#1\csname\string#1\endcsname}% +%% \multicolumn inside tabu environment ----------------------------- +\tabu@newcolumntype \tabu@rewritemulticolumn{% + \aftergroup \tabu@endrewritemulticolumn % after \@mkpream group + \NC@list{\NC@do *}\tabu@textbar \tabu@lines + \tabu@savedecl + \tabu@privatecolumns + \NC@list\expandafter{\the\expandafter\NC@list 
\tabu@NC@list}% + \let\tabu@savels \relax + \NC@find +}% \tabu@rewritemulticolumn +\def\tabu@endrewritemulticolumn{\gdef\tabu@mkpreambuffer{}\endgroup} +\def\tabu@multicolumn{\tabu@ifenvir \tabu@multic@lumn \tabu@multicolumnORI} +\long\def\tabu@multic@lumn #1#2#3{\multispan{#1}\begingroup + \tabu@everyrowtrue + \NC@list{\NC@do \tabu@rewritemulticolumn}% + \expandafter\@gobbletwo % gobbles \multispan{#1} + \tabu@multicolumnORI{#1}{\tabu@rewritemulticolumn #2}% + {\iftabuscantokens \tabu@rescan \else \expandafter\@firstofone \fi + {#3}}% +}% \tabu@multic@lumn +%% The X column(s): rewriting process ----------------------------- +\tabu@privatecolumntype X[1][]{\begingroup \tabu@siunitx{\endgroup \tabu@rewriteX {#1}}} +\def\tabu@nosiunitx #1{#1{}{}\expandafter \NC@find \tabu@rewritten } +\def\tabu@siunitx #1{\@ifnextchar \bgroup + {\tabu@rewriteX@Ss{#1}} + {\tabu@nosiunitx{#1}}} +\def\tabu@rewriteX@Ss #1#2{\@temptokena{}% + \@defaultunits \let\tabu@temp =#2\relax\@nnil + \ifodd 1\ifx S\tabu@temp \else \ifx s\tabu@temp \else 0 \fi\fi + \def\NC@find{\def\NC@find >####1####2<####3\relax{#1 {####1}{####3}% + }\expandafter\NC@find \the\@temptokena \relax + }\expandafter\NC@rewrite@S \@gobble #2\relax + \else \tabu@siunitxerror + \fi + \expandafter \NC@find \tabu@rewritten +}% \tabu@rewriteX@Ss +\def\tabu@siunitxerror {\PackageError{tabu}{Not a S nor s column ! 
+ \MessageBreak X column can only embed siunitx S or s columns}\@ehd +}% \tabu@siunitxerror +\def\tabu@rewriteX #1#2#3{\tabu@Xarg {#1}{#2}{#3}% + \iftabu@measuring + \else \tabu@measuringtrue % first X column found in the preamble + \let\@halignto \relax \let\tabu@halignto \relax + \iftabu@spread \tabu@spreadtarget \tabu@target \tabu@target \z@ + \else \tabu@spreadtarget \z@ \fi + \ifdim \tabu@target=\z@ + \setlength\tabu@target \tabu@thetarget + \tabu@message{\tabu@message@defaulttarget}% + \else \tabu@message{\tabu@message@target}\fi + \fi +}% \tabu@rewriteX +\def\tabu@rewriteXrestore #1#2#3{\let\@halignto \relax + \def\tabu@rewritten{l}} +\def\tabu@Xarg #1#2#3{% + \advance\tabu@Xcol \@ne \let\tabu@Xlcr \@empty + \let\tabu@Xdisp \@empty \let\tabu@Xmath \@empty + \ifx\\#1\\% + \def\tabu@rewritten{p}\tabucolX \p@ % + \else + \let\tabu@rewritten \@empty \let\tabu@temp \@empty \tabucolX \z@ + \tabu@Xparse {}#1\relax + \fi + \tabu@Xrewritten{#2}{#3}% +}% \tabu@Xarg +\def\tabu@Xparse #1{\futurelet\@let@token \tabu@Xtest} +\expandafter\def\expandafter\tabu@Xparsespace\space{\tabu@Xparse{}} +\def\tabu@Xtest{% + \ifcase \ifx \relax\@let@token \z@ \else + \if ,\@let@token \m@ne\else + \if p\@let@token 1\else + \if m\@let@token 2\else + \if b\@let@token 3\else + \if l\@let@token 4\else + \if c\@let@token 5\else + \if r\@let@token 6\else + \if j\@let@token 7\else + \if L\@let@token 8\else + \if C\@let@token 9\else + \if R\@let@token 10\else + \if J\@let@token 11\else + \ifx \@sptoken\@let@token 12\else + \if .\@let@token 13\else + \if -\@let@token 13\else + \ifcat $\@let@token 14\else + 15\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\relax + \or \tabu@Xtype {p}% + \or \tabu@Xtype {m}% + \or \tabu@Xtype {b}% + \or \tabu@Xalign \raggedright\relax + \or \tabu@Xalign \centering\relax + \or \tabu@Xalign \raggedleft\relax + \or \tabu@Xalign \tabu@justify\relax + \or \tabu@Xalign \RaggedRight\raggedright + \or \tabu@Xalign \Centering\centering + \or \tabu@Xalign 
\RaggedLeft\raggedleft + \or \tabu@Xalign \justifying\tabu@justify + \or \expandafter \tabu@Xparsespace + \or \expandafter \tabu@Xcoef + \or \expandafter \tabu@Xm@th + \or \tabu@Xcoef{}% + \else\expandafter \tabu@Xparse + \fi +}% \tabu@Xtest +\def\tabu@Xalign #1#2{% + \ifx \tabu@Xlcr\@empty \else \PackageWarning{tabu} + {Duplicate horizontal alignment specification}\fi + \ifdefined#1\def\tabu@Xlcr{#1}\let#1\relax + \else \def\tabu@Xlcr{#2}\let#2\relax\fi + \expandafter\tabu@Xparse +}% \tabu@Xalign +\def\tabu@Xtype #1{% + \ifx \tabu@rewritten\@empty \else \PackageWarning{tabu} + {Duplicate vertical alignment specification}\fi + \def\tabu@rewritten{#1}\expandafter\tabu@Xparse +}% \tabu@Xtype +\def\tabu@Xcoef#1{\edef\tabu@temp{\tabu@temp#1}% + \afterassignment\tabu@Xc@ef \tabu@cnt\number\if-#10\fi +}% \tabu@Xcoef +\def\tabu@Xc@ef{\advance\tabucolX \tabu@temp\the\tabu@cnt\p@ + \tabu@Xparse{}% +}% \tabu@Xc@ef +\def\tabu@Xm@th #1{\futurelet \@let@token \tabu@Xd@sp} +\def\tabu@Xd@sp{\let\tabu@Xmath=$% + \ifx $\@let@token \def\tabu@Xdisp{\displaystyle}% + \expandafter\tabu@Xparse + \else \expandafter\tabu@Xparse\expandafter{\expandafter}% + \fi +}% \tabu@Xd@sp +\def\tabu@Xrewritten {% + \ifx \tabu@rewritten\@empty \def\tabu@rewritten{p}\fi + \ifdim \tabucolX<\z@ \tabu@negcoeftrue + \else\ifdim \tabucolX=\z@ \tabucolX \p@ + \fi\fi + \edef\tabu@temp{{\the\tabu@Xcol}{\tabu@strippt\tabucolX}}% + \edef\tabu@Xcoefs{\tabu@Xcoefs \tabu@ \tabu@temp}% + \edef\tabu@rewritten ##1##2{\def\noexpand\tabu@rewritten{% + >{\tabu@Xlcr \ifx$\tabu@Xmath$\tabu@Xdisp\fi ##1}% + \tabu@rewritten {\tabu@hsize \tabu@temp}% + <{##2\ifx$\tabu@Xmath$\fi}}% + }\tabu@rewritten +}% \tabu@Xrewritten +\def\tabu@hsize #1#2{% + \ifdim #2\p@<\z@ + \ifdim \tabucolX=\maxdimen \tabu@wd{#1}\else + \ifdim \tabu@wd{#1}<-#2\tabucolX \tabu@wd{#1}\else -#2\tabucolX\fi + \fi + \else #2\tabucolX + \fi +}% \tabu@hsize +%% \usetabu and \preamble: rewriting process --------------------- +\tabu@privatecolumntype \usetabu 
[1]{% + \ifx\\#1\\\tabu@saveerr{}\else + \@ifundefined{tabu@saved@\string#1} + {\tabu@saveerr{#1}} + {\let\tabu@rewriteX \tabu@rewriteXrestore + \csname tabu@saved@\string#1\expandafter\endcsname\expandafter\@ne}% + \fi +}% \NC@rewrite@\usetabu +\tabu@privatecolumntype \preamble [1]{% + \ifx\\#1\\\tabu@saveerr{}\else + \@ifundefined{tabu@saved@\string#1} + {\tabu@saveerr{#1}} + {\csname tabu@saved@\string#1\expandafter\endcsname\expandafter\z@}% + \fi +}% \NC@rewrite@\preamble +%% Controlling the rewriting process ------------------------------- +\tabu@newcolumntype \tabu@rewritefirst{% + \iftabu@long \aftergroup \tabu@longpream % + \else \aftergroup \tabu@pream + \fi + \let\tabu@ \relax \let\tabu@hsize \relax + \let\tabu@Xcoefs \@empty \let\tabu@savels \relax + \tabu@Xcol \z@ \tabu@cnt \tw@ + \gdef\tabu@mkpreambuffer{\tabu@{}}\tabu@measuringfalse + \global\setbox\@arstrutbox \box\@arstrutbox + \NC@list{\NC@do *}\tabu@textbar \tabu@lines + \NC@list\expandafter{\the\NC@list \NC@do X}% + \iftabu@siunitx % + \NC@list\expandafter{\the\NC@list \NC@do S\NC@do s}\fi + \NC@list\expandafter{\the\expandafter\NC@list \tabu@highprioritycolumns}% + \expandafter\def\expandafter\tabu@NC@list\expandafter{% + \the\expandafter\NC@list \tabu@NC@list}% % * | X S + \NC@list\expandafter{\expandafter \NC@do \expandafter\usetabu + \expandafter \NC@do \expandafter\preamble + \the\NC@list \NC@do \tabu@rewritemiddle + \NC@do \tabu@rewritelast}% + \tabu@savedecl + \tabu@privatecolumns + \edef\tabu@prev{\the\@temptokena}\NC@find \tabu@rewritemiddle +}% NC@rewrite@\tabu@rewritefirst +\tabu@newcolumntype \tabu@rewritemiddle{% + \edef\tabu@temp{\the\@temptokena}\NC@find \tabu@rewritelast +}% \NC@rewrite@\tabu@rewritemiddle +\tabu@newcolumntype \tabu@rewritelast{% + \ifx \tabu@temp\tabu@prev \advance\tabu@cnt \m@ne + \NC@list\expandafter{\tabu@NC@list \NC@do \tabu@rewritemiddle + \NC@do \tabu@rewritelast}% + \else \let\tabu@prev\tabu@temp + \fi + \ifcase \tabu@cnt \expandafter\tabu@endrewrite + 
\else \expandafter\NC@find \expandafter\tabu@rewritemiddle + \fi +}% \NC@rewrite@\tabu@rewritelast +%% Choosing the strategy -------------------------------------------- +\def\tabu@endrewrite {% + \let\tabu@temp \NC@find + \ifx \@arrayright\relax \let\@arrayright \@empty \fi + \count@=% + \ifx \@finalstrut\tabu@finalstrut \z@ % outer in mode 0 print + \iftabu@measuring + \xdef\tabu@mkpreambuffer{\tabu@mkpreambuffer + \tabu@target \csname tabu@\the\tabu@nested.T\endcsname + \tabucolX \csname tabu@\the\tabu@nested.X\endcsname + \edef\@halignto {\ifx\@arrayright\@empty to\tabu@target\fi}}% + \fi + \else\iftabu@measuring 4 % X columns + \xdef\tabu@mkpreambuffer{\tabu@{\tabu@mkpreambuffer + \tabu@target \the\tabu@target + \tabu@spreadtarget \the\tabu@spreadtarget}% + \def\noexpand\tabu@Xcoefs{\tabu@Xcoefs}% + \edef\tabu@halignto{\ifx \@arrayright\@empty to\tabu@target\fi}}% + \let\tabu@Xcoefs \relax + \else\ifcase\tabu@nested \thr@@ % outer, no X + \global\let\tabu@afterendpar \relax + \else \@ne % inner, no X, outer in mode 1 or 2 + \fi + \ifdefined\tabu@usetabu + \else \ifdim\tabu@target=\z@ + \else \let\tabu@temp \tabu@extracolsep + \fi\fi + \fi + \fi + \xdef\tabu@mkpreambuffer{\count@ \the\count@ \tabu@mkpreambuffer}% + \tabu@temp +}% \tabu@endrewrite +\def\tabu@extracolsep{\@defaultunits \expandafter\let + \expandafter\tabu@temp \expandafter=\the\@temptokena \relax\@nnil + \ifx \tabu@temp\@sptoken + \expandafter\tabu@gobblespace \expandafter\tabu@extracolsep + \else + \edef\tabu@temp{\noexpand\NC@find + \if |\noexpand\tabu@temp @% + \else\if !\noexpand\tabu@temp @% + \else !% + \fi\fi + {\noexpand\extracolsep\noexpand\@flushglue}}% + \fi + \tabu@temp +}% \tabu@extrac@lsep +%% Implementing the strategy ---------------------------------------- +\long\def\tabu@pream #1\@preamble {% + \let\tabu@ \tabu@@ \tabu@mkpreambuffer \tabu@aftergroupcleanup + \NC@list\expandafter {\tabu@NC@list}% in case of nesting... 
+ \ifdefined\tabu@usetabu \tabu@usetabu \tabu@target \z@ \fi + \let\tabu@savedpreamble \@preamble + \global\let\tabu@elapsedtime \relax + \tabu@thebody ={#1\tabu@aftergroupcleanup}% + \tabu@thebody =\expandafter{\the\expandafter\tabu@thebody + \@preamble}% + \edef\tabuthepreamble {\the\tabu@thebody}% ( no @ allowed for \scantokens ) + \tabu@select +}% \tabu@pream +\long\def\tabu@longpream #1\LT@bchunk #2\LT@bchunk{% + \let\tabu@ \tabu@@ \tabu@mkpreambuffer \tabu@aftergroupcleanup + \NC@list\expandafter {\tabu@NC@list}% in case of nesting... + \let\tabu@savedpreamble \@preamble + \global\let\tabu@elapsedtime \relax + \tabu@thebody ={#1\LT@bchunk #2\tabu@aftergroupcleanup \LT@bchunk}% + \edef\tabuthepreamble {\the\tabu@thebody}% ( no @ allowed for \scantokens ) + \tabu@select +}% \tabu@longpream +\def\tabu@select {% + \ifnum\tabu@nested>\z@ \tabuscantokensfalse \fi + \ifnum \count@=\@ne \iftabu@measuring \count@=\tw@ \fi\fi + \ifcase \count@ + \global\let\tabu@elapsedtime \relax + \tabu@seteverycr + \expandafter \tabuthepreamble % vertical adjustment (inherited from outer) + \or % exit in vertical measure + struts per cell because no X and outer in mode 3 + \tabu@evr{\tabu@verticalinit}\tabu@celllalign@def{\tabu@verticalmeasure}% + \def\tabu@cellralign{\tabu@verticalspacing}% + \tabu@seteverycr + \expandafter \tabuthepreamble + \or % exit without measure because no X and outer in mode 4 + \tabu@evr{}\tabu@celllalign@def{}\let\tabu@cellralign \@empty + \tabu@seteverycr + \expandafter \tabuthepreamble + \else % needs trials + \tabu@evr{}\tabu@celllalign@def{}\let\tabu@cellralign \@empty + \tabu@savecounters + \expandafter \tabu@setstrategy + \fi +}% \tabu@select +\def\tabu@@ {\gdef\tabu@mkpreambuffer} +%% Protections to set up before trials ------------------------------ +\def\tabu@setstrategy {\begingroup % + \tabu@trialh@@k \tabu@cnt \z@ % number of trials + \hbadness \@M \let\hbadness \@tempcnta + \hfuzz \maxdimen \let\hfuzz \@tempdima + \let\write 
\tabu@nowrite\let\GenericError \tabu@GenericError + \let\savetabu \@gobble \let\tabudefaulttarget \linewidth + \let\@footnotetext \@gobble \let\@xfootnote \tabu@xfootnote + \let\color \tabu@nocolor\let\rowcolor \tabu@norowcolor + \let\tabu@aftergroupcleanup \relax % only after the last trial + \tabu@mkpreambuffer + \ifnum \count@>\thr@@ \let\@halignto \@empty \tabucolX@init + \def\tabu@lasttry{\m@ne\p@}\fi + \begingroup \iffalse{\fi \ifnum0=`}\fi + \toks@{}\def\tabu@stack{b}\iftabuscantokens \endlinechar=10 \obeyspaces \fi % + \tabu@collectbody \tabu@strategy % +}% \tabu@setstrategy +\def\tabu@savecounters{% + \def\@elt ##1{\csname c@##1\endcsname\the\csname c@##1\endcsname}% + \edef\tabu@clckpt {\begingroup \globaldefs=\@ne \cl@@ckpt \endgroup}\let\@elt \relax +}% \tabu@savecounters +\def\tabucolX@init {% \tabucolX <= \tabu@target / (sum coefs > 0) + \dimen@ \z@ \tabu@Xsum \z@ \tabucolX \z@ \let\tabu@ \tabu@Xinit \tabu@Xcoefs + \ifdim \dimen@>\z@ + \@tempdima \dimexpr \tabu@target *\p@/\dimen@ + \tabu@hfuzz\relax + \ifdim \tabucolX<\@tempdima \tabucolX \@tempdima \fi + \fi +}% \tabucolX@init +\def\tabu@Xinit #1#2{\tabu@Xcol #1 \advance \tabu@Xsum + \ifdim #2\p@>\z@ #2\p@ \advance\dimen@ #2\p@ + \else -#2\p@ \tabu@negcoeftrue + \@tempdima \dimexpr \tabu@target*\p@/\dimexpr-#2\p@\relax \relax + \ifdim \tabucolX<\@tempdima \tabucolX \@tempdima \fi + \tabu@wddef{#1}{0pt}% + \fi +}% \tabu@Xinit +%% Collecting the environment body ---------------------------------- +\long\def\tabu@collectbody #1#2\end #3{% + \edef\tabu@stack{\tabu@pushbegins #2\begin\end\expandafter\@gobble\tabu@stack}% + \ifx \tabu@stack\@empty + \toks@\expandafter{\expandafter\tabu@thebody\expandafter{\the\toks@ #2}% + \def\tabu@end@envir{\end{#3}}% + \iftabuscantokens + \iftabu@long \def\tabu@endenvir {\end{#3}\tabu@gobbleX}% + \else \def\tabu@endenvir {\let\endarray \@empty + \end{#3}\tabu@gobbleX}% + \fi + \else \def\tabu@endenvir {\end{#3}}\fi}% + \let\tabu@collectbody \tabu@endofcollect + 
\else\def\tabu@temp{#3}% + \ifx \tabu@temp\@empty \toks@\expandafter{\the\toks@ #2\end }% + \else \ifx\tabu@temp\tabu@@spxiii \toks@\expandafter{\the\toks@ #2\end #3}% + \else \ifx\tabu@temp\tabu@X \toks@\expandafter{\the\toks@ #2\end #3}% + \else \toks@\expandafter{\the\toks@ #2\end{#3}}% + \fi\fi\fi + \fi + \tabu@collectbody{#1}% +}% \tabu@collectbody +\long\def\tabu@pushbegins#1\begin#2{\ifx\end#2\else b\expandafter\tabu@pushbegins\fi}% +\def\tabu@endofcollect #1{\ifnum0=`{}\fi + \expandafter\endgroup \the\toks@ #1% +}% \tabu@endofcollect +%% The trials: switching between strategies ------------------------- +\def\tabu@strategy {\relax % stops \count@ assignment ! + \ifcase\count@ % case 0 = print with vertical adjustment (outer is finished) + \expandafter \tabu@endoftrials + \or % case 1 = exit in vertical measure (outer in mode 3) + \expandafter\xdef\csname tabu@\the\tabu@nested.T\endcsname{\the\tabu@target}% + \expandafter\xdef\csname tabu@\the\tabu@nested.X\endcsname{\the\tabucolX}% + \expandafter \tabu@endoftrials + \or % case 2 = exit with a rule replacing the table (outer in mode 4) + \expandafter \tabu@quickend + \or % case 3 = outer is in mode 3 because of no X + \begingroup + \tabu@evr{\tabu@verticalinit}\tabu@celllalign@def{\tabu@verticalmeasure}% + \def\tabu@cellralign{\tabu@verticalspacing}% + \expandafter \tabu@measuring + \else % case 4 = horizontal measure + \begingroup + \global\let\tabu@elapsedtime \tabu@message@etime + \long\def\multicolumn##1##2##3{\multispan{##1}}% + \let\tabu@startpboxORI \@startpbox + \iftabu@spread + \def\tabu@naturalXmax {\z@}% + \let\tabu@naturalXmin \tabu@naturalXmax + \tabu@evr{\global\tabu@naturalX \z@}% + \let\@startpbox \tabu@startpboxmeasure + \else\iftabu@negcoef + \let\@startpbox \tabu@startpboxmeasure + \else \let\@startpbox \tabu@startpboxquick + \fi\fi + \expandafter \tabu@measuring + \fi +}% \tabu@strategy +\def\tabu@measuring{\expandafter \tabu@trial \expandafter + \count@ \the\count@ \tabu@endtrial +}% 
\tabu@measuring +\def\tabu@trial{\iftabu@long \tabu@longtrial \else \tabu@shorttrial \fi} +\def\tabu@shorttrial {\setbox\tabu@box \hbox\bgroup \tabu@seteverycr + \ifx \tabu@savecounters\relax \else + \let\tabu@savecounters \relax \tabu@clckpt \fi + $\iftabuscantokens \tabu@rescan \else \expandafter\@secondoftwo \fi + \expandafter{\expandafter \tabuthepreamble + \the\tabu@thebody + \csname tabu@adl@endtrial\endcsname + \endarray}$\egroup % got \tabu@box +}% \tabu@shorttrial +\def\tabu@longtrial {\setbox\tabu@box \hbox\bgroup \tabu@seteverycr + \ifx \tabu@savecounters\relax \else + \let\tabu@savecounters \relax \tabu@clckpt \fi + \iftabuscantokens \tabu@rescan \else \expandafter\@secondoftwo \fi + \expandafter{\expandafter \tabuthepreamble + \the\tabu@thebody + \tabuendlongtrial}\egroup % got \tabu@box +}% \tabu@longtrial +\def\tabuendlongtrial{% no @ allowed for \scantokens + \LT@echunk \global\setbox\@ne \hbox{\unhbox\@ne}\kern\wd\@ne + \LT@get@widths +}% \tabuendlongtrial +\def\tabu@adl@endtrial{% + \crcr \noalign{\global\adl@ncol \tabu@nbcols}}% anything global is crap, junky and fails ! +\def\tabu@seteverycr {\tabu@reset + \everycr \expandafter{\the\everycr \tabu@everycr}% + \let\everycr \tabu@noeverycr % +}% \tabu@seteverycr +\def\tabu@noeverycr{{\aftergroup\tabu@restoreeverycr \afterassignment}\toks@} +\def\tabu@restoreeverycr {\let\everycr \tabu@@everycr} +\def\tabu@everycr {\iftabu@everyrow \noalign{\tabu@everyrow}\fi} +\def\tabu@endoftrials {% + \iftabuscantokens \expandafter\@firstoftwo + \else \expandafter\@secondoftwo + \fi + {\expandafter \tabu@closetrialsgroup \expandafter + \tabu@rescan \expandafter{% + \expandafter\tabuthepreamble + \the\expandafter\tabu@thebody + \iftabu@long \else \endarray \fi}} + {\expandafter\tabu@closetrialsgroup \expandafter + \tabuthepreamble + \the\tabu@thebody}% + \tabu@endenvir % Finish ! 
+}% \tabu@endoftrials +\def\tabu@closetrialsgroup {% + \toks@\expandafter{\tabu@endenvir}% + \edef\tabu@bufferX{\endgroup + \tabucolX \the\tabucolX + \tabu@target \the\tabu@target + \tabu@cnt \the\tabu@cnt + \def\noexpand\tabu@endenvir{\the\toks@}% + %Quid de \@halignto = \tabu@halignto ?? + }% \tabu@bufferX + \tabu@bufferX + \ifcase\tabu@nested % print out (outer in mode 0) + \global\tabu@cnt \tabu@cnt + \tabu@evr{\tabu@verticaldynamicadjustment}% + \tabu@celllalign@def{\everypar{}}\let\tabu@cellralign \@empty + \let\@finalstrut \tabu@finalstrut + \else % vertical measure of nested tabu + \tabu@evr{\tabu@verticalinit}% + \tabu@celllalign@def{\tabu@verticalmeasure}% + \def\tabu@cellralign{\tabu@verticalspacing}% + \fi + \tabu@clckpt \let\@halignto \tabu@halignto + \let\@halignto \@empty + \tabu@seteverycr + \ifdim \tabustrutrule>\z@ \ifnum\tabu@nested=\z@ + \setbox\@arstrutbox \box\voidb@x % force \@arstrutbox to be rebuilt (visible struts) + \fi\fi +}% \tabu@closetrialsgroup +\def\tabu@quickend {\expandafter \endgroup \expandafter + \tabu@target \the\tabu@target \tabu@quickrule + \let\endarray \relax \tabu@endenvir +}% \tabu@quickend +\def\tabu@endtrial {\relax % stops \count@ assignment ! 
+ \ifcase \count@ \tabu@err % case 0 = impossible here + \or \tabu@err % case 1 = impossible here + \or \tabu@err % case 2 = impossible here + \or % case 3 = outer goes into mode 0 + \def\tabu@bufferX{\endgroup}\count@ \z@ + \else % case 4 = outer goes into mode 3 + \iftabu@spread \tabu@spreadarith % inner into mode 1 (outer in mode 3) + \else \tabu@arith % or 2 (outer in mode 4) + \fi + \count@=% + \ifcase\tabu@nested \thr@@ % outer goes into mode 3 + \else\iftabu@measuring \tw@ % outer is in mode 4 + \else \@ne % outer is in mode 3 + \fi\fi + \edef\tabu@bufferX{\endgroup + \tabucolX \the\tabucolX + \tabu@target \the\tabu@target}% + \fi + \expandafter \tabu@bufferX \expandafter + \count@ \the\count@ \tabu@strategy +}% \tabu@endtrial +\def\tabu@err{\errmessage{(tabu) Internal impossible error! (\count@=\the\count@)}} +%% The algorithms: compute the widths / stop or go on --------------- +\def\tabu@arithnegcoef {% + \@tempdima \z@ \dimen@ \z@ \let\tabu@ \tabu@arith@negcoef \tabu@Xcoefs +}% \tabu@arithnegcoef +\def\tabu@arith@negcoef #1#2{% + \ifdim #2\p@>\z@ \advance\dimen@ #2\p@ % saturated by definition + \advance\@tempdima #2\tabucolX + \else + \ifdim -#2\tabucolX <\tabu@wd{#1}% c_i X < natural width <= \tabu@target-> saturated + \advance\dimen@ -#2\p@ + \advance\@tempdima -#2\tabucolX + \else + \advance\@tempdima \tabu@wd{#1}% natural width <= c_i X => neutralised + \ifdim \tabu@wd{#1}<\tabu@target \else % neutralised + \advance\dimen@ -#2\p@ % saturated (natural width = tabu@target) + \fi + \fi + \fi +}% \tabu@arith@negcoef +\def\tabu@givespace #1#2{% here \tabu@DELTA < \z@ + \ifdim \@tempdima=\z@ + \tabu@wddef{#1}{\the\dimexpr -\tabu@DELTA*\p@/\tabu@Xsum}% + \else + \tabu@wddef{#1}{\the\dimexpr \tabu@hsize{#1}{#2} + *(\p@ -\tabu@DELTA*\p@/\@tempdima)/\p@\relax}% + \fi +}% \tabu@givespace +\def\tabu@arith {\advance\tabu@cnt \@ne + \ifnum \tabu@cnt=\@ne \tabu@message{\tabu@titles}\fi + \tabu@arithnegcoef + \@tempdimb \dimexpr \wd\tabu@box -\@tempdima \relax % + 
\tabu@DELTA = \dimexpr \wd\tabu@box - \tabu@target \relax + \tabu@message{\tabu@message@arith}% + \ifdim \tabu@DELTA <\tabu@hfuzz + \ifdim \tabu@DELTA<\z@ % wd (tabu)<\tabu@target ? + \let\tabu@ \tabu@givespace \tabu@Xcoefs + \advance\@tempdima \@tempdimb \advance\@tempdima -\tabu@DELTA % for message + \else % already converged: nothing to do but nearly impossible... + \fi + \tabucolX \maxdimen + \tabu@measuringfalse + \else % need for narrower X columns + \tabucolX =\dimexpr (\@tempdima -\tabu@DELTA) *\p@/\tabu@Xsum \relax + \tabu@measuringtrue + \@whilesw \iftabu@measuring\fi {% + \advance\tabu@cnt \@ne + \tabu@arithnegcoef + \tabu@DELTA =\dimexpr \@tempdima+\@tempdimb -\tabu@target \relax % always < 0 here + \tabu@message{\tabu@header + \tabu@msgalign \tabucolX { }{ }{ }{ }{ }\@@ + \tabu@msgalign \@tempdima+\@tempdimb { }{ }{ }{ }{ }\@@ + \tabu@msgalign \tabu@target { }{ }{ }{ }{ }\@@ + \tabu@msgalign@PT \dimen@ { }{}{}{}{}{}{}\@@ + \ifdim -\tabu@DELTA<\tabu@hfuzz \tabu@spaces target ok\else + \tabu@msgalign \dimexpr -\tabu@DELTA *\p@/\dimen@ {}{}{}{}{}\@@ + \fi}% + \ifdim -\tabu@DELTA<\tabu@hfuzz + \advance\@tempdima \@tempdimb % for message + \tabu@measuringfalse + \else + \advance\tabucolX \dimexpr -\tabu@DELTA *\p@/\dimen@ \relax + \fi + }% + \fi + \tabu@message{\tabu@message@reached}% + \edef\tabu@bufferX{\endgroup \tabu@cnt \the\tabu@cnt + \tabucolX \the\tabucolX + \tabu@target \the\tabu@target}% +}% \tabu@arith +\def\tabu@spreadarith {% + \dimen@ \z@ \@tempdima \tabu@naturalXmax \let\tabu@ \tabu@spread@arith \tabu@Xcoefs + \edef\tabu@naturalXmin {\the\dimexpr\tabu@naturalXmin*\dimen@/\p@}% + \@tempdimc =\dimexpr \wd\tabu@box -\tabu@naturalXmax+\tabu@naturalXmin \relax + \iftabu@measuring + \tabu@target =\dimexpr \@tempdimc+\tabu@spreadtarget \relax + \edef\tabu@bufferX{\endgroup \tabucolX \the\tabucolX \tabu@target\the\tabu@target}% + \else + \tabu@message{\tabu@message@spreadarith}% + \ifdim \dimexpr \@tempdimc+\tabu@spreadtarget >\tabu@target + 
\tabu@message{(tabu) spread + \ifdim \@tempdimc>\tabu@target useless here: default target used% + \else too large: reduced to fit default target\fi.}% + \else + \tabu@target =\dimexpr \@tempdimc+\tabu@spreadtarget \relax + \tabu@message{(tabu) spread: New target set to \the\tabu@target^^J}% + \fi + \begingroup \let\tabu@wddef \@gobbletwo + \@tempdimb \@tempdima + \tabucolX@init + \tabu@arithnegcoef + \wd\tabu@box =\dimexpr \wd\tabu@box +\@tempdima-\@tempdimb \relax + \expandafter\endgroup \expandafter\tabucolX \the\tabucolX + \tabu@arith + \fi +}% \tabu@spreadarith +\def\tabu@spread@arith #1#2{% + \ifdim #2\p@>\z@ \advance\dimen@ #2\p@ + \else \advance\@tempdima \tabu@wd{#1}\relax + \fi +}% \tabu@spread@arith +%% Reporting in the .log file --------------------------------------- +\def\tabu@message@defaulttarget{% + \ifnum\tabu@nested=\z@^^J(tabu) Default target: + \ifx\tabudefaulttarget\linewidth \string\linewidth + \ifdim \tabu@thetarget=\linewidth \else + -\the\dimexpr\linewidth-\tabu@thetarget\fi = + \else\ifx\tabudefaulttarget\linegoal\string\linegoal= + \fi\fi + \else (tabu) Default target (nested): \fi + \the\tabu@target \on@line + \ifnum\tabu@nested=\z@ , page \the\c@page\fi} +\def\tabu@message@target {^^J(tabu) Target specified: + \the\tabu@target \on@line, page \the\c@page} +\def\tabu@message@arith {\tabu@header + \tabu@msgalign \tabucolX { }{ }{ }{ }{ }\@@ + \tabu@msgalign \wd\tabu@box { }{ }{ }{ }{ }\@@ + \tabu@msgalign \tabu@target { }{ }{ }{ }{ }\@@ + \tabu@msgalign@PT \dimen@ { }{}{}{}{}{}{}\@@ + \ifdim \tabu@DELTA<\tabu@hfuzz giving space\else + \tabu@msgalign \dimexpr (\@tempdima-\tabu@DELTA) *\p@/\tabu@Xsum -\tabucolX {}{}{}{}{}\@@ + \fi +}% \tabu@message@arith +\def\tabu@message@spreadarith {\tabu@spreadheader + \tabu@msgalign \tabu@spreadtarget { }{ }{ }{ }{}\@@ + \tabu@msgalign \wd\tabu@box { }{ }{ }{ }{}\@@ + \tabu@msgalign -\tabu@naturalXmax { }{}{}{}{}\@@ + \tabu@msgalign \tabu@naturalXmin { }{ }{ }{ }{}\@@ + \tabu@msgalign \ifdim 
\dimexpr\@tempdimc>\tabu@target \tabu@target + \else \@tempdimc+\tabu@spreadtarget \fi + {}{}{}{}{}\@@} +\def\tabu@message@negcoef #1#2{ + \tabu@spaces\tabu@spaces\space * #1. X[\rem@pt#2]: + \space width = \tabu@wd {#1} + \expandafter\string\csname tabu@\the\tabu@nested.W\number#1\endcsname + \ifdim -\tabu@pt#2\tabucolX<\tabu@target + < \number-\rem@pt#2 X + = \the\dimexpr -\tabu@pt#2\tabucolX \relax + \else + <= \the\tabu@target\space < \number-\rem@pt#2 X\fi} +\def\tabu@message@reached{\tabu@header + ******* Reached Target: + hfuzz = \tabu@hfuzz\on@line\space *******} +\def\tabu@message@etime{\edef\tabu@stoptime{\the\pdfelapsedtime}% + \tabu@message{(tabu)\tabu@spaces Time elapsed during measure: + \the\numexpr(\tabu@stoptime-\tabu@starttime-32767)/65536\relax sec + \the\numexpr\numexpr(\tabu@stoptime-\tabu@starttime) + -\numexpr(\tabu@stoptime-\tabu@starttime-32767)/65536\relax*65536\relax + *1000/65536\relax ms \tabu@spaces(\the\tabu@cnt\space + cycle\ifnum\tabu@cnt>\@ne s\fi)^^J^^J}} +\def\tabu@message@verticalsp {% + \ifdim \@tempdima>\tabu@ht + \ifdim \@tempdimb>\tabu@dp + \expandafter\expandafter\expandafter\string\tabu@ht = + \tabu@msgalign \@tempdima { }{ }{ }{ }{ }\@@ + \expandafter\expandafter\expandafter\string\tabu@dp = + \tabu@msgalign \@tempdimb { }{ }{ }{ }{ }\@@^^J% + \else + \expandafter\expandafter\expandafter\string\tabu@ht = + \tabu@msgalign \@tempdima { }{ }{ }{ }{ }\@@^^J% + \fi + \else\ifdim \@tempdimb>\tabu@dp + \tabu@spaces\tabu@spaces\tabu@spaces + \expandafter\expandafter\expandafter\string\tabu@dp = + \tabu@msgalign \@tempdimb { }{ }{ }{ }{ }\@@^^J\fi + \fi +}% \tabu@message@verticalsp +\edef\tabu@spaces{\@spaces} +\def\tabu@strippt{\expandafter\tabu@pt\the} +{\@makeother\P \@makeother\T\lowercase{\gdef\tabu@pt #1PT{#1}}} +\def\tabu@msgalign{\expandafter\tabu@msg@align\the\dimexpr} +\def\tabu@msgalign@PT{\expandafter\tabu@msg@align\romannumeral-`\0\tabu@strippt} +\def\do #1{% + \def\tabu@msg@align##1.##2##3##4##5##6##7##8##9\@@{% + 
\ifnum##1<10 #1 #1\else + \ifnum##1<100 #1 \else + \ifnum##1<\@m #1\fi\fi\fi + ##1.##2##3##4##5##6##7##8#1}% + \def\tabu@header{(tabu) \ifnum\tabu@cnt<10 #1\fi\the\tabu@cnt) }% + \def\tabu@titles{\ifnum \tabu@nested=\z@ + (tabu) Try#1 #1 tabu X #1 #1 #1tabu Width #1 #1 Target + #1 #1 #1 Coefs #1 #1 #1 Update^^J\fi}% + \def\tabu@spreadheader{% + (tabu) Try#1 #1 Spread #1 #1 tabu Width #1 #1 #1 Nat. X #1 #1 #1 #1Nat. Min. + #1 New Target^^J% + (tabu) sprd} + \def\tabu@message@save {\begingroup + \def\x ####1{\tabu@msg@align ####1{ }{ }{ }{ }{}\@@} + \def\z ####1{\expandafter\x\expandafter{\romannumeral-`\0\tabu@strippt + \dimexpr####1\p@{ }{ }}}% + \let\color \relax \def\tabu@rulesstyle ####1####2{\detokenize{####1}}% + \let\CT@arc@ \relax \let\@preamble \@gobble + \let\tabu@savedpream \@firstofone + \let\tabu@savedparams \@firstofone + \def\tabu@target ####1\relax {(tabu) target #1 #1 #1 #1 #1 = \x{####1}^^J}% + \def\tabucolX ####1\relax {(tabu) X columns width#1 = \x{####1}^^J}% + \def\tabu@nbcols ####1\relax {(tabu) Number of columns: \z{####1}^^J}% + \def\tabu@aligndefault ####1{(tabu) Default alignment: #1 #1 ####1^^J}% + \def\col@sep ####1\relax {(tabu) column sep #1 #1 #1 = \x{####1}^^J}% + \def\arrayrulewidth ####1\relax{(tabu) arrayrulewidth #1 = \x{####1}}% + \def\doublerulesep ####1\relax { doublerulesep = \x{####1}^^J}% + \def\extratabsurround####1\relax{(tabu) extratabsurround = \x{####1}^^J}% + \def\extrarowheight ####1\relax{(tabu) extrarowheight #1 = \x{####1}}% + \def\extrarowdepth ####1\relax {extrarowdepth = \x{####1}^^J}% + \def\abovetabulinesep####1\relax{(tabu) abovetabulinesep=\x{####1} }% + \def\belowtabulinesep####1\relax{ belowtabulinesep=\x{####1}^^J}% + \def\arraystretch ####1{(tabu) arraystretch #1 #1 = \z{####1}^^J}% + \def\minrowclearance####1\relax{(tabu) minrowclearance #1 = \x{####1}^^J}% + \def\tabu@arc@L ####1{(tabu) taburulecolor #1 #1 = ####1^^J}% + \def\tabu@drsc@L ####1{(tabu) tabudoublerulecolor= ####1^^J}% + \def\tabu@evr@L 
####1{(tabu) everyrow #1 #1 #1 #1 = \detokenize{####1}^^J}% + \def\tabu@ls@L ####1{(tabu) line style = \detokenize{####1}^^J}% + \def\NC@find ####1\@nil{(tabu) tabu preamble#1 #1 = \detokenize{####1}^^J}% + \def\tabu@wddef####1####2{(tabu) Natural width ####1 = \x{####2}^^J}% + \let\edef \@gobbletwo \let\def \@empty \let\let \@gobbletwo + \tabu@message{% + (tabu) \string\savetabu{\tabu@temp}: \on@line^^J% + \tabu@usetabu \@nil^^J}% + \endgroup} +}\do{ } +%% Measuring the natural width (varwidth) - store the results ------- +\def\tabu@startpboxmeasure #1{\bgroup % entering \vtop + \edef\tabu@temp{\expandafter\@secondoftwo \ifx\tabu@hsize #1\else\relax\fi}% + \ifodd 1\ifx \tabu@temp\@empty 0 \else % starts with \tabu@hsize ? + \iftabu@spread \else % if spread -> measure + \ifdim \tabu@temp\p@>\z@ 0 \fi\fi\fi% if coef>0 -> do not measure + \let\@startpbox \tabu@startpboxORI % restore immediately (nesting) + \tabu@measuringtrue % for the quick option... + \tabu@Xcol =\expandafter\@firstoftwo\ifx\tabu@hsize #1\fi + \ifdim \tabu@temp\p@>\z@ \ifdim \tabu@temp\tabucolX<\tabu@target + \tabu@target=\tabu@temp\tabucolX \fi\fi + \setbox\tabu@box \hbox \bgroup + \begin{varwidth}\tabu@target + \let\FV@ListProcessLine \tabu@FV@ListProcessLine % \hbox to natural width... 
+ \narrowragged \arraybackslash \parfillskip \@flushglue + \ifdefined\pdfadjustspacing \pdfadjustspacing\z@ \fi + \bgroup \aftergroup\tabu@endpboxmeasure + \ifdefined \cellspacetoplimit \tabu@cellspacepatch \fi + \else \expandafter\@gobble + \tabu@startpboxquick{#1}% \@gobble \bgroup + \fi +}% \tabu@startpboxmeasure +\def\tabu@cellspacepatch{\def\bcolumn##1\@nil{}\let\ecolumn\@empty + \bgroup\color@begingroup} +\def\tabu@endpboxmeasure {% + \@finalstrut \@arstrutbox + \end{varwidth}\egroup % + \ifdim \tabu@temp\p@ <\z@ % neg coef + \ifdim \tabu@wd\tabu@Xcol <\wd\tabu@box + \tabu@wddef\tabu@Xcol {\the\wd\tabu@box}% + \tabu@debug{\tabu@message@endpboxmeasure}% + \fi + \else % spread coef>0 + \global\advance \tabu@naturalX \wd\tabu@box + \@tempdima =\dimexpr \wd\tabu@box *\p@/\dimexpr \tabu@temp\p@\relax \relax + \ifdim \tabu@naturalXmax <\tabu@naturalX + \xdef\tabu@naturalXmax {\the\tabu@naturalX}\fi + \ifdim \tabu@naturalXmin <\@tempdima + \xdef\tabu@naturalXmin {\the\@tempdima}\fi + \fi + \box\tabu@box \egroup % end of \vtop (measure) restore \tabu@target +}% \tabu@endpboxmeasure +\def\tabu@wddef #1{\expandafter\xdef + \csname tabu@\the\tabu@nested.W\number#1\endcsname} +\def\tabu@wd #1{\csname tabu@\the\tabu@nested.W\number#1\endcsname} +\def\tabu@message@endpboxmeasure{\tabu@spaces\tabu@spaces<-> % <-> save natural wd + \the\tabu@Xcol. X[\tabu@temp]: + target = \the\tabucolX \space + \expandafter\expandafter\expandafter\string\tabu@wd\tabu@Xcol + =\tabu@wd\tabu@Xcol +}% \tabu@message@endpboxmeasure +\def\tabu@startpboxquick {\bgroup + \let\@startpbox \tabu@startpboxORI % restore immediately + \let\tabu \tabu@quick % \begin is expanded before... 
+ \expandafter\@gobble \@startpbox % gobbles \bgroup +}% \tabu@startpboxquick +\def\tabu@quick {\begingroup \iffalse{\fi \ifnum0=`}\fi + \toks@{}\def\tabu@stack{b}\tabu@collectbody \tabu@endquick +}% \tabu@quick +\def\tabu@endquick {% + \ifodd 1\ifx\tabu@end@envir\tabu@endtabu \else + \ifx\tabu@end@envir\tabu@endtabus \else 0\fi\fi\relax + \endgroup + \else \let\endtabu \relax + \tabu@end@envir + \fi +}% \tabu@quick +\def\tabu@endtabu {\end{tabu}} +\def\tabu@endtabus {\end{tabu*}} +%% Measuring the heights and depths - store the results ------------- +\def\tabu@verticalmeasure{\everypar{}% + \ifnum \currentgrouptype>12 % 14=semi-simple, 15=math shift group + \setbox\tabu@box =\hbox\bgroup + \let\tabu@verticalspacing \tabu@verticalsp@lcr + \d@llarbegin % after \hbox ... + \else + \edef\tabu@temp{\ifnum\currentgrouptype=5\vtop + \else\ifnum\currentgrouptype=12\vcenter + \else\vbox\fi\fi}% + \setbox\tabu@box \hbox\bgroup$\tabu@temp \bgroup + \let\tabu@verticalspacing \tabu@verticalsp@pmb + \fi +}% \tabu@verticalmeasure +\def\tabu@verticalsp@lcr{% + \d@llarend \egroup % + \@tempdima \dimexpr \ht\tabu@box+\abovetabulinesep + \@tempdimb \dimexpr \dp\tabu@box+\belowtabulinesep \relax + \ifdim\tabustrutrule>\z@ \tabu@debug{\tabu@message@verticalsp}\fi + \ifdim \tabu@ht<\@tempdima \tabu@htdef{\the\@tempdima}\fi + \ifdim \tabu@dp<\@tempdimb \tabu@dpdef{\the\@tempdimb}\fi + \noindent\vrule height\@tempdima depth\@tempdimb +}% \tabu@verticalsp@lcr +\def\tabu@verticalsp@pmb{% inserts struts as needed + \par \expandafter\egroup + \expandafter$\expandafter + \egroup \expandafter + \@tempdimc \the\prevdepth + \@tempdima \dimexpr \ht\tabu@box+\abovetabulinesep + \@tempdimb \dimexpr \dp\tabu@box+\belowtabulinesep \relax + \ifdim\tabustrutrule>\z@ \tabu@debug{\tabu@message@verticalsp}\fi + \ifdim \tabu@ht<\@tempdima \tabu@htdef{\the\@tempdima}\fi + \ifdim \tabu@dp<\@tempdimb \tabu@dpdef{\the\@tempdimb}\fi + \let\@finalstrut \@gobble + \hrule height\@tempdima depth\@tempdimb 
width\hsize +%% \box\tabu@box +}% \tabu@verticalsp@pmb + +\def\tabu@verticalinit{% + \ifnum \c@taburow=\z@ \tabu@rearstrut \fi % after \tabu@reset ! + \advance\c@taburow \@ne + \tabu@htdef{\the\ht\@arstrutbox}\tabu@dpdef{\the\dp\@arstrutbox}% + \advance\c@taburow \m@ne +}% \tabu@verticalinit +\def\tabu@htdef {\expandafter\xdef \csname tabu@\the\tabu@nested.H\the\c@taburow\endcsname} +\def\tabu@ht {\csname tabu@\the\tabu@nested.H\the\c@taburow\endcsname} +\def\tabu@dpdef {\expandafter\xdef \csname tabu@\the\tabu@nested.D\the\c@taburow\endcsname} +\def\tabu@dp {\csname tabu@\the\tabu@nested.D\the\c@taburow\endcsname} +\def\tabu@verticaldynamicadjustment {% + \advance\c@taburow \@ne + \extrarowheight \dimexpr\tabu@ht - \ht\strutbox + \extrarowdepth \dimexpr\tabu@dp - \dp\strutbox + \let\arraystretch \@empty + \advance\c@taburow \m@ne +}% \tabu@verticaldynamicadjustment +\def\tabuphantomline{\crcr \noalign{% + {\globaldefs \@ne + \setbox\@arstrutbox \box\voidb@x + \let\tabu@@celllalign \tabu@celllalign + \let\tabu@@cellralign \tabu@cellralign + \let\tabu@@cellleft \tabu@cellleft + \let\tabu@@cellright \tabu@cellright + \let\tabu@@thevline \tabu@thevline + \let\tabu@celllalign \@empty + \let\tabu@cellralign \@empty + \let\tabu@cellright \@empty + \let\tabu@cellleft \@empty + \let\tabu@thevline \relax}% + \edef\tabu@temp{\tabu@multispan \tabu@nbcols{\noindent &}}% + \toks@\expandafter{\tabu@temp \noindent\tabu@everyrowfalse \cr + \noalign{\tabu@rearstrut + {\globaldefs\@ne + \let\tabu@celllalign \tabu@@celllalign + \let\tabu@cellralign \tabu@@cellralign + \let\tabu@cellleft \tabu@@cellleft + \let\tabu@cellright \tabu@@cellright + \let\tabu@thevline \tabu@@thevline}}}% + \expandafter}\the\toks@ +}% \tabuphantomline +%% \firsthline and \lasthline corrections --------------------------- +\def\tabu@firstline {\tabu@hlineAZ \tabu@firsthlinecorrection {}} +\def\tabu@firsthline{\tabu@hlineAZ \tabu@firsthlinecorrection \hline} +\def\tabu@lastline {\tabu@hlineAZ 
\tabu@lasthlinecorrection {}} +\def\tabu@lasthline {\tabu@hlineAZ \tabu@lasthlinecorrection \hline} +\def\tabu@hline {% replaces \hline if no colortbl (see \AtBeginDocument) + \noalign{\ifnum0=`}\fi + {\CT@arc@\hrule height\arrayrulewidth}% + \futurelet \tabu@temp \tabu@xhline +}% \tabu@hline +\def\tabu@xhline{% + \ifx \tabu@temp \hline + {\ifx \CT@drsc@\relax \vskip + \else\ifx \CT@drsc@\@empty \vskip + \else \CT@drsc@\hrule height + \fi\fi + \doublerulesep}% + \fi + \ifnum0=`{\fi}% +}% \tabu@xhline +\def\tabu@hlineAZ #1#2{\noalign{\ifnum0=`}\fi \dimen@ \z@ \count@ \z@ + \toks@{}\def\tabu@hlinecorrection{#1}\def\tabu@temp{#2}% + \tabu@hlineAZsurround +}% \tabu@hlineAZ +\newcommand*\tabu@hlineAZsurround[1][\extratabsurround]{% + \extratabsurround #1\let\tabucline \tabucline@scan + \let\hline \tabu@hlinescan \let\firsthline \hline + \let\cline \tabu@clinescan \let\lasthline \hline + \expandafter \futurelet \expandafter \tabu@temp + \expandafter \tabu@nexthlineAZ \tabu@temp +}% \tabu@hlineAZsurround +\def\tabu@hlinescan {\tabu@thick \arrayrulewidth \tabu@xhlineAZ \hline} +\def\tabu@clinescan #1{\tabu@thick \arrayrulewidth \tabu@xhlineAZ {\cline{#1}}} +\def\tabucline@scan{\@testopt \tabucline@sc@n {}} +\def\tabucline@sc@n #1[#2]{\tabu@xhlineAZ {\tabucline[{#1}]{#2}}} +\def\tabu@nexthlineAZ{% + \ifx \tabu@temp\hline \else + \ifx \tabu@temp\cline \else + \ifx \tabu@temp\tabucline \else + \tabu@hlinecorrection + \fi\fi\fi +}% \tabu@nexthlineAZ +\def\tabu@xhlineAZ #1{% + \toks@\expandafter{\the\toks@ #1}% + \@tempdimc \tabu@thick % The last line width + \ifcase\count@ \@tempdimb \tabu@thick % The first line width + \else \advance\dimen@ \dimexpr \tabu@thick+\doublerulesep \relax + \fi + \advance\count@ \@ne \futurelet \tabu@temp \tabu@nexthlineAZ +}% \tabu@xhlineAZ +\def\tabu@firsthlinecorrection{% \count@ = number of \hline -1 + \@tempdima \dimexpr \ht\@arstrutbox+\dimen@ + \edef\firsthline{% + \omit \hbox to\z@{\hss{\noexpand\tabu@DBG{yellow}\vrule + height 
\the\dimexpr\@tempdima+\extratabsurround + depth \dp\@arstrutbox + width \tabustrutrule}\hss}\cr + \noalign{\vskip -\the\dimexpr \@tempdima+\@tempdimb + +\dp\@arstrutbox \relax}% + \the\toks@ + }\ifnum0=`{\fi + \expandafter}\firsthline % we are then ! +}% \tabu@firsthlinecorrection +\def\tabu@lasthlinecorrection{% + \@tempdima \dimexpr \dp\@arstrutbox+\dimen@+\@tempdimb+\@tempdimc + \edef\lasthline{% + \the\toks@ + \noalign{\vskip -\the\dimexpr\dimen@+\@tempdimb+\dp\@arstrutbox}% + \omit \hbox to\z@{\hss{\noexpand\tabu@DBG{yellow}\vrule + depth \the\dimexpr \dp\@arstrutbox+\@tempdimb+\dimen@ + +\extratabsurround-\@tempdimc + height \z@ + width \tabustrutrule}\hss}\cr + }\ifnum0=`{\fi + \expandafter}\lasthline % we are then ! +}% \tabu@lasthlinecorrection +\def\tabu@LT@@hline{% + \ifx\LT@next\hline + \global\let\LT@next \@gobble + \ifx \CT@drsc@\relax + \gdef\CT@LT@sep{% + \noalign{\penalty-\@medpenalty\vskip\doublerulesep}}% + \else + \gdef\CT@LT@sep{% + \multispan\LT@cols{% + \CT@drsc@\leaders\hrule\@height\doublerulesep\hfill}\cr}% + \fi + \else + \global\let\LT@next\empty + \gdef\CT@LT@sep{% + \noalign{\penalty-\@lowpenalty\vskip-\arrayrulewidth}}% + \fi + \ifnum0=`{\fi}% + \multispan\LT@cols + {\CT@arc@\leaders\hrule\@height\arrayrulewidth\hfill}\cr + \CT@LT@sep + \multispan\LT@cols + {\CT@arc@\leaders\hrule\@height\arrayrulewidth\hfill}\cr + \noalign{\penalty\@M}% + \LT@next +}% \tabu@LT@@hline +%% Horizontal lines : \tabucline ------------------------------------ +\let\tabu@start \@tempcnta +\let\tabu@stop \@tempcntb +\newcommand*\tabucline{\noalign{\ifnum0=`}\fi \tabu@cline} +\newcommand*\tabu@cline[2][]{\tabu@startstop{#2}% + \ifnum \tabu@stop<\z@ \toks@{}% + \else \tabu@clinearg{#1}\tabu@thestyle + \edef\tabucline{\toks@{% + \ifnum \tabu@start>\z@ \omit + \tabu@multispan\tabu@start {\span\omit}&\fi + \omit \tabu@multispan\tabu@stop {\span\omit}% + \tabu@thehline\cr + }}\tabucline + \tabu@tracinglines{(tabu:tabucline) Style: #1^^J\the\toks@^^J^^J}% + \fi + 
\futurelet \tabu@temp \tabu@xcline +}% \tabu@cline +\def\tabu@clinearg #1{% + \ifx\\#1\\\let\tabu@thestyle \tabu@ls@ + \else \@defaultunits \expandafter\let\expandafter\@tempa + \romannumeral-`\0#1\relax \@nnil + \ifx \hbox\@tempa \tabu@clinebox{#1}% + \else\ifx \box\@tempa \tabu@clinebox{#1}% + \else\ifx \vbox\@tempa \tabu@clinebox{#1}% + \else\ifx \vtop\@tempa \tabu@clinebox{#1}% + \else\ifx \copy\@tempa \tabu@clinebox{#1}% + \else\ifx \leaders\@tempa \tabu@clineleads{#1}% + \else\ifx \cleaders\@tempa \tabu@clineleads{#1}% + \else\ifx \xleaders\@tempa \tabu@clineleads{#1}% + \else\tabu@getline {#1}% + \fi\fi\fi\fi\fi\fi\fi\fi + \fi +}% \tabu@clinearg +\def\tabu@clinebox #1{\tabu@clineleads{\xleaders#1\hss}} +\def\tabu@clineleads #1{% + \let\tabu@thestyle \relax \let\tabu@leaders \@undefined + \gdef\tabu@thehrule{#1}} +\def\tabu@thehline{\begingroup + \ifdefined\tabu@leaders + \noexpand\tabu@thehleaders + \else \noexpand\tabu@thehrule + \fi \endgroup +}% \tabu@thehline +\def\tabu@xcline{% + \ifx \tabu@temp\tabucline + \toks@\expandafter{\the\toks@ \noalign + {\ifx\CT@drsc@\relax \vskip + \else \CT@drsc@\hrule height + \fi + \doublerulesep}}% + \fi + \tabu@docline +}% \tabu@xcline +\def\tabu@docline {\ifnum0=`{\fi \expandafter}\the\toks@} +\def\tabu@docline@evr {\xdef\tabu@doclineafter{\the\toks@}% + \ifnum0=`{\fi}\aftergroup\tabu@doclineafter} +\def\tabu@multispan #1#2{% + \ifnum\numexpr#1>\@ne #2\expandafter\tabu@multispan + \else \expandafter\@gobbletwo + \fi {#1-1}{#2}% +}% \tabu@multispan +\def\tabu@startstop #1{\tabu@start@stop #1\relax 1-\tabu@nbcols \@nnil} +\def\tabu@start@stop #1-#2\@nnil{% + \@defaultunits \tabu@start\number 0#1\relax \@nnil + \@defaultunits \tabu@stop \number 0#2\relax \@nnil + \tabu@stop \ifnum \tabu@start>\tabu@nbcols \m@ne + \else\ifnum \tabu@stop=\z@ \tabu@nbcols + \else\ifnum \tabu@stop>\tabu@nbcols \tabu@nbcols + \else \tabu@stop + \fi\fi\fi + \advance\tabu@start \m@ne + \ifnum \tabu@start>\z@ \advance\tabu@stop -\tabu@start \fi 
+}% \tabu@start@stop +%% Numbers: siunitx S columns (and \tabudecimal) ------------------- +\def\tabu@tabudecimal #1{% + \def\tabu@decimal{#1}\@temptokena{}% + \let\tabu@getdecimal@ \tabu@getdecimal@ignorespaces + \tabu@scandecimal +}% \tabu@tabudecimal +\def\tabu@scandecimal{\futurelet \tabu@temp \tabu@getdecimal@} +\def\tabu@skipdecimal#1{#1\tabu@scandecimal} +\def\tabu@getdecimal@ignorespaces{% + \ifcase 0\ifx\tabu@temp\ignorespaces\else + \ifx\tabu@temp\@sptoken1\else + 2\fi\fi\relax + \let\tabu@getdecimal@ \tabu@getdecimal + \expandafter\tabu@skipdecimal + \or \expandafter\tabu@gobblespace\expandafter\tabu@scandecimal + \else \expandafter\tabu@skipdecimal + \fi +}% \tabu@getdecimal@ignorespaces +\def\tabu@get@decimal#1{\@temptokena\expandafter{\the\@temptokena #1}% + \tabu@scandecimal} +\def\do#1{% + \def\tabu@get@decimalspace#1{% + \@temptokena\expandafter{\the\@temptokena #1}\tabu@scandecimal}% +}\do{ } +\let\tabu@@tabudecimal \tabu@tabudecimal +\def\tabu@getdecimal{% + \ifcase 0\ifx 0\tabu@temp\else + \ifx 1\tabu@temp\else + \ifx 2\tabu@temp\else + \ifx 3\tabu@temp\else + \ifx 4\tabu@temp\else + \ifx 5\tabu@temp\else + \ifx 6\tabu@temp\else + \ifx 7\tabu@temp\else + \ifx 8\tabu@temp\else + \ifx 9\tabu@temp\else + \ifx .\tabu@temp\else + \ifx ,\tabu@temp\else + \ifx -\tabu@temp\else + \ifx +\tabu@temp\else + \ifx e\tabu@temp\else + \ifx E\tabu@temp\else + \ifx\tabu@cellleft\tabu@temp1\else + \ifx\ignorespaces\tabu@temp1\else + \ifx\@sptoken\tabu@temp2\else + 3\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\relax + \expandafter\tabu@get@decimal + \or \expandafter\tabu@skipdecimal + \or \expandafter\tabu@get@decimalspace + \else\expandafter\tabu@printdecimal + \fi +}% \tabu@getdecimal +\def\tabu@printdecimal{% + \edef\tabu@temp{\the\@temptokena}% + \ifx\tabu@temp\@empty\else + \ifx\tabu@temp\space\else + \expandafter\tabu@decimal\expandafter{\the\@temptokena}% + \fi\fi +}% \tabu@printdecimal +%% Verbatim inside X columns 
---------------------------------------- +\def\tabu@verbatim{% + \let\verb \tabu@verb + \let\FV@DefineCheckEnd \tabu@FV@DefineCheckEnd +}% \tabu@verbatim +\let\tabu@ltx@verb \verb +\def\tabu@verb{\@ifstar {\tabu@ltx@verb*} \tabu@ltx@verb} +\def\tabu@fancyvrb {% + \def\tabu@FV@DefineCheckEnd ##1{% + \def\tabu@FV@DefineCheckEnd{% + ##1% + \let\FV@CheckEnd \tabu@FV@CheckEnd + \let\FV@@CheckEnd \tabu@FV@@CheckEnd + \let\FV@@@CheckEnd \tabu@FV@@@CheckEnd + \edef\FV@EndScanning{% + \def\noexpand\next{\noexpand\end{\FV@EnvironName}}% + \global\let\noexpand\FV@EnvironName\relax + \noexpand\next}% + \xdef\FV@EnvironName{\detokenize\expandafter{\FV@EnvironName}}}% + }\expandafter\tabu@FV@DefineCheckEnd\expandafter{\FV@DefineCheckEnd} +}% \tabu@fancyvrb +\def\tabu@FV@CheckEnd #1{\expandafter\FV@@CheckEnd \detokenize{#1\end{}}\@nil} +\edef\tabu@FV@@@CheckEnd {\detokenize{\end{}}} +\begingroup +\catcode`\[1 \catcode`\]2 +\@makeother\{ \@makeother\} + \edef\x[\endgroup + \def\noexpand\tabu@FV@@CheckEnd ##1\detokenize[\end{]##2\detokenize[}]##3% + ]\x \@nil{\def\@tempa{#2}\def\@tempb{#3}} +\def\tabu@FV@ListProcessLine #1{% + \hbox {%to \hsize{% + \kern\leftmargin + \hbox {%to \linewidth{% + \FV@LeftListNumber + \FV@LeftListFrame + \FancyVerbFormatLine{#1}\hss +%% DG/SR modification begin - Jan. 
28, 1998 (for numbers=right add-on) +%% \FV@RightListFrame}% + \FV@RightListFrame + \FV@RightListNumber}% +%% DG/SR modification end + \hss}} +%% \savetabu -------------------------------------------------------- +\newcommand*\savetabu[1]{\noalign{% + \tabu@sanitizearg{#1}\tabu@temp + \ifx \tabu@temp\@empty \tabu@savewarn{}{The tabu will not be saved}\else + \@ifundefined{tabu@saved@\tabu@temp}{}{\tabu@savewarn{#1}{Overwriting}}% + \ifdefined\tabu@restored \expandafter\let + \csname tabu@saved@\tabu@temp \endcsname \tabu@restored + \else {\tabu@save}% + \fi + \fi}% +}% \savetabu +\def\tabu@save {% + \toks0\expandafter{\tabu@saved@}% + \iftabu@negcoef + \let\tabu@wddef \relax \let\tabu@ \tabu@savewd \edef\tabu@savewd{\tabu@Xcoefs}% + \toks0\expandafter{\the\toks\expandafter0\tabu@savewd}\fi + \toks1\expandafter{\tabu@savedpream}% + \toks2\expandafter{\tabu@savedpreamble}% + \let\@preamble \relax + \let\tabu@savedpream \relax \let\tabu@savedparams \relax + \edef\tabu@preamble{% + \def\noexpand\tabu@aligndefault{\tabu@align}% + \def\tabu@savedparams {\noexpand\the\toks0}% + \def\tabu@savedpream {\noexpand\the\toks1}}% + \edef\tabu@usetabu{% + \def\@preamble {\noexpand\the\toks2}% + \tabu@target \the\tabu@target \relax + \tabucolX \the\tabucolX \relax + \tabu@nbcols \the\tabu@nbcols \relax + \def\noexpand\tabu@aligndefault{\tabu@align}% + \def\tabu@savedparams {\noexpand\the\toks0}% + \def\tabu@savedpream {\noexpand\the\toks1}}% + \let\tabu@aligndefault \relax \let\@sharp \relax + \edef\@tempa{\noexpand\tabu@s@ved + {\tabu@usetabu} + {\tabu@preamble} + {\the\toks1}}\@tempa + \tabu@message@save +}% \tabu@save +\long\def\tabu@s@ved #1#2#3{% + \def\tabu@usetabu{#1}% + \expandafter\gdef\csname tabu@saved@\tabu@temp\endcsname ##1{% + \ifodd ##1% \usetabu + \tabu@measuringfalse \tabu@spreadfalse % Just in case... 
+ \gdef\tabu@usetabu {% + \ifdim \tabu@target>\z@ \tabu@warn@usetabu \fi + \global\let\tabu@usetabu \@undefined + \def\@halignto {to\tabu@target}% + #1% + \ifx \tabu@align\tabu@aligndefault@text + \ifnum \tabu@nested=\z@ + \let\tabu@align \tabu@aligndefault \fi\fi}% + \else % \preamble + \gdef\tabu@preamble {% + \global\let\tabu@preamble \@undefined + #2% + \ifx \tabu@align\tabu@aligndefault@text + \ifnum \tabu@nested=\z@ + \let\tabu@align \tabu@aligndefault \fi\fi}% + \fi + #3}% +}% \tabu@s@ved +\def\tabu@aligndefault@text {\tabu@aligndefault}% +\def\tabu@warn@usetabu {\PackageWarning{tabu} + {Specifying a target with \string\usetabu\space is useless + \MessageBreak The target cannot be changed!}} +\def\tabu@savewd #1#2{\ifdim #2\p@<\z@ \tabu@wddef{#1}{\tabu@wd{#1}}\fi} +\def\tabu@savewarn#1#2{\PackageInfo{tabu} + {User-name `#1' already used for \string\savetabu + \MessageBreak #2}}% +\def\tabu@saveerr#1{\PackageError{tabu} + {User-name `#1' is unknown for \string\usetabu + \MessageBreak I cannot restore an unknown preamble!}\@ehd} +%% \rowfont --------------------------------------------------------- +\newskip \tabu@cellskip +\def\tabu@rowfont{\ifdim \baselineskip=\z@\noalign\fi + {\ifnum0=`}\fi \tabu@row@font} +\newcommand*\tabu@row@font[2][]{% + \ifnum7=\currentgrouptype + \global\let\tabu@@cellleft \tabu@cellleft + \global\let\tabu@@cellright \tabu@cellright + \global\let\tabu@@celllalign \tabu@celllalign + \global\let\tabu@@cellralign \tabu@cellralign + \global\let\tabu@@rowfontreset\tabu@rowfontreset + \fi + \global\let\tabu@rowfontreset \tabu@rowfont@reset + \expandafter\gdef\expandafter\tabu@cellleft\expandafter{\tabu@cellleft #2}% + \ifcsname tabu@cell@#1\endcsname % row alignment + \csname tabu@cell@#1\endcsname \fi + \ifnum0=`{\fi}% end of group / noalign group +}% \rowfont +\def\tabu@ifcolorleavevmode #1{\let\color \tabu@leavevmodecolor #1\let\color\tabu@color}% +\def\tabu@rowfont@reset{% + \global\let\tabu@rowfontreset \tabu@@rowfontreset + 
\global\let\tabu@cellleft \tabu@@cellleft + \global\let\tabu@cellright \tabu@@cellright + \global\let\tabu@cellfont \@empty + \global\let\tabu@celllalign \tabu@@celllalign + \global\let\tabu@cellralign \tabu@@cellralign +}% \tabu@@rowfontreset +\let\tabu@rowfontreset \@empty % overwritten \AtBeginDocument if colortbl +%% \tabu@prepnext@tok ----------------------------------------------- +\newif \iftabu@cellright +\def\tabu@prepnext@tok{% + \ifnum \count@<\z@ % + \@tempcnta \@M % + \tabu@nbcols\z@ + \let\tabu@fornoopORI \@fornoop + \tabu@cellrightfalse + \else + \ifcase \numexpr \count@-\@tempcnta \relax % (case 0): prev. token is left + \advance \tabu@nbcols \@ne + \iftabu@cellright % before-previous token is right and is finished + \tabu@cellrightfalse % + \tabu@righttok + \fi + \tabu@lefttok + \or % (case 1) previous token is right + \tabu@cellrighttrue \let\@fornoop \tabu@lastnoop + \else % special column: do not change the token + \iftabu@cellright % before-previous token is right + \tabu@cellrightfalse + \tabu@righttok + \fi + \fi % \ifcase + \fi + \tabu@prepnext@tokORI +}% \tabu@prepnext@tok +\long\def\tabu@lastnoop#1\@@#2#3{\tabu@lastn@@p #2\@nextchar \in@\in@@} +\def\tabu@lastn@@p #1\@nextchar #2#3\in@@{% + \ifx \in@#2\else + \let\@fornoop \tabu@fornoopORI + \xdef\tabu@mkpreambuffer{\tabu@nbcols\the\tabu@nbcols \tabu@mkpreambuffer}% + \toks0\expandafter{\expandafter\tabu@everyrowtrue \the\toks0}% + \expandafter\prepnext@tok + \fi +}% \tabu@lastnoop +\def\tabu@righttok{% + \advance \count@ \m@ne + \toks\count@\expandafter {\the\toks\count@ \tabu@cellright \tabu@cellralign}% + \advance \count@ \@ne +}% \tabu@righttok +\def\tabu@lefttok{\toks\count@\expandafter{\expandafter\tabu@celllalign + \the\toks\count@ \tabu@cellleft}% after because of $ +}% \tabu@lefttok +%% Neutralisation of glues ------------------------------------------ +\let\tabu@cellleft \@empty +\let\tabu@cellright \@empty +\tabu@celllalign@def{\tabu@cellleft}% +\let\tabu@cellralign \@empty 
+\def\tabu@cell@align #1#2#3{% + \let\tabu@maybesiunitx \toks@ \tabu@celllalign + \global \expandafter \tabu@celllalign@def \expandafter {\the\toks@ #1}% + \toks@\expandafter{\tabu@cellralign #2}% + \xdef\tabu@cellralign{\the\toks@}% + \toks@\expandafter{\tabu@cellleft #3}% + \xdef\tabu@cellleft{\the\toks@}% +}% \tabu@cell@align +\def\tabu@cell@l{% force alignment to left + \tabu@cell@align + {\tabu@removehfil \raggedright \tabu@cellleft}% left + {\tabu@flush1\tabu@ignorehfil}% right + \raggedright +}% \tabu@cell@l +\def\tabu@cell@c{% force alignment to center + \tabu@cell@align + {\tabu@removehfil \centering \tabu@flush{.5}\tabu@cellleft} + {\tabu@flush{.5}\tabu@ignorehfil} + \centering +}% \tabu@cell@c +\def\tabu@cell@r{% force alignment to right + \tabu@cell@align + {\tabu@removehfil \raggedleft \tabu@flush1\tabu@cellleft} + \tabu@ignorehfil + \raggedleft +}% \tabu@cell@r +\def\tabu@cell@j{% force justification (for p, m, b columns) + \tabu@cell@align + {\tabu@justify\tabu@cellleft} + {} + \tabu@justify +}% \tabu@cell@j +\def\tabu@justify{% + \leftskip\z@skip \@rightskip\leftskip \rightskip\@rightskip + \parfillskip\@flushglue +}% \tabu@justify +%% ragged2e settings +\def\tabu@cell@L{% force alignment to left (ragged2e) + \tabu@cell@align + {\tabu@removehfil \RaggedRight \tabu@cellleft} + {\tabu@flush 1\tabu@ignorehfil} + \RaggedRight +}% \tabu@cell@L +\def\tabu@cell@C{% force alignment to center (ragged2e) + \tabu@cell@align + {\tabu@removehfil \Centering \tabu@flush{.5}\tabu@cellleft} + {\tabu@flush{.5}\tabu@ignorehfil} + \Centering +}% \tabu@cell@C +\def\tabu@cell@R{% force alignment to right (ragged2e) + \tabu@cell@align + {\tabu@removehfil \RaggedLeft \tabu@flush 1\tabu@cellleft} + \tabu@ignorehfil + \RaggedLeft +}% \tabu@cell@R +\def\tabu@cell@J{% force justification (ragged2e) + \tabu@cell@align + {\justifying \tabu@cellleft} + {} + \justifying +}% \tabu@cell@J +\def\tabu@flush#1{% + \iftabu@colortbl % colortbl uses \hfill rather than \hfil + \hskip 
\ifnum13<\currentgrouptype \stretch{#1}% + \else \ifdim#1pt<\p@ \tabu@cellskip + \else \stretch{#1} + \fi\fi \relax + \else % array.sty + \ifnum 13<\currentgrouptype + \hfil \hskip1sp \relax \fi + \fi +}% \tabu@flush +\let\tabu@hfil \hfil +\let\tabu@hfill \hfill +\let\tabu@hskip \hskip +\def\tabu@removehfil{% + \iftabu@colortbl + \unkern \tabu@cellskip =\lastskip + \ifnum\gluestretchorder\tabu@cellskip =\tw@ \hskip-\tabu@cellskip + \else \tabu@cellskip \z@skip + \fi + \else + \ifdim\lastskip=1sp\unskip\fi + \ifnum\gluestretchorder\lastskip =\@ne + \hfilneg % \hfilneg for array.sty but not for colortbl... + \fi + \fi +}% \tabu@removehfil +\def\tabu@ignorehfil{\aftergroup \tabu@nohfil} +\def\tabu@nohfil{% \hfil -> do nothing + restore original \hfil + \def\hfil{\let\hfil \tabu@hfil}% local to (alignment template) group +}% \tabu@nohfil +\def\tabu@colortblalignments {% if colortbl + \def\tabu@nohfil{% + \def\hfil {\let\hfil \tabu@hfil}% local to (alignment template) group + \def\hfill {\let\hfill \tabu@hfill}% (colortbl uses \hfill) pfff... 
+ \def\hskip ####1\relax{\let\hskip \tabu@hskip}}% local +}% \tabu@colortblalignments +%% Taking care of footnotes and hyperfootnotes ---------------------- +\long\def\tabu@footnotetext #1{% + \edef\@tempa{\the\tabu@footnotes + \noexpand\footnotetext [\the\csname c@\@mpfn\endcsname]}% + \global\tabu@footnotes\expandafter{\@tempa {#1}}}% +\long\def\tabu@xfootnotetext [#1]#2{% + \global\tabu@footnotes\expandafter{\the\tabu@footnotes + \footnotetext [{#1}]{#2}}} +\let\tabu@xfootnote \@xfootnote +\long\def\tabu@Hy@ftntext{\tabu@Hy@ftntxt {\the \c@footnote }} +\long\def\tabu@Hy@xfootnote [#1]{% + \begingroup + \value\@mpfn #1\relax + \protected@xdef \@thefnmark {\thempfn}% + \endgroup + \@footnotemark \tabu@Hy@ftntxt {#1}% +}% \tabu@Hy@xfootnote +\long\def\tabu@Hy@ftntxt #1#2{% + \edef\@tempa{% + \the\tabu@footnotes + \begingroup + \value\@mpfn #1\relax + \noexpand\protected@xdef\noexpand\@thefnmark {\noexpand\thempfn}% + \expandafter \noexpand \expandafter + \tabu@Hy@footnotetext \expandafter{\Hy@footnote@currentHref}% + }% + \global\tabu@footnotes\expandafter{\@tempa {#2}% + \endgroup}% +}% \tabu@Hy@ftntxt +\long\def\tabu@Hy@footnotetext #1#2{% + \H@@footnotetext{% + \ifHy@nesting + \hyper@@anchor {#1}{#2}% + \else + \Hy@raisedlink{% + \hyper@@anchor {#1}{\relax}% + }% + \def\@currentHref {#1}% + \let\@currentlabelname \@empty + #2% + \fi + }% +}% \tabu@Hy@footnotetext +%% No need for \arraybackslash ! 
------------------------------------ +\def\tabu@latextwoe {% +\def\tabu@temp##1##2##3{{\toks@\expandafter{##2##3}\xdef##1{\the\toks@}}} +\tabu@temp \tabu@centering \centering \arraybackslash +\tabu@temp \tabu@raggedleft \raggedleft \arraybackslash +\tabu@temp \tabu@raggedright \raggedright \arraybackslash +}% \tabu@latextwoe +\def\tabu@raggedtwoe {% +\def\tabu@temp ##1##2##3{{\toks@\expandafter{##2##3}\xdef##1{\the\toks@}}} +\tabu@temp \tabu@Centering \Centering \arraybackslash +\tabu@temp \tabu@RaggedLeft \RaggedLeft \arraybackslash +\tabu@temp \tabu@RaggedRight \RaggedRight \arraybackslash +\tabu@temp \tabu@justifying \justifying \arraybackslash +}% \tabu@raggedtwoe +\def\tabu@normalcrbackslash{\let\\\@normalcr} +\def\tabu@trivlist{\expandafter\def\expandafter\@trivlist\expandafter{% + \expandafter\tabu@normalcrbackslash \@trivlist}} +%% Utilities: \fbox \fcolorbox and \tabudecimal ------------------- +\def\tabu@fbox {\leavevmode\afterassignment\tabu@beginfbox \setbox\@tempboxa\hbox} +\def\tabu@beginfbox {\bgroup \kern\fboxsep + \bgroup\aftergroup\tabu@endfbox} +\def\tabu@endfbox {\kern\fboxsep\egroup\egroup + \@frameb@x\relax} +\def\tabu@color@b@x #1#2{\leavevmode \bgroup + \def\tabu@docolor@b@x{#1{#2\color@block{\wd\z@}{\ht\z@}{\dp\z@}\box\z@}}% + \afterassignment\tabu@begincolor@b@x \setbox\z@ \hbox +}% \tabu@color@b@x +\def\tabu@begincolor@b@x {\kern\fboxsep \bgroup + \aftergroup\tabu@endcolor@b@x \set@color} +\def\tabu@endcolor@b@x {\kern\fboxsep \egroup + \dimen@\ht\z@ \advance\dimen@ \fboxsep \ht\z@ \dimen@ + \dimen@\dp\z@ \advance\dimen@ \fboxsep \dp\z@ \dimen@ + \tabu@docolor@b@x \egroup +}% \tabu@endcolor@b@x +%% Corrections (arydshln, delarray, colortbl) ----------------------- +\def\tabu@fix@arrayright {%% \@arrayright is missing from \endarray + \iftabu@colortbl + \ifdefined\adl@array % + \def\tabu@endarray{% + \adl@endarray \egroup \adl@arrayrestore \CT@end \egroup % + \@arrayright % + \gdef\@preamble{}}% + \else % + \def\tabu@endarray{% + \crcr 
\egroup \egroup % + \@arrayright % + \gdef\@preamble{}\CT@end}% + \fi + \else + \ifdefined\adl@array % + \def\tabu@endarray{% + \adl@endarray \egroup \adl@arrayrestore \egroup % + \@arrayright % + \gdef\@preamble{}}% + \else % + \PackageWarning{tabu} + {\string\@arrayright\space is missing from the + \MessageBreak definition of \string\endarray. + \MessageBreak Compatibility with delarray.sty is broken.}% + \fi\fi +}% \tabu@fix@arrayright +\def\tabu@adl@xarraydashrule #1#2#3{% + \ifnum\@lastchclass=\adl@class@start\else + \ifnum\@lastchclass=\@ne\else + \ifnum\@lastchclass=5 \else % @-arg (class 5) and !-arg (class 1) + \adl@leftrulefalse \fi\fi % must be treated the same + \fi + \ifadl@zwvrule\else \ifadl@inactive\else + \@addtopreamble{\vrule\@width\arrayrulewidth + \@height\z@ \@depth\z@}\fi \fi + \ifadl@leftrule + \@addtopreamble{\adl@vlineL{\CT@arc@}{\adl@dashgapcolor}% + {\number#1}#3}% + \else \@addtopreamble{\adl@vlineR{\CT@arc@}{\adl@dashgapcolor}% + {\number#2}#3} + \fi +}% \tabu@adl@xarraydashrule +\def\tabu@adl@act@endpbox {% + \unskip \ifhmode \nobreak \fi \@finalstrut \@arstrutbox + \egroup \egroup + \adl@colhtdp \box\adl@box \hfil +}% \tabu@adl@act@endpbox +\def\tabu@adl@fix {% + \let\adl@xarraydashrule \tabu@adl@xarraydashrule % arydshln + \let\adl@act@endpbox \tabu@adl@act@endpbox % arydshln + \let\adl@act@@endpbox \tabu@adl@act@endpbox % arydshln + \let\@preamerror \@preamerr % arydshln +}% \tabu@adl@fix +%% Correction for longtable' \@startbox definition ------------------ +%% => \everypar is ``missing'' : TeX should be in vertical mode +\def\tabu@LT@startpbox #1{% + \bgroup + \let\@footnotetext\LT@p@ftntext + \setlength\hsize{#1}% + \@arrayparboxrestore + \everypar{% + \vrule \@height \ht\@arstrutbox \@width \z@ + \everypar{}}% +}% \tabu@LT@startpbox +%% \tracingtabu and the package options ------------------ +\DeclareOption{delarray}{\AtEndOfPackage{\RequirePackage{delarray}}} +\DeclareOption{linegoal}{% + \AtEndOfPackage{% + 
\RequirePackage{linegoal}[2010/12/07]% + \let\tabudefaulttarget \linegoal% \linegoal is \linewidth if not pdfTeX +}} +\DeclareOption{scantokens}{\tabuscantokenstrue} +\DeclareOption{debugshow}{\AtEndOfPackage{\tracingtabu=\tw@}} +\def\tracingtabu {\begingroup\@ifnextchar=% + {\afterassignment\tabu@tracing\count@} + {\afterassignment\tabu@tracing\count@1\relax}} +\def\tabu@tracing{\expandafter\endgroup + \expandafter\tabu@tr@cing \the\count@ \relax +}% \tabu@tracing +\def\tabu@tr@cing #1\relax {% + \ifnum#1>\thr@@ \let\tabu@tracinglines\message + \else \let\tabu@tracinglines\@gobble + \fi + \ifnum#1>\tw@ \let\tabu@DBG \tabu@@DBG + \def\tabu@mkarstrut {\tabu@DBG@arstrut}% + \tabustrutrule 1.5\p@ + \else \let\tabu@DBG \@gobble + \def\tabu@mkarstrut {\tabu@arstrut}% + \tabustrutrule \z@ + \fi + \ifnum#1>\@ne \let\tabu@debug \message + \else \let\tabu@debug \@gobble + \fi + \ifnum#1>\z@ + \let\tabu@message \message + \let\tabu@tracing@save \tabu@message@save + \let\tabu@starttimer \tabu@pdftimer + \else + \let\tabu@message \@gobble + \let\tabu@tracing@save \@gobble + \let\tabu@starttimer \relax + \fi +}% \tabu@tr@cing +%% Setup \AtBeginDocument +\AtBeginDocument{\tabu@AtBeginDocument} +\def\tabu@AtBeginDocument{\let\tabu@AtBeginDocument \@undefined + \ifdefined\arrayrulecolor \tabu@colortbltrue % + \tabu@colortblalignments % different glues are used + \else \tabu@colortblfalse \fi + \ifdefined\CT@arc@ \else \let\CT@arc@ \relax \fi + \ifdefined\CT@drsc@\else \let\CT@drsc@ \relax \fi + \let\tabu@arc@L \CT@arc@ \let\tabu@drsc@L \CT@drsc@ + \ifodd 1\ifcsname siunitx_table_collect_begin:Nn\endcsname % + \expandafter\ifx + \csname siunitx_table_collect_begin:Nn\endcsname\relax 0\fi\fi\relax + \tabu@siunitxtrue + \else \let\tabu@maybesiunitx \@firstofone % + \let\tabu@siunitx \tabu@nosiunitx + \tabu@siunitxfalse + \fi + \ifdefined\adl@array % + \else \let\tabu@adl@fix \relax + \let\tabu@adl@endtrial \@empty \fi + \ifdefined\longtable % + \else \let\longtabu \tabu@nolongtabu 
\fi + \ifdefined\cellspacetoplimit \tabu@warn@cellspace\fi + \csname\ifcsname ifHy@hyperfootnotes\endcsname % + ifHy@hyperfootnotes\else iffalse\fi\endcsname + \let\tabu@footnotetext \tabu@Hy@ftntext + \let\tabu@xfootnote \tabu@Hy@xfootnote \fi + \ifdefined\FV@DefineCheckEnd% + \tabu@fancyvrb \fi + \ifdefined\color % + \let\tabu@color \color + \def\tabu@leavevmodecolor ##1{% + \def\tabu@leavevmodecolor {\leavevmode ##1}% + }\expandafter\tabu@leavevmodecolor\expandafter{\color}% + \else + \let\tabu@color \tabu@nocolor + \let\tabu@leavevmodecolor \@firstofone \fi + \tabu@latextwoe + \ifdefined\@raggedtwoe@everyselectfont % + \tabu@raggedtwoe + \else + \let\tabu@cell@L \tabu@cell@l + \let\tabu@cell@R \tabu@cell@r + \let\tabu@cell@C \tabu@cell@c + \let\tabu@cell@J \tabu@cell@j \fi + \expandafter\in@ \expandafter\@arrayright \expandafter{\endarray}% + \ifin@ \let\tabu@endarray \endarray + \else \tabu@fix@arrayright \fi% + \everyrow{}% +}% \tabu@AtBeginDocument +\def\tabu@warn@cellspace{% + \PackageWarning{tabu}{% + Package cellspace has some limitations + \MessageBreak And redefines some macros of array.sty. 
+ \MessageBreak Please use \string\tabulinesep\space to control + \MessageBreak vertical spacing of lines inside tabu environment}% +}% \tabu@warn@cellspace +%% tabu Package initialisation +\tabuscantokensfalse +\let\tabu@arc@G \relax +\let\tabu@drsc@G \relax +\let\tabu@evr@G \@empty +\let\tabu@rc@G \@empty +\def\tabu@ls@G {\tabu@linestyle@}% +\let\tabu@@rowfontreset \@empty % +\let\tabu@@celllalign \@empty +\let\tabu@@cellralign \@empty +\let\tabu@@cellleft \@empty +\let\tabu@@cellright \@empty +\def\tabu@naturalXmin {\z@} +\def\tabu@naturalXmax {\z@} +\let\tabu@rowfontreset \@empty +\def\tabulineon {4pt}\let\tabulineoff \tabulineon +\tabu@everyrowtrue +\ifdefined\pdfelapsedtime % + \def\tabu@pdftimer {\xdef\tabu@starttime{\the\pdfelapsedtime}}% +\else \let\tabu@pdftimer \relax \let\tabu@message@etime \relax +\fi +\tracingtabu=\z@ +\newtabulinestyle {=\maxdimen}% creates the 'factory' settings \tabu@linestyle@ +\tabulinestyle{} +\taburowcolors{} +\let\tabudefaulttarget \linewidth +\ProcessOptions* % \ProcessOptions* is quicker ! +\endinput +%% +%% End of file `tabu.sty'. 
diff --git a/docs/docs/doxygen/latex/util_8hpp.tex b/docs/docs/doxygen/latex/util_8hpp.tex new file mode 100644 index 00000000..846d2252 --- /dev/null +++ b/docs/docs/doxygen/latex/util_8hpp.tex @@ -0,0 +1,29 @@ +\hypertarget{util_8hpp}{}\doxysection{/\+Users/fboemer/repos/\+D\+B\+I\+O/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/util/util.hpp File Reference} +\label{util_8hpp}\index{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/util/util.hpp@{/Users/fboemer/repos/DBIO/intel-\/hexl/intel-\/hexl/include/intel-\/hexl/util/util.hpp}} +\doxysubsection*{Namespaces} +\begin{DoxyCompactItemize} +\item + \mbox{\hyperlink{namespaceintel}{intel}} +\item + \mbox{\hyperlink{namespaceintel_1_1hexl}{intel\+::hexl}} +\end{DoxyCompactItemize} +\doxysubsection*{Enumerations} +\begin{DoxyCompactItemize} +\item +enum \mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006}{intel\+::hexl\+::\+C\+M\+P\+I\+NT}} \{ \newline +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006a2dcbad7477fd40561e8b8198f173bd47}{intel\+::hexl\+::\+C\+M\+P\+I\+N\+T\+::\+EQ}} = 0, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ac562607189d77eb9dfb707464c1e7b0b}{intel\+::hexl\+::\+C\+M\+P\+I\+N\+T\+::\+LT}} = 1, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006acfe6055d2e0503be378bb63449ec7ba6}{intel\+::hexl\+::\+C\+M\+P\+I\+N\+T\+::\+LE}} = 2, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006a946003f97ccc52d5d3b54ac0ec31bbfc}{intel\+::hexl\+::\+C\+M\+P\+I\+N\+T\+::\+F\+A\+L\+SE}} = 3, +\newline +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006adc33066c3993e0d50896e533fd692ce0}{intel\+::hexl\+::\+C\+M\+P\+I\+N\+T\+::\+NE}} = 4, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ad7d6a13c7b311ec8a3c9fcfb1919a2f8}{intel\+::hexl\+::\+C\+M\+P\+I\+N\+T\+::\+N\+LT}} = 5, 
+\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006aacd748f300c5d189c47807e2a9d6ea57}{intel\+::hexl\+::\+C\+M\+P\+I\+N\+T\+::\+N\+LE}} = 6, +\mbox{\hyperlink{namespaceintel_1_1hexl_abdcc9d2d5bb10fa95d5f143874508006ac0d83f0b82a6b30de8811e69e6d95c61}{intel\+::hexl\+::\+C\+M\+P\+I\+N\+T\+::\+T\+R\+UE}} = 7 + \} +\begin{DoxyCompactList}\small\item\em Represents binary operations between two boolean values. \end{DoxyCompactList}\end{DoxyCompactItemize} +\doxysubsection*{Functions} +\begin{DoxyCompactItemize} +\item +C\+M\+P\+I\+NT \mbox{\hyperlink{namespaceintel_1_1hexl_a8c654502a5e7fe2cfdd198f0fd920f2a}{intel\+::hexl\+::\+Not}} (C\+M\+P\+I\+NT cmp) +\begin{DoxyCompactList}\small\item\em Returns the logical negation of a binary operation. \end{DoxyCompactList}\end{DoxyCompactItemize} diff --git a/docs/docs/doxygen/xml/_r_e_a_d_m_e_8md.xml b/docs/docs/doxygen/xml/_r_e_a_d_m_e_8md.xml new file mode 100644 index 00000000..4a3e25a9 --- /dev/null +++ b/docs/docs/doxygen/xml/_r_e_a_d_m_e_8md.xml @@ -0,0 +1,179 @@ + + + + README.md + + + + + +#IntelHomomorphicEncryptionAccelerationLibrary(HEXL) +IntelHEXLisanopen-sourcelibrarywhichprovidesefficientimplementationsofintegerarithmeticonGaloisfields.Sucharithmeticisprevalentincryptography,particularlyinhomomorphicencryption(HE)schemes.IntelHEXLtargetsintegerarithmeticwithword-sizedprimes,typically40-60bits.IntelHEXLprovidesanAPIfor64-bitunsignedintegersandtargetsIntelCPUs. 
+ +##Contents +-[IntelHomomorphicEncryptionAccelerationLibrary(HEXL)](#intel-homomorphic-encryption-acceleration-library-hexl) +-[Contents](#contents) +-[Introduction](#introduction) +-[BuildingIntelHEXL](#building-intel-hexl) +-[Dependencies](#dependencies) +-[Compile-timeoptions](#compile-time-options) +-[CompilingIntelHEXL](#compiling-intel-hexl) +-[TestingIntelHEXL](#testing-intel-hexl) +-[BenchmarkingIntelHEXL](#benchmarking-intel-hexl) +-[UsingIntelHEXL](#using-intel-hexl) +-[Debugging](#debugging) +-[Thread-safety](#thread-safety) +-[Documentation](#documentation) +-[Doxygen](#doxygen) +-[Sphinx](#sphinx) +-[Contributing](#contributing) +-[Repositorylayout](#repository-layout) + +##Introduction +Manycryptographicapplications,particularlyhomomorphicencryption(HE),relyonintegerpolynomialarithmeticinafinitefield.HE,whichenablescomputationonencrypteddata,typicallyusespolynomialswithdegree`N`apoweroftworoughlyintherange`N=[2^{10},2^{17}]`.Thecoefficientsofthesepolynomialsareinafinitefieldwithaword-sizedprimes,`p`,upto`p`~62bits.Moreprecisely,thepolynomialsliveinthering`Z_p[X]/(X^N+1)`.Thatis,whenaddingormultiplyingtwopolynomials,eachcoefficientoftheresultisreducedbytheprimemodulus`p`.Whenmultiplyingtwopolynomials,theresultingpolynomialsofdegree`2N`isadditionallyreducedbytakingtheremainderwhendividingby`X^N+1`. + +TheprimarybottleneckinmanyHEapplicationsispolynomial-polynomialmultiplicationin`Z_p[X]/(X^N+1)`.Forefficientimplementation,IntelHEXLimplementsthenegacyclicnumber-theoretictransform(NTT).Tomultiplytwopolynomials,`p_1(x),p_2(x)`usingtheNTT,weperformtheFwdNTTonthetwoinputpolynomials,thenperformanelement-wisemodularmultiplication,andperformtheInvNTTontheresult. 
+ +IntelHEXLimplementsthefollowingfunctions: +-Theforwardandinversenegacyclicnumber-theoretictransform(NTT) +-Element-wisevector-vectormodularmultiplication +-Element-wisevector-scalarmodularmultiplicationwithoptionaladdition +-Element-wisemodularmultiplication + +Foreachfunction,thelibraryimplementsoneorseveralIntel(R)AVX-512implementations,aswellasalessperformant,morereadablenativeC++implementation.IntelHEXLwillautomaticallychoosethebestimplementationforthegivenCPUIntel(R)AVX-512featureset.Inparticular,whenthemodulus`p`islessthan`2^{50}`,theAVX512IFMAinstructionsetavailableonIntelIceLakeserverandIceLakeclientwillprovideamoreefficientimplementation. + +Foradditionalfunctionality,seethepublicheaders,locatedin`include/intel-hexl` +##BuildingIntelHEXL + +###Dependencies +WehavetestedIntelHEXLonthefollowingoperatingsystems: +-Ubuntu18.04 +-macOS10.15 +-MicrosoftWindows10 + +IntelHEXLrequiresthefollowingdependencies: + +|Dependency|Version| +|-------------|----------------------------------------------| +|CMake|>=3.5.1| +|Compiler|gcc>=7.0,clang++>=5.0,MSVC>=2019| + +Forbestperformance,werecommendusingaprocessorwithAVX512-IFMA52support,andarecentcompiler(gcc>=8.0,clang++>=6.0).TodetermineifyourprocesssupportsAVX512-IFMA52,simplylookfor`HEXL_HAS_AVX512IFMA`duringtheconfigurestep(see[CompilingIntelHEXL](#compiling-intel-hexl)). + + +###Compile-timeoptions +InadditiontothestandardCMakebuildoptions,IntelHEXLsupportsseveralcompile-timeflagstoconfigurethebuild. 
+Forconvenience,theyarelistedbelow: + +|CMakeoption|Values|| +|---------------------------------|----------------------|------------------------------------------------------------------------| +|HEXL_BENCHMARK|ON/OFF(defaultON)|SettoONtoenablebenchmarksuiteviaGooglebenchmark| +|HEXL_COVERAGE|ON/OFF(defaultOFF)|SettoONtoenablecoveragereportofunit-tests| +|HEXL_DEBUG|ON/OFF(defaultOFF)|SettoONtoenabledebuggingatlargeruntimepenalty| +|HEXL_DOCS|ON/OFF(defaultOFF)|SettoONtoenablebuildingofdocumentation| +|HEXL_ENABLE_ADDRESS_SANITIZER|ON/OFF(defaultOFF)|SettoONtoenablebuildingwithaddresssanitizer(ASan)| +|HEXL_ENABLE_THREAD_SANITIZER|ON/OFF(defaultOFF)|SettoONtoenablebuildingwiththreadsanitizer(TSan)| +|HEXL_ENABLE_UB_SANITIZER|ON/OFF(defaultOFF)|SettoONtoenablebuildingwithundefinedbehaviorsanitizer(UBSan)| +|HEXL_EXPORT|ON/OFF(defaultOFF)|SettoONtoenableexportofIntelHEXLforusein3rd-partyproject| +|HEXL_SHARED_LIB|ON/OFF(defaultOFF)|SettoONtoenablebuildingsharedlibrary| +|HEXL_TESTING|ON/OFF(defaultON)|SettoONtoenablebuildingofunit-tests| + +###CompilingIntelHEXL +TheinstructionstobuildIntelHEXLarecommonbetweenLinux,MacOS,andWindows. + +TocompileIntelHEXLfromsourcecode,firstclonetherepositoryintoyourcurrentdirectory.Then,toconfigurethebuild,call +```bash +cmake-S.-Bbuild +``` +addingthedesiredcompile-timeoptionswitha`-D`flag.Forinstance,tobuildIntelHEXLwithdebuggingcapabilities,call +```bash +cmake-S.-Bbuild-DHEXL_DEBUG=ON +``` + +Then,tobuildIntelHEXL,call +```bash +cmake--buildbuild +``` +ThiswillbuildtheIntelHEXLlibraryinthe`build/intel-hexl/lib/`directory. + +ToinstallIntelHEXLtotheinstallationdirectory,run +```bash +cmake--installbuild +``` +Touseanon-standardinstallationdirectory,configurethebuildwith +```bash +cmake-S.-Bbuild-DCMAKE_INSTALL_PREFIX=/path/to/install +``` + +##TestingIntelHEXL +TorunasetofunittestsviaGoogletest,configureandbuildIntelHEXLwith`-DHEXL_TESTING=ON`(see[Compile-timeoptions](#compile-time-options)). 
+Then,run +```bash +cmake--buildbuild--targetunittest +``` +Theunit-testexecutableitselfislocatedat`build/test/unit-test` +##BenchmarkingIntelHEXL +TorunasetofbenchmarksviaGooglebenchmark,configureandbuildIntelHEXLwith`-DHEXL_BENCHMARK=ON`(see[Compile-timeoptions](#compile-time-options)). +Then,run +```bash +cmake--buildbuild--targetbench +``` +Thebenchmarkexecutableitselfislocatedat`build/benchmark/bench_hexl` + +##UsingIntelHEXL +The`example`folderhasanexampleofusingIntelHEXLinathird-partyproject. + +##Debugging +Foroptimalperformance,IntelHEXLdoesnotperforminputvalidation.Inmanycasesthetimerequiredforthevalidationwouldbelongerthantheexecutionofthefunctionitself.TodebugIntelHEXL,configureandbuildIntelHEXLwith`-DHEXL_DEBUG=ON`(see[Compile-timeoptions](#compile-time-options)).Thiswillgenerateadebugversionofthelibrary,e.g.`libintel_hexl_debug.a`,thatcanbeusedtodebugtheexecution. + +**Note**,enabling`HEXL_DEBUG=ON`willresultinasignificantruntimeoverhead. +##Thread-safety +IntelHEXLissingle-threadedandthread-safe. + +#Documentation + +IntelHEXLsupportsdocumentationviaDoxygenandsphinx. +Tobuilddocumentation,firstinstall`doxygen`and`graphviz`,e.g. +```bash +sudoapt-getinstalldoxygengraphviz +``` +Then,configureIntelHEXLwith`-DHEXL_DOCS=ON`(see[Compile-timeoptions](#compile-time-options)). +###Doxygen +TobuildDoxygendocumentation,afterconfiguringIntelHEXLwith`-DHEXL_DOCS=ON`,run +``` +cmake--buildbuild--targetdoxygen +``` +ToviewthegeneratedDoxygendocumentation,openthegenerated`build/docs/doxygen/html/index.html`fileinawebbrowser. + +###Sphinx +Tobuildthesphinxdocumentation,install`sphinx`andrequireddependencies`breathe,m2r2`,e.g. +```bash +sudoapt-getinstallpython3-sphinx +pip3installbreathem2r2 +``` + +Then,afterconfiguringIntelHEXLwith`-DHEXL_DOCS=ON`,run +```bash +cmake--buildbuild--targetdocs +``` +ToviewthegeneratedSphinxdocumentation,openthegenerated`build/docs/sphinx/html/index.html`fileinawebbrowser. 
+ +#Contributing + +Atthistime,IntelHEXLdoesnotacceptexternalcontributions.Feelfreetodiscussviaissues. + +ForInteldevelopers,use[pre-commit](https://pre-commit.com/)tovalidatetheformattingofthecode. + +Beforecontributing,pleaserun +```bash +makecheck +``` +andmakesureallunittestsandpre-commitcheckspass. + +##Repositorylayout +Publicheadersresideinthe`intel-hexl/include`folder. +Privateheaders,e.g.thosecontainingIntel(R)AVX-512codeshouldnotbeputinthisfolder. + + + + diff --git a/docs/docs/doxygen/xml/classintel_1_1hexl_1_1_n_t_t.xml b/docs/docs/doxygen/xml/classintel_1_1hexl_1_1_n_t_t.xml new file mode 100644 index 00000000..a812cd80 --- /dev/null +++ b/docs/docs/doxygen/xml/classintel_1_1hexl_1_1_n_t_t.xml @@ -0,0 +1,288 @@ + + + + intel::hexl::NTT + ntt.hpp + + + std::shared_ptr< NTTImpl > + std::shared_ptr<NTTImpl> intel::hexl::NTT::m_impl + + m_impl + +Class implementing the NTT. + + + + + + + + + + + + intel::hexl::NTT::NTT + () + NTT + +Initializes an empty NTT object. + + + + + + + + + + intel::hexl::NTT::~NTT + () + ~NTT + +Destructs the NTT object. + + + + + + + + + + intel::hexl::NTT::NTT + (uint64_t degree, uint64_t p) + NTT + + uint64_t + degree + + + uint64_t + p + + +Performs pre-computation necessary for forward and inverse transforms. + + +Initializes an NTT object with degree degree and modulus p. + +degree + + +a.k.a. N. Size of the NTT transform. Must be a power of 2 + + + + +p + + +Prime modulus. Must satisfy $ p == 1 \mod 2N $ + + + + + + + + + + + + intel::hexl::NTT::NTT + (uint64_t degree, uint64_t p, uint64_t root_of_unity) + NTT + + uint64_t + degree + + + uint64_t + p + + + uint64_t + root_of_unity + + +Initializes an NTT object with degree degree and modulus p. + + + + +degree + + +a.k.a. N. Size of the NTT transform. Must be a power of 2 + + + + +p + + +Prime modulus. Must satisfy $ p == 1 \mod 2N $ + + + + +root_of_unity + + +2N'th root of unity in $ \mathbb{Z_p} $. 
+ + + +Performs pre-computation necessary for forward and inverse transforms + + + + + + + void + void intel::hexl::NTT::ComputeForward + (uint64_t *result, const uint64_t *operand, uint64_t input_mod_factor, uint64_t output_mod_factor) + ComputeForward + + uint64_t * + result + + + const uint64_t * + operand + + + uint64_t + input_mod_factor + + + uint64_t + output_mod_factor + + +Compute forward NTT. Results are bit-reversed. + + + + +result + + +Stores the result + + + + +operand + + +Data on which to compute the NTT + + + + +input_mod_factor + + +Assume input operand are in [0, input_mod_factor * p). Must be 1, 2 or 4. + + + + +output_mod_factor + + +Returns output operand in [0, output_mod_factor * p). Must be 1 or 4. + + + + + + + + + + + void + void intel::hexl::NTT::ComputeInverse + (uint64_t *result, const uint64_t *operand, uint64_t input_mod_factor, uint64_t output_mod_factor) + ComputeInverse + + uint64_t * + result + + + const uint64_t * + operand + + + uint64_t + input_mod_factor + + + uint64_t + output_mod_factor + + + + +Compute inverse NTT. Results are bit-reversed. + +result + + +Stores the result + + + + +operand + + +Data on which to compute the NTT + + + + +input_mod_factor + + +Assume input operand are in [0, input_mod_factor * p). Must be 1 or 2. + + + + +output_mod_factor + + +Returns output operand in [0, output_mod_factor * p). Must be 1 or 2. + + + + + + + + + + + +Performs negacyclic forward and inverse number-theoretic transform (NTT), commonly used in RLWE cryptography. + + +The number-theoretic transform (NTT) specializes the discrete Fourier transform (DFT) to the finite field $ \mathbb{Z}_p / (X^N + 1) $. 
+ + + + intel::hexl::NTTComputeForward + intel::hexl::NTTComputeInverse + intel::hexl::NTTm_impl + intel::hexl::NTTNTT + intel::hexl::NTTNTT + intel::hexl::NTTNTT + intel::hexl::NTT~NTT + + + diff --git a/docs/docs/doxygen/xml/combine.xslt b/docs/docs/doxygen/xml/combine.xslt new file mode 100644 index 00000000..f0ee1fd9 --- /dev/null +++ b/docs/docs/doxygen/xml/combine.xslt @@ -0,0 +1,15 @@ + + + + + + + + + + + + diff --git a/docs/docs/doxygen/xml/compound.xsd b/docs/docs/doxygen/xml/compound.xsd new file mode 100644 index 00000000..083df158 --- /dev/null +++ b/docs/docs/doxygen/xml/compound.xsd @@ -0,0 +1,1187 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/docs/doxygen/xml/dir_60e7388d20bffeeed71217422ae2faa2.xml b/docs/docs/doxygen/xml/dir_60e7388d20bffeeed71217422ae2faa2.xml new file mode 100644 index 00000000..4d9af955 --- /dev/null +++ b/docs/docs/doxygen/xml/dir_60e7388d20bffeeed71217422ae2faa2.xml @@ -0,0 +1,12 @@ + + + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/util + util.hpp + + + + + + + diff --git a/docs/docs/doxygen/xml/dir_7759c3a881395f02ed4947f5a1aa5b9c.xml b/docs/docs/doxygen/xml/dir_7759c3a881395f02ed4947f5a1aa5b9c.xml new file mode 100644 index 00000000..fb52c4fa --- /dev/null +++ b/docs/docs/doxygen/xml/dir_7759c3a881395f02ed4947f5a1aa5b9c.xml @@ -0,0 +1,15 @@ + + + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/ntt + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/util + intel-hexl.hpp + + + 
+ + + + diff --git a/docs/docs/doxygen/xml/dir_8420cd92772e9af80a76f3e30148eb70.xml b/docs/docs/doxygen/xml/dir_8420cd92772e9af80a76f3e30148eb70.xml new file mode 100644 index 00000000..965f8a7f --- /dev/null +++ b/docs/docs/doxygen/xml/dir_8420cd92772e9af80a76f3e30148eb70.xml @@ -0,0 +1,12 @@ + + + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include + + + + + + + diff --git a/docs/docs/doxygen/xml/dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa.xml b/docs/docs/doxygen/xml/dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa.xml new file mode 100644 index 00000000..b2cdfe6d --- /dev/null +++ b/docs/docs/doxygen/xml/dir_8cfc4ed6d3ecc3c5762eb1b8347d82fa.xml @@ -0,0 +1,12 @@ + + + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl + + + + + + + diff --git a/docs/docs/doxygen/xml/dir_b272e9f08317806cfbaee27c029c625d.xml b/docs/docs/doxygen/xml/dir_b272e9f08317806cfbaee27c029c625d.xml new file mode 100644 index 00000000..301d9792 --- /dev/null +++ b/docs/docs/doxygen/xml/dir_b272e9f08317806cfbaee27c029c625d.xml @@ -0,0 +1,12 @@ + + + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/ntt + ntt.hpp + + + + + + + diff --git a/docs/docs/doxygen/xml/dir_b327ef8739a3d23275834e47dda5cef1.xml b/docs/docs/doxygen/xml/dir_b327ef8739a3d23275834e47dda5cef1.xml new file mode 100644 index 00000000..645e2936 --- /dev/null +++ b/docs/docs/doxygen/xml/dir_b327ef8739a3d23275834e47dda5cef1.xml @@ -0,0 +1,17 @@ + + + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise + eltwise-add-mod.hpp + eltwise-cmp-add.hpp + eltwise-cmp-sub-mod.hpp + eltwise-fma-mod.hpp + eltwise-mult-mod.hpp + eltwise-reduce-mod.hpp + + + + + + + diff --git a/docs/docs/doxygen/xml/eltwise-add-mod_8hpp.xml b/docs/docs/doxygen/xml/eltwise-add-mod_8hpp.xml new file mode 100644 index 00000000..308736f5 --- /dev/null +++ b/docs/docs/doxygen/xml/eltwise-add-mod_8hpp.xml @@ 
-0,0 +1,55 @@ + + + + eltwise-add-mod.hpp + stdint.h + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp + + + + + + + + + + + + + + + + + + + + + + + + intel + intel::hexl + + + + + +//Copyright(C)2020-2021IntelCorporation +//SPDX-License-Identifier:Apache-2.0 + +#pragmaonce + +#include<stdint.h> + +namespaceintel{ +namespacehexl{ + +voidEltwiseAddMod(uint64_t*result,constuint64_t*operand1, +constuint64_t*operand2,uint64_tn,uint64_tmodulus); + +}//namespacehexl +}//namespaceintel + + + + diff --git a/docs/docs/doxygen/xml/eltwise-cmp-add_8hpp.xml b/docs/docs/doxygen/xml/eltwise-cmp-add_8hpp.xml new file mode 100644 index 00000000..131e4ab9 --- /dev/null +++ b/docs/docs/doxygen/xml/eltwise-cmp-add_8hpp.xml @@ -0,0 +1,64 @@ + + + + eltwise-cmp-add.hpp + stdint.h + intel-hexl/util/util.hpp + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + intel + intel::hexl + + + + + +//Copyright(C)2020-2021IntelCorporation +//SPDX-License-Identifier:Apache-2.0 + +#pragmaonce + +#include<stdint.h> + +#include"intel-hexl/util/util.hpp" + +namespaceintel{ +namespacehexl{ + +voidEltwiseCmpAdd(uint64_t*result,constuint64_t*operand1,CMPINTcmp, +uint64_tbound,uint64_tdiff,uint64_tn); + +}//namespacehexl +}//namespaceintel + + + + diff --git a/docs/docs/doxygen/xml/eltwise-cmp-sub-mod_8hpp.xml b/docs/docs/doxygen/xml/eltwise-cmp-sub-mod_8hpp.xml new file mode 100644 index 00000000..dbe0213a --- /dev/null +++ b/docs/docs/doxygen/xml/eltwise-cmp-sub-mod_8hpp.xml @@ -0,0 +1,65 @@ + + + + eltwise-cmp-sub-mod.hpp + stdint.h + intel-hexl/util/util.hpp + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + intel + intel::hexl + + + + + +//Copyright(C)2020-2021IntelCorporation +//SPDX-License-Identifier:Apache-2.0 + +#pragmaonce + +#include<stdint.h> + 
+#include"intel-hexl/util/util.hpp" + +namespaceintel{ +namespacehexl{ + +voidEltwiseCmpSubMod(uint64_t*result,constuint64_t*operand1,CMPINTcmp, +uint64_tbound,uint64_tdiff,uint64_tmodulus, +uint64_tn); + +}//namespacehexl +}//namespaceintel + + + + diff --git a/docs/docs/doxygen/xml/eltwise-fma-mod_8hpp.xml b/docs/docs/doxygen/xml/eltwise-fma-mod_8hpp.xml new file mode 100644 index 00000000..3cad88ed --- /dev/null +++ b/docs/docs/doxygen/xml/eltwise-fma-mod_8hpp.xml @@ -0,0 +1,56 @@ + + + + eltwise-fma-mod.hpp + stdint.h + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp + + + + + + + + + + + + + + + + + + + + + + + + intel + intel::hexl + + + + + +//Copyright(C)2020-2021IntelCorporation +//SPDX-License-Identifier:Apache-2.0 + +#pragmaonce + +#include<stdint.h> + +namespaceintel{ +namespacehexl{ + +voidEltwiseFMAMod(uint64_t*result,constuint64_t*arg1,uint64_targ2, +constuint64_t*arg3,uint64_tn,uint64_tmodulus, +uint64_tinput_mod_factor); + +}//namespacehexl +}//namespaceintel + + + + diff --git a/docs/docs/doxygen/xml/eltwise-mult-mod_8hpp.xml b/docs/docs/doxygen/xml/eltwise-mult-mod_8hpp.xml new file mode 100644 index 00000000..cb77bc76 --- /dev/null +++ b/docs/docs/doxygen/xml/eltwise-mult-mod_8hpp.xml @@ -0,0 +1,56 @@ + + + + eltwise-mult-mod.hpp + stdint.h + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp + + + + + + + + + + + + + + + + + + + + + + + + intel + intel::hexl + + + + + +//Copyright(C)2020-2021IntelCorporation +//SPDX-License-Identifier:Apache-2.0 + +#pragmaonce + +#include<stdint.h> + +namespaceintel{ +namespacehexl{ + +voidEltwiseMultMod(uint64_t*result,constuint64_t*operand1, +constuint64_t*operand2,uint64_tn,uint64_tmodulus, +uint64_tinput_mod_factor); + +}//namespacehexl +}//namespaceintel + + + + diff --git a/docs/docs/doxygen/xml/eltwise-reduce-mod_8hpp.xml b/docs/docs/doxygen/xml/eltwise-reduce-mod_8hpp.xml new file mode 100644 index 00000000..b4e6b493 --- /dev/null +++ 
b/docs/docs/doxygen/xml/eltwise-reduce-mod_8hpp.xml @@ -0,0 +1,56 @@ + + + + eltwise-reduce-mod.hpp + stdint.h + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp + + + + + + + + + + + + + + + + + + + + + + + + intel + intel::hexl + + + + + +//Copyright(C)2020-2021IntelCorporation +//SPDX-License-Identifier:Apache-2.0 + +#pragmaonce + +#include<stdint.h> + +namespaceintel{ +namespacehexl{ + +voidEltwiseReduceMod(uint64_t*result,constuint64_t*operand, +uint64_tmodulus,uint64_tn,uint64_tinput_mod_factor, +uint64_toutput_mod_factor); + +}//namespacehexl +}//namespaceintel + + + + diff --git a/docs/docs/doxygen/xml/index.xml b/docs/docs/doxygen/xml/index.xml new file mode 100644 index 00000000..5b987eaf --- /dev/null +++ b/docs/docs/doxygen/xml/index.xml @@ -0,0 +1,66 @@ + + + intel::hexl::NTT + m_impl + NTT + ~NTT + NTT + NTT + ComputeForward + ComputeInverse + + intel + + intel::hexl + CMPINT + EQ + LT + LE + FALSE + NE + NLT + NLE + TRUE + EltwiseAddMod + EltwiseCmpAdd + EltwiseCmpSubMod + EltwiseFMAMod + EltwiseMultMod + EltwiseReduceMod + Not + + eltwise-add-mod.hpp + + eltwise-cmp-add.hpp + + eltwise-cmp-sub-mod.hpp + + eltwise-fma-mod.hpp + + eltwise-mult-mod.hpp + + eltwise-reduce-mod.hpp + + intel-hexl.hpp + + ntt.hpp + + util.hpp + + README.md + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/ntt + + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/util + + index + + diff --git a/docs/docs/doxygen/xml/index.xsd b/docs/docs/doxygen/xml/index.xsd new file mode 100644 index 00000000..04cb2f13 --- /dev/null +++ b/docs/docs/doxygen/xml/index.xsd @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/docs/doxygen/xml/indexpage.xml b/docs/docs/doxygen/xml/indexpage.xml new file mode 100644 index 00000000..fee1fc7e --- /dev/null +++ b/docs/docs/doxygen/xml/indexpage.xml @@ -0,0 +1,219 @@ + + + + index + Intel Homomorphic Encryption Acceleration Library (HEXL) + + + +Intel HEXL is an open-source library which provides efficient implementations of integer arithmetic on Galois fields. Such arithmetic is prevalent in cryptography, particularly in homomorphic encryption (HE) schemes. Intel HEXL targets integer arithmetic with word-sized primes, typically 40-60 bits. Intel HEXL provides an API for 64-bit unsigned integers and targets Intel CPUs. + +Contents + +Intel Homomorphic Encryption Acceleration Library (HEXL) +Contents +Introduction +Building Intel HEXL +Dependencies +Compile-time options +Compiling Intel HEXL + + +Testing Intel HEXL +Benchmarking Intel HEXL +Using Intel HEXL +Debugging +Thread-safety + + +Documentation +Doxygen +Sphinx + + +Contributing +Repository layout + + + + + + +Introduction +Many cryptographic applications, particularly homomorphic encryption (HE), rely on integer polynomial arithmetic in a finite field. HE, which enables computation on encrypted data, typically uses polynomials with degree N a power of two roughly in the range N=[2^{10}, 2^{17}]. The coefficients of these polynomials are in a finite field with a word-sized primes, p, up to p~62 bits. More precisely, the polynomials live in the ring Z_p[X]/(X^N + 1). That is, when adding or multiplying two polynomials, each coefficient of the result is reduced by the prime modulus p. When multiplying two polynomials, the resulting polynomials of degree 2N is additionally reduced by taking the remainder when dividing by X^N+1. +The primary bottleneck in many HE applications is polynomial-polynomial multiplication in Z_p[X]/(X^N + 1). 
For efficient implementation, Intel HEXL implements the negacyclic number-theoretic transform (NTT). To multiply two polynomials, p_1(x), p_2(x) using the NTT, we perform the FwdNTT on the two input polynomials, then perform an element-wise modular multiplication, and perform the InvNTT on the result. +Intel HEXL implements the following functions: +The forward and inverse negacyclic number-theoretic transform (NTT) +Element-wise vector-vector modular multiplication +Element-wise vector-scalar modular multiplication with optional addition +Element-wise modular multiplication + + +For each function, the library implements one or several Intel(R) AVX-512 implementations, as well as a less performant, more readable native C++ implementation. Intel HEXL will automatically choose the best implementation for the given CPU Intel(R) AVX-512 feature set. In particular, when the modulus p is less than 2^{50}, the AVX512IFMA instruction set available on Intel IceLake server and IceLake client will provide a more efficient implementation. +For additional functionality, see the public headers, located in include/intel-hexl + + +Building Intel HEXL + +Dependencies +We have tested Intel HEXL on the following operating systems: +Ubuntu 18.04 +macOS 10.15 +Microsoft Windows 10 + + +Intel HEXL requires the following dependencies: + +Dependency +Version + + +CMake +>= 3.5.1 + + +Compiler +gcc >= 7.0, clang++ >= 5.0, MSVC >= 2019 + +
    +
    +For best performance, we recommend using a processor with AVX512-IFMA52 support, and a recent compiler (gcc >= 8.0, clang++ >= 6.0). To determine if your processor supports AVX512-IFMA52, simply look for HEXL_HAS_AVX512IFMA during the configure step (see Compiling Intel HEXL). +
    + +Compile-time options +In addition to the standard CMake build options, Intel HEXL supports several compile-time flags to configure the build. For convenience, they are listed below: + +CMake option +Values + + + +HEXL_BENCHMARK +ON / OFF (default ON) +Set to ON to enable benchmark suite via Google benchmark + + +HEXL_COVERAGE +ON / OFF (default OFF) +Set to ON to enable coverage report of unit-tests + + +HEXL_DEBUG +ON / OFF (default OFF) +Set to ON to enable debugging at large runtime penalty + + +HEXL_DOCS +ON / OFF (default OFF) +Set to ON to enable building of documentation + + +HEXL_ENABLE_ADDRESS_SANITIZER +ON / OFF (default OFF) +Set to ON to enable building with address sanitizer (ASan) + + +HEXL_ENABLE_THREAD_SANITIZER +ON / OFF (default OFF) +Set to ON to enable building with thread sanitizer (TSan) + + +HEXL_ENABLE_UB_SANITIZER +ON / OFF (default OFF) +Set to ON to enable building with undefined behavior sanitizer (UBSan) + + +HEXL_EXPORT +ON / OFF (default OFF) +Set to ON to enable export of Intel HEXL for use in 3rd-party project + + +HEXL_SHARED_LIB +ON / OFF (default OFF) +Set to ON to enable building shared library + + +HEXL_TESTING +ON / OFF (default ON) +Set to ON to enable building of unit-tests + +
    +
    +
    + +Compiling Intel HEXL +The instructions to build Intel HEXL are common between Linux, macOS, and Windows. +To compile Intel HEXL from source code, first clone the repository into your current directory. Then, to configure the build, call cmake -S . -B build + +adding the desired compile-time options with a -D flag. For instance, to build Intel HEXL with debugging capabilities, call cmake -S . -B build -DHEXL_DEBUG=ON + +Then, to build Intel HEXL, call cmake --build build + +This will build the Intel HEXL library in the build/intel-hexl/lib/ directory. +To install Intel HEXL to the installation directory, run cmake --install build + +To use a non-standard installation directory, configure the build with cmake -S . -B build -DCMAKE_INSTALL_PREFIX=/path/to/install + + +
    + +Testing Intel HEXL +To run a set of unit tests via Googletest, configure and build Intel HEXL with -DHEXL_TESTING=ON (see Compile-time options). Then, run cmake --build build --target unittest + +The unit-test executable itself is located at build/test/unit-test + + +Benchmarking Intel HEXL +To run a set of benchmarks via Google benchmark, configure and build Intel HEXL with -DHEXL_BENCHMARK=ON (see Compile-time options). Then, run cmake --build build --target bench + +The benchmark executable itself is located at build/benchmark/bench_hexl + + +Using Intel HEXL +The example folder has an example of using Intel HEXL in a third-party project. + + +Debugging +For optimal performance, Intel HEXL does not perform input validation. In many cases the time required for the validation would be longer than the execution of the function itself. To debug Intel HEXL, configure and build Intel HEXL with -DHEXL_DEBUG=ON (see Compile-time options). This will generate a debug version of the library, e.g. libintel_hexl_debug.a, that can be used to debug the execution. +Note, enabling HEXL_DEBUG=ON will result in a significant runtime overhead. + + +Thread-safety +Intel HEXL is single-threaded and thread-safe. + + +Documentation +Intel HEXL supports documentation via Doxygen and sphinx. To build documentation, first install doxygen and graphviz, e.g. sudo apt-get install doxygen graphviz + +Then, configure Intel HEXL with -DHEXL_DOCS=ON (see Compile-time options). + +Doxygen +To build Doxygen documentation, after configuring Intel HEXL with -DHEXL_DOCS=ON, run cmake --build build --target doxygen + +To view the generated Doxygen documentation, open the generated build/docs/doxygen/html/index.html file in a web browser. + + +Sphinx +To build the sphinx documentation, install sphinx and required dependencies breathe, m2r2, e.g.
sudo apt-get install python3-sphinx +pip3 install breathe m2r2 + +Then, after configuring Intel HEXL with -DHEXL_DOCS=ON, run cmake --build build --target docs + +To view the generated Sphinx documentation, open the generated build/docs/sphinx/html/index.html file in a web browser. + + + +Contributing +At this time, Intel HEXL does not accept external contributions. Feel free to discuss via issues. +For Intel developers, use pre-commit to validate the formatting of the code. +Before contributing, please run make check + +and make sure all unit tests and pre-commit checks pass. + +Repository layout +Public headers reside in the intel-hexl/include folder. Private headers, e.g. those containing Intel(R) AVX-512 code, should not be put in this folder. + + +
    +
    +
    diff --git a/docs/docs/doxygen/xml/intel-hexl_8hpp.xml b/docs/docs/doxygen/xml/intel-hexl_8hpp.xml new file mode 100644 index 00000000..07f85cf2 --- /dev/null +++ b/docs/docs/doxygen/xml/intel-hexl_8hpp.xml @@ -0,0 +1,119 @@ + + + + intel-hexl.hpp + intel-hexl/eltwise/eltwise-add-mod.hpp + intel-hexl/eltwise/eltwise-cmp-add.hpp + intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp + intel-hexl/eltwise/eltwise-fma-mod.hpp + intel-hexl/eltwise/eltwise-mult-mod.hpp + intel-hexl/eltwise/eltwise-reduce-mod.hpp + intel-hexl/ntt/ntt.hpp + intel-hexl/util/util.hpp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +//Copyright(C)2020-2021IntelCorporation +//SPDX-License-Identifier:Apache-2.0 + +#pragmaonce + +#include"intel-hexl/eltwise/eltwise-add-mod.hpp" +#include"intel-hexl/eltwise/eltwise-cmp-add.hpp" +#include"intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp" +#include"intel-hexl/eltwise/eltwise-fma-mod.hpp" +#include"intel-hexl/eltwise/eltwise-mult-mod.hpp" +#include"intel-hexl/eltwise/eltwise-reduce-mod.hpp" +#include"intel-hexl/ntt/ntt.hpp" +#include"intel-hexl/util/util.hpp" + + + + diff --git a/docs/docs/doxygen/xml/namespaceintel.xml b/docs/docs/doxygen/xml/namespaceintel.xml new file mode 100644 index 00000000..8470ad06 --- /dev/null +++ b/docs/docs/doxygen/xml/namespaceintel.xml @@ -0,0 +1,12 @@ + + + + intel + intel::hexl + + + + + + + diff --git a/docs/docs/doxygen/xml/namespaceintel_1_1hexl.xml b/docs/docs/doxygen/xml/namespaceintel_1_1hexl.xml new file mode 100644 index 00000000..403fbb2e --- /dev/null +++ b/docs/docs/doxygen/xml/namespaceintel_1_1hexl.xml @@ -0,0 +1,671 @@ + + + + intel::hexl + intel::hexl::NTT + + + + CMPINT + + EQ + = 0 + +Equal. + + + + + + LT + = 1 + +Less than. + + + + + + LE + = 2 + +Less than or equal. + + + + + + FALSE + = 3 + +False. + + + + + + NE + = 4 + +Not equal. + + + + + + NLT + = 5 + +Not less than. 
+ + + + + + NLE + = 6 + +Not less than or equal. + + + + + + TRUE + = 7 + +True. + + + + + +Represents binary operations between two boolean values. + + + + + + + + + + + void + void intel::hexl::EltwiseAddMod + (uint64_t *result, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, uint64_t modulus) + EltwiseAddMod + + uint64_t * + result + + + const uint64_t * + operand1 + + + const uint64_t * + operand2 + + + uint64_t + n + + + uint64_t + modulus + + +Adds two vectors elementwise with modular reduction. + + + + +result + + +Stores result + + + + +operand1 + + +Vector of elements to add. Each element must be less than the modulus + + + + +operand2 + + +Vector of elements to add. Each element must be less than the modulus + + + + +n + + +Number of elements in each vector + + + + +modulus + + +Modulus with which to perform modular reduction. Must be in the range $[2, 2^{63} - 1]$ + + + +Computes $ operand1[i] = (operand1[i] + operand2[i]) \mod modulus $ for $ i=0, ..., n-1$. + + + + + + + void + void intel::hexl::EltwiseCmpAdd + (uint64_t *result, const uint64_t *operand1, CMPINT cmp, uint64_t bound, uint64_t diff, uint64_t n) + EltwiseCmpAdd + + uint64_t * + result + + + const uint64_t * + operand1 + + + CMPINT + cmp + + + uint64_t + bound + + + uint64_t + diff + + + uint64_t + n + + +Computes element-wise conditional addition. + + + + +result + + +Stores the result + + + + +operand1 + + +Vector of elements to compare; stores result + + + + +cmp + + +Comparison operation + + + + +bound + + +Scalar to compare against + + + + +diff + + +Scalar to conditionally add + + + + +n + + +Number of elements in operand1 + + + +Computes result[i] = cmp(operand1[i], bound) ? operand1[i] + diff : operand1[i] for all $i=0, ..., n-1$. 
+ + + + + + + void + void intel::hexl::EltwiseCmpSubMod + (uint64_t *result, const uint64_t *operand1, CMPINT cmp, uint64_t bound, uint64_t diff, uint64_t modulus, uint64_t n) + EltwiseCmpSubMod + + uint64_t * + result + + + const uint64_t * + operand1 + + + CMPINT + cmp + + + uint64_t + bound + + + uint64_t + diff + + + uint64_t + modulus + + + uint64_t + n + + +Computes element-wise conditional modular subtraction. + + + + +result + + +Stores the result + + + + +operand1 + + +Vector of elements to compare + + + + +cmp + + +Comparison function + + + + +bound + + +Scalar to compare against + + + + +diff + + +Scalar to subtract by + + + + +modulus + + +Modulus to reduce by + + + + +n + + +Number of elements in operand1 + + + +Computes operand1[i] = (cmp(operand1, bound)) ? (operand1 - diff) mod modulus : operand1 for all i=0, ..., n-1 + + + + + + + void + void intel::hexl::EltwiseFMAMod + (uint64_t *result, const uint64_t *arg1, uint64_t arg2, const uint64_t *arg3, uint64_t n, uint64_t modulus, uint64_t input_mod_factor) + EltwiseFMAMod + + uint64_t * + result + + + const uint64_t * + arg1 + + + uint64_t + arg2 + + + const uint64_t * + arg3 + + + uint64_t + n + + + uint64_t + modulus + + + uint64_t + input_mod_factor + + +Computes fused multiply-add (arg1 * arg2 + arg3) mod modulus element-wise, broadcasting scalars to vectors. + + + + +result + + +Stores the result + + + + +arg1 + + +Vector to multiply + + + + +arg2 + + +Scalar to multiply + + + + +arg3 + + +Vector to add. Will not add if arg3 == nullptr + + + + +n + + +Number of elements in each vector + + + + +modulus + + +Modulus with which to perform modular reduction. Must be in the range $ [2, 2^{61} - 1]$ + + + + +input_mod_factor + + +Assumes input elements are in [0, input_mod_factor * p). Must be 1, 2, 4, or 8. 
+ + + + + + + + + + + void + void intel::hexl::EltwiseMultMod + (uint64_t *result, const uint64_t *operand1, const uint64_t *operand2, uint64_t n, uint64_t modulus, uint64_t input_mod_factor) + EltwiseMultMod + + uint64_t * + result + + + const uint64_t * + operand1 + + + const uint64_t * + operand2 + + + uint64_t + n + + + uint64_t + modulus + + + uint64_t + input_mod_factor + + +Multiplies two vectors elementwise with modular reduction. + + + + +result + + +Result of element-wise multiplication + + + + +operand1 + + +Vector of elements to multiply. Each element must be less than the modulus. + + + + +operand2 + + +Vector of elements to multiply. Each element must be less than the modulus. + + + + +n + + +Number of elements in each vector + + + + +modulus + + +Modulus with which to perform modular reduction + + + + +input_mod_factor + + +Assumes input elements are in [0, input_mod_factor * p) Must be 1, 2 or 4. + + + +Computes result[i] = (operand1[i] * operand2[i]) mod modulus for i=0, ..., n - 1 + + + + + + + void + void intel::hexl::EltwiseReduceMod + (uint64_t *result, const uint64_t *operand, uint64_t modulus, uint64_t n, uint64_t input_mod_factor, uint64_t output_mod_factor) + EltwiseReduceMod + + uint64_t * + result + + + const uint64_t * + operand + + + uint64_t + modulus + + + uint64_t + n + + + uint64_t + input_mod_factor + + + uint64_t + output_mod_factor + + +Performs elementwise modular reduction. + + + + +result + + +Stores the result + + + + +operand + + + + + + + +n + + +Number of elements in operand + + + + +modulus + + +Modulus with which to perform modular reduction + + + + +input_mod_factor + + +Assumes input elements are in [0, input_mod_factor * p) Must be 0, 1, 2 or 4. input_mod_factor=0 means, no knowledge of input range. Barrett reduction will be used in this case. input_mod_factor >= output_mod_factor unless input_mod_factor == 0 + + + + +output_mod_factor + + +output elements will be in [0, output_mod_factor +p) Must be 1 or 2. 
for input_mod_factor=0, output_mod_factor will be set to 1. + + + + + + + + + + + + + CMPINT + CMPINT intel::hexl::Not + (CMPINT cmp) + Not + + CMPINT + cmp + + +Returns the logical negation of a binary operation. + + + + +cmp + + +The binary operation to negate + + + + + + + + + + + + + + + + + diff --git a/docs/docs/doxygen/xml/ntt_8hpp.xml b/docs/docs/doxygen/xml/ntt_8hpp.xml new file mode 100644 index 00000000..eb94c791 --- /dev/null +++ b/docs/docs/doxygen/xml/ntt_8hpp.xml @@ -0,0 +1,90 @@ + + + + ntt.hpp + stdint.h + memory + vector + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + intel::hexl::NTT + intel + intel::hexl + + + + + +//Copyright(C)2020-2021IntelCorporation +//SPDX-License-Identifier:Apache-2.0 + +#pragmaonce + +#include<stdint.h> + +#include<memory> +#include<vector> + +namespaceintel{ +namespacehexl{ + +classNTT{ +public: +NTT(); + +~NTT(); + +NTT(uint64_tdegree,uint64_tp); + +NTT(uint64_tdegree,uint64_tp,uint64_troot_of_unity); + +voidComputeForward(uint64_t*result,constuint64_t*operand, +uint64_tinput_mod_factor,uint64_toutput_mod_factor); + +voidComputeInverse(uint64_t*result,constuint64_t*operand, +uint64_tinput_mod_factor,uint64_toutput_mod_factor); + +classNTTImpl; + +private: +std::shared_ptr<NTTImpl>m_impl; +}; + +}//namespacehexl +}//namespaceintel + + + + diff --git a/docs/docs/doxygen/xml/util_8hpp.xml b/docs/docs/doxygen/xml/util_8hpp.xml new file mode 100644 index 00000000..d5122e77 --- /dev/null +++ b/docs/docs/doxygen/xml/util_8hpp.xml @@ -0,0 +1,93 @@ + + + + util.hpp + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-add.hpp + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp + /Users/fboemer/repos/DBIO/intel-hexl/intel-hexl/include/intel-hexl/intel-hexl.hpp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + intel + intel::hexl + + + + 
+ +//Copyright(C)2020-2021IntelCorporation +//SPDX-License-Identifier:Apache-2.0 + +#pragmaonce + +namespaceintel{ +namespacehexl{ + +#undefTRUE//MSVCdefinesTRUE +#undefFALSE//MSVCdefinesFALSE + +enumclassCMPINT{ +EQ=0, +LT=1, +LE=2, +FALSE=3, +NE=4, +NLT=5, +NLE=6, +TRUE=7 +}; + +inlineCMPINTNot(CMPINTcmp){ +switch(cmp){ +caseCMPINT::EQ: +returnCMPINT::NE; +caseCMPINT::LT: +returnCMPINT::NLT; +caseCMPINT::LE: +returnCMPINT::NLE; +caseCMPINT::FALSE: +returnCMPINT::TRUE; +caseCMPINT::NE: +returnCMPINT::EQ; +caseCMPINT::NLT: +returnCMPINT::LT; +caseCMPINT::NLE: +returnCMPINT::LE; +caseCMPINT::TRUE: +returnCMPINT::FALSE; +default: +returnCMPINT::FALSE; +} +} + +}//namespacehexl +}//namespaceintel + + + + diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 00000000..f19a6525 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,2 @@ + + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..2604f3d1 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,11 @@ +.. Copyright (C) 2020-2021 Intel Corporation +.. SPDX-License-Identifier: Apache-2.0 + + +Intel HEXL Documentation +============================== + +.. toctree:: + api + +.. 
mdinclude:: ../README.md diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt new file mode 100644 index 00000000..52efc041 --- /dev/null +++ b/example/CMakeLists.txt @@ -0,0 +1,15 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Builds the stand-alone example against an installed/exported Intel HEXL.
# cmake_minimum_required() has to be called before project() so that the
# version and policy settings are in effect when the project is set up.
cmake_minimum_required(VERSION 3.5.1)
project(intel_hexl LANGUAGES C CXX)

# INTEL_HEXL_HINT_DIR may point at the directory holding IntelHEXLConfig.cmake.
find_package(IntelHEXL 1.0.0
    HINTS ${INTEL_HEXL_HINT_DIR}
    REQUIRED)
if (NOT TARGET intel_hexl)
  # FATAL_ERROR is a mode of message(), not a command of its own.
  message(FATAL_ERROR "TARGET intel_hexl not found")
endif()

add_executable(example example.cpp)
target_link_libraries(example PRIVATE intel_hexl)
diff --git a/example/README.md b/example/README.md new file mode 100644 index 00000000..7112a494 --- /dev/null +++ b/example/README.md @@ -0,0 +1,14 @@ +# Example using Intel HEXL in an external application + +To use Intel HEXL in an external application, first build Intel HEXL with `HEXL_EXPORT=ON`. Then, run `make install`. + +Next, in your external application, add the following lines to your `CMakeLists.txt`: + +```bash +find_package(IntelHEXL 1.0.0 + HINTS ${INTEL_HEXL_HINT_DIR} + REQUIRED) +target_link_libraries( intel_hexl) +``` + +If Intel HEXL is installed globally, `INTEL_HEXL_HINT_DIR` is not needed. Otherwise, `INTEL_HEXL_HINT_DIR` should be the directory containing `IntelHEXLConfig.cmake`, e.g.
`${CMAKE_INSTALL_PREFIX}/lib/cmake/` diff --git a/example/example.cpp b/example/example.cpp new file mode 100644 index 00000000..01d556e1 --- /dev/null +++ b/example/example.cpp @@ -0,0 +1,142 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include + +#include "intel-hexl/intel-hexl.hpp" + +bool CheckEqual(const std::vector& x, + const std::vector& y) { + if (x.size() != y.size()) { + std::cout << "Not equal in size\n"; + return false; + } + uint64_t N = x.size(); + bool is_match = true; + for (size_t i = 0; i < N; ++i) { + if (x[i] != y[i]) { + std::cout << "Not equal at index " << i << "\n"; + is_match = false; + } + } + return is_match; +} + +void ExampleEltwiseAddMod() { + std::cout << "Running ExampleEltwiseAddMod...\n"; + + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8}; + std::vector op2{1, 3, 5, 7, 2, 4, 6, 8}; + uint64_t modulus = 10; + std::vector exp_out{2, 5, 8, 1, 7, 1, 3, 6}; + + intel::hexl::EltwiseAddMod(op1.data(), op1.data(), op2.data(), modulus, + op1.size()); + + CheckEqual(op1, exp_out); + std::cout << "Done running ExampleEltwiseAddMod\n"; +} + +void ExampleEltwiseCmpAdd() { + std::cout << "Running ExampleEltwiseCmpAdd...\n"; + + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8}; + uint64_t cmp = 3; + uint64_t diff = 5; + std::vector exp_out{1, 2, 3, 9, 10, 11, 12, 13}; + + intel::hexl::EltwiseCmpAdd(op1.data(), op1.data(), intel::hexl::CMPINT::NLE, + cmp, diff, op1.size()); + + CheckEqual(op1, exp_out); + std::cout << "Done running ExampleEltwiseCmpAdd\n"; +} + +void ExampleEltwiseCmpSubMod() { + std::cout << "Running ExampleEltwiseCmpSubMod...\n"; + + std::vector op1{1, 2, 3, 4, 5, 6, 7}; + uint64_t bound = 4; + uint64_t diff = 5; + std::vector exp_out{1, 2, 3, 4, 0, 1, 2}; + + uint64_t modulus = 10; + + intel::hexl::EltwiseCmpSubMod(op1.data(), op1.data(), + intel::hexl::CMPINT::NLE, bound, diff, modulus, + op1.size()); + CheckEqual(op1, exp_out); + std::cout << "Done running 
ExampleEltwiseCmpSubMod\n"; +} + +void ExampleEltwiseFMAMod() { + std::cout << "Running ExampleEltwiseFMAMod...\n"; + + std::vector arg1{1, 2, 3, 4, 5, 6, 7, 8, 9}; + uint64_t arg2 = 1; + std::vector exp_out{1, 2, 3, 4, 5, 6, 7, 8, 9}; + uint64_t modulus = 769; + + intel::hexl::EltwiseFMAMod(arg1.data(), arg1.data(), arg2, nullptr, + arg1.size(), modulus, 1); + CheckEqual(arg1, exp_out); + std::cout << "Done running ExampleEltwiseFMAMod\n"; +} + +void ExampleEltwiseMultMod() { + std::cout << "Running ExampleEltwiseMultMod...\n"; + + std::vector op1{2, 4, 3, 2}; + std::vector op2{2, 1, 2, 0}; + std::vector exp_out{4, 4, 6, 0}; + + uint64_t modulus = 769; + + intel::hexl::EltwiseMultMod(op1.data(), op1.data(), op2.data(), op1.size(), + modulus, 1); + CheckEqual(op1, exp_out); + std::cout << "Done running ExampleEltwiseMultMod\n"; +} + +void ExampleNTT() { + std::cout << "Running ExampleNTT...\n"; + + uint64_t prime = 769; + uint64_t N = 8; + std::vector arg{1, 2, 3, 4, 5, 6, 7, 8}; + auto exp_out = arg; + intel::hexl::NTT ntt(N, prime); + + ntt.ComputeForward(arg.data(), arg.data(), 1, 1); + ntt.ComputeInverse(arg.data(), arg.data(), 1, 1); + + CheckEqual(arg, exp_out); + std::cout << "Done running ExampleNTT\n"; +} + +void ExampleReduceMod() { + std::cout << "Running ExampleReduceMod...\n"; + + uint64_t modulus = 5; + std::vector arg{1, 2, 3, 4, 5, 6, 7, 8}; + std::vector exp_out{1, 2, 3, 4, 0, 1, 2, 3}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0}; + intel::hexl::EltwiseReduceMod(result.data(), arg.data(), modulus, arg.size(), + 2, 1); + + CheckEqual(result, exp_out); + std::cout << "Done running ExampleReduceMod\n"; +} + +int main() { + ExampleEltwiseCmpAdd(); + ExampleEltwiseCmpSubMod(); + ExampleEltwiseFMAMod(); + ExampleEltwiseMultMod(); + ExampleNTT(); + ExampleReduceMod(); + + return 0; +} diff --git a/intel-hexl/CMakeLists.txt b/intel-hexl/CMakeLists.txt new file mode 100644 index 00000000..85e2c9f2 --- /dev/null +++ b/intel-hexl/CMakeLists.txt @@ -0,0 
+1,125 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set(SRC + eltwise/eltwise-mult-mod.cpp + eltwise/eltwise-reduce-mod.cpp + eltwise/eltwise-add-mod.cpp + eltwise/eltwise-fma-mod.cpp + eltwise/eltwise-cmp-add.cpp + eltwise/eltwise-cmp-sub-mod.cpp + eltwise/eltwise-mult-mod-avx512.cpp + eltwise/eltwise-reduce-mod-avx512.cpp + eltwise/eltwise-add-mod-avx512.cpp + eltwise/eltwise-fma-mod-avx512.cpp + ntt/fwd-ntt-avx512.cpp + ntt/inv-ntt-avx512.cpp + ntt/ntt-internal.cpp + logging/logging.cpp + number-theory/number-theory.cpp +) + +if (HEXL_SHARED_LIB) + add_library(intel_hexl SHARED ${SRC}) +else() + add_library(intel_hexl STATIC ${SRC}) +endif() + +set_target_properties(intel_hexl PROPERTIES POSITION_INDEPENDENT_CODE ON) +set_target_properties(intel_hexl PROPERTIES VERSION ${INTEL_HEXL_VERSION}) + +target_include_directories(intel_hexl + PRIVATE ${HEXL_SRC_ROOT_DIR} # Private headers + PUBLIC $ # Public headers + PUBLIC $ # Public headers + ) + +if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + target_compile_options(intel_hexl PRIVATE -Wall -Wextra -Wno-unknown-pragmas -march=native -O3 -fomit-frame-pointer) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # Inling causes some tests to fail on MSVC with AVX512 in Release mode, HEXL_DEBUG=OFF, + # so we disable it here + target_compile_options(intel_hexl PRIVATE /Wall /W4 /Zc:preprocessor /Ob0) +endif() + +install(DIRECTORY ${HEXL_INC_ROOT_DIR}/ + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/ + FILES_MATCHING + PATTERN "*.hpp" + PATTERN "*.h") + +if (HEXL_SHARED_LIB) + target_link_libraries(intel_hexl PRIVATE cpu_features) + if (HEXL_DEBUG) + target_link_libraries(intel_hexl PRIVATE easyloggingpp gflags) + endif() +else () + # For static library, we include all the dependencies for Intel HEXL in + # the libintel_hexl.a. 
+ # For proper export of IntelHEXLConfig.cmake / IntelHEXLTargts.cmake, + # we avoid explicitly linking dependencies via target_link_libraries, since + # this would add dependencies to the exported intel_hexl target. + add_dependencies(intel_hexl cpu_features) + if (HEXL_DEBUG) + add_dependencies(intel_hexl gflags) + # Manually add logging include directory + target_include_directories(intel_hexl + PUBLIC $) + # Manually add gflags include directory + target_include_directories(intel_hexl + PUBLIC $) + endif() + + # Manually add cpu_features include directory + target_include_directories(intel_hexl + PRIVATE $) + + # Export logging only if we are debugging + if (${HEXL_DEBUG}) + set_target_properties(intel_hexl PROPERTIES OUTPUT_NAME "intel_hexl_debug") + if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + add_custom_command(TARGET intel_hexl POST_BUILD + COMMAND ar -x $ + COMMAND ar -x $ + COMMAND ar -x $ + COMMAND ar -x $ + COMMAND ar -qcs $ *.o + COMMAND rm -f *.o + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS intel_hexl cpu_features gflags easyloggingpp + ) + elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_custom_command(TARGET intel_hexl POST_BUILD + COMMAND lib.exe /OUT:$ + $ + $ + $ + $ + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS intel_hexl cpu_features gflags easyloggingpp + ) + endif() + else() + if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + add_custom_command(TARGET intel_hexl POST_BUILD + COMMAND ar -x $ + COMMAND ar -x $ + COMMAND ar -qcs $ *.o + COMMAND rm -f *.o + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS intel_hexl cpu_features + ) + elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_custom_command(TARGET intel_hexl POST_BUILD + COMMAND lib.exe /OUT:$ + $ + $ + # $ + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS intel_hexl cpu_features # gflags + ) + endif() + endif() +endif() + +install(TARGETS intel_hexl DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git 
a/intel-hexl/eltwise/eltwise-add-mod-avx512.cpp b/intel-hexl/eltwise/eltwise-add-mod-avx512.cpp new file mode 100644 index 00000000..e5555433 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-add-mod-avx512.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "eltwise/eltwise-add-mod-avx512.hpp" + +#include +#include + +#include "eltwise/eltwise-add-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-add-mod.hpp" +#include "util/avx512-util.hpp" +#include "util/check.hpp" + +#ifdef HEXL_HAS_AVX512DQ + +namespace intel { +namespace hexl { + +void EltwiseAddModAVX512(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(operand2 != nullptr, "Require operand2 != nullptr"); + HEXL_CHECK(n != 0, "Require n != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK(modulus < (1ULL << 63), "Require modulus < 2**63"); + HEXL_CHECK_BOUNDS(operand1, n, modulus, + "pre-add value in operand1 exceeds bound " << modulus); + HEXL_CHECK_BOUNDS(operand2, n, modulus, + "pre-add value in operand2 exceeds bound " << modulus); + + uint64_t n_mod_8 = n % 8; + if (n_mod_8 != 0) { + EltwiseAddModNative(result, operand1, operand2, n_mod_8, modulus); + operand1 += n_mod_8; + operand2 += n_mod_8; + result += n_mod_8; + n -= n_mod_8; + } + + __m512i v_modulus = _mm512_set1_epi64(modulus); + __m512i* vp_result = reinterpret_cast<__m512i*>(result); + const __m512i* vp_operand1 = reinterpret_cast(operand1); + const __m512i* vp_operand2 = reinterpret_cast(operand2); + + HEXL_LOOP_UNROLL_4 + for (size_t i = n / 8; i > 0; --i) { + __m512i v_operand1 = _mm512_loadu_si512(vp_operand1); + __m512i v_operand2 = _mm512_loadu_si512(vp_operand2); + + __m512i v_result = + _mm512_hexl_small_add_mod_epi64(v_operand1, v_operand2, v_modulus); + 
+ _mm512_storeu_si512(vp_result, v_result); + + ++vp_result; + ++vp_operand1; + ++vp_operand2; + } + + HEXL_CHECK_BOUNDS(result, n, modulus, "result exceeds bound " << modulus); +} + +} // namespace hexl +} // namespace intel + +#endif diff --git a/intel-hexl/eltwise/eltwise-add-mod-avx512.hpp b/intel-hexl/eltwise/eltwise-add-mod-avx512.hpp new file mode 100644 index 00000000..9b59c29e --- /dev/null +++ b/intel-hexl/eltwise/eltwise-add-mod-avx512.hpp @@ -0,0 +1,16 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +namespace intel { +namespace hexl { + +void EltwiseAddModAVX512(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-add-mod-internal.hpp b/intel-hexl/eltwise/eltwise-add-mod-internal.hpp new file mode 100644 index 00000000..829fa08d --- /dev/null +++ b/intel-hexl/eltwise/eltwise-add-mod-internal.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +namespace intel { +namespace hexl { + +/// @brief Adds two vectors elementwise with modular reduction +/// @param[out] result Stores result +/// @param[in] operand1 Vector of elements to add +/// @param[in] operand2 Vector of elements to add +/// @param[in] n Number of elements in each vector +/// @param[in] modulus Modulus with which to perform modular reduction +/// @details Computes \f$ operand1[i] = (operand1[i] + operand2[i]) \mod modulus +/// \f$ for \f$ i=0, ..., n-1\f$. 
+void EltwiseAddModNative(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-add-mod.cpp b/intel-hexl/eltwise/eltwise-add-mod.cpp new file mode 100644 index 00000000..9d3f5dcf --- /dev/null +++ b/intel-hexl/eltwise/eltwise-add-mod.cpp @@ -0,0 +1,70 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "intel-hexl/eltwise/eltwise-add-mod.hpp" + +#include "eltwise/eltwise-add-mod-avx512.hpp" +#include "eltwise/eltwise-add-mod-internal.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "util/check.hpp" +#include "util/cpu-features.hpp" + +namespace intel { +namespace hexl { + +// Algorithm 1 of https://hal.archives-ouvertes.fr/hal-01215845/document +void EltwiseAddModNative(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(operand2 != nullptr, "Require operand2 != nullptr"); + HEXL_CHECK(n != 0, "Require n != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK(modulus < (1ULL << 63), "Require modulus < 2**63"); + HEXL_CHECK_BOUNDS(operand1, n, modulus, + "pre-add value in operand1 exceeds bound " << modulus); + HEXL_CHECK_BOUNDS(operand2, n, modulus, + "pre-add value in operand2 exceeds bound " << modulus); + + HEXL_LOOP_UNROLL_4 + for (size_t i = 0; i < n; ++i) { + uint64_t sum = *operand1 + *operand2; + if (sum >= modulus) { + *result = sum - modulus; + } else { + *result = sum; + } + + ++operand1; + ++operand2; + ++result; + } +} + +void EltwiseAddMod(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, uint64_t modulus) { + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + 
HEXL_CHECK(operand2 != nullptr, "Require operand2 != nullptr"); + HEXL_CHECK(n != 0, "Require n != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK(modulus < (1ULL << 63), "Require modulus < 2**63"); + HEXL_CHECK_BOUNDS(operand1, n, modulus, + "pre-add value in operand1 exceeds bound " << modulus); + HEXL_CHECK_BOUNDS(operand2, n, modulus, + "pre-add value in operand2 exceeds bound " << modulus); + +#ifdef HEXL_HAS_AVX512DQ + if (has_avx512dq) { + EltwiseAddModAVX512(result, operand1, operand2, n, modulus); + return; + } +#endif + + HEXL_VLOG(3, "Calling EltwiseAddModNative"); + EltwiseAddModNative(result, operand1, operand2, n, modulus); +} + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-cmp-add-avx512.hpp b/intel-hexl/eltwise/eltwise-cmp-add-avx512.hpp new file mode 100644 index 00000000..5f5fe85f --- /dev/null +++ b/intel-hexl/eltwise/eltwise-cmp-add-avx512.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "intel-hexl/util/util.hpp" + +namespace intel { +namespace hexl { + +/// @brief Computes element-wise conditional addition. +/// @param[out] result Stores the result +/// @param[in] operand1 Vector of elements to compare +/// @param[in] cmp Comparison operation +/// @param[in] bound Scalar to compare against +/// @param[in] diff Scalar to conditionally add +/// @param[in] n Number of elements in \p operand1 +/// @details Computes result[i] = cmp(operand1[i], bound) ? operand1[i] + +/// diff : operand1[i] for all \f$i=0, ..., n-1\f$. 
+void EltwiseCmpAddAVX512(uint64_t* result, const uint64_t* operand1, CMPINT cmp, + uint64_t bound, uint64_t diff, uint64_t n); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-cmp-add-internal.hpp b/intel-hexl/eltwise/eltwise-cmp-add-internal.hpp new file mode 100644 index 00000000..7e2b7eaf --- /dev/null +++ b/intel-hexl/eltwise/eltwise-cmp-add-internal.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "intel-hexl/util/util.hpp" + +namespace intel { +namespace hexl { + +/// @brief Computes element-wise conditional addition. +/// @param[out] result Stores the result +/// @param[in] operand1 Vector of elements to compare +/// @param[in] cmp Comparison operation +/// @param[in] bound Scalar to compare against +/// @param[in] diff Scalar to conditionally add +/// @param[in] n Number of elements in \p operand1 +/// @details Computes result[i] = cmp(operand1[i], bound) ? operand1[i] + +/// diff : operand1[i] for all \f$i=0, ..., n-1\f$. 
+void EltwiseCmpAddNative(uint64_t* result, const uint64_t* operand1, CMPINT cmp, + uint64_t bound, uint64_t diff, uint64_t n); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-cmp-add.cpp b/intel-hexl/eltwise/eltwise-cmp-add.cpp new file mode 100644 index 00000000..c6e06872 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-cmp-add.cpp @@ -0,0 +1,141 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "intel-hexl/eltwise/eltwise-cmp-add.hpp" + +#include "eltwise/eltwise-cmp-add-avx512.hpp" +#include "eltwise/eltwise-cmp-add-internal.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "util/avx512-util.hpp" +#include "util/check.hpp" +#include "util/cpu-features.hpp" + +namespace intel { +namespace hexl { + +void EltwiseCmpAdd(uint64_t* result, const uint64_t* operand1, CMPINT cmp, + uint64_t bound, uint64_t diff, uint64_t n) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(diff != 0, "Require diff != 0"); + HEXL_CHECK(n != 0, "Require n != 0"); + +#ifdef HEXL_HAS_AVX512DQ + if (has_avx512dq) { + EltwiseCmpAddAVX512(result, operand1, cmp, bound, diff, n); + return; + } +#endif + EltwiseCmpAddNative(result, operand1, cmp, bound, diff, n); +} + +void EltwiseCmpAddNative(uint64_t* result, const uint64_t* operand1, CMPINT cmp, + uint64_t bound, uint64_t diff, uint64_t n) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(diff != 0, "Require diff != 0"); + HEXL_CHECK(n != 0, "Require n != 0"); + + switch (cmp) { + case CMPINT::EQ: { + for (size_t i = 0; i < n; ++i) { + if (operand1[i] == bound) { + result[i] = operand1[i] + diff; + } else { + result[i] = operand1[i]; + } + } + break; + } + case CMPINT::LT: + for (size_t i = 0; i < n; ++i) { + if (operand1[i] < bound) { 
+ result[i] = operand1[i] + diff; + } else { + result[i] = operand1[i]; + } + } + break; + case CMPINT::LE: + for (size_t i = 0; i < n; ++i) { + if (operand1[i] <= bound) { + result[i] = operand1[i] + diff; + } else { + result[i] = operand1[i]; + } + } + break; + case CMPINT::FALSE: + for (size_t i = 0; i < n; ++i) { + result[i] = operand1[i]; + } + break; + case CMPINT::NE: + for (size_t i = 0; i < n; ++i) { + if (operand1[i] != bound) { + result[i] = operand1[i] + diff; + } else { + result[i] = operand1[i]; + } + } + break; + case CMPINT::NLT: + for (size_t i = 0; i < n; ++i) { + if (operand1[i] >= bound) { + result[i] = operand1[i] + diff; + } else { + result[i] = operand1[i]; + } + } + break; + case CMPINT::NLE: + for (size_t i = 0; i < n; ++i) { + if (operand1[i] > bound) { + result[i] = operand1[i] + diff; + } else { + result[i] = operand1[i]; + } + } + break; + case CMPINT::TRUE: + for (size_t i = 0; i < n; ++i) { + result[i] = operand1[i] + diff; + } + break; + } +} + +#ifdef HEXL_HAS_AVX512DQ +void EltwiseCmpAddAVX512(uint64_t* result, const uint64_t* operand1, CMPINT cmp, + uint64_t bound, uint64_t diff, uint64_t n) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(diff != 0, "Require diff != 0"); + HEXL_CHECK(n != 0, "Require n != 0"); + + uint64_t n_mod_8 = n % 8; + if (n_mod_8 != 0) { + EltwiseCmpAddNative(result, operand1, cmp, bound, diff, n_mod_8); + operand1 += n_mod_8; + result += n_mod_8; + n -= n_mod_8; + } + + __m512i v_bound = _mm512_set1_epi64(bound); + const __m512i* v_op_ptr = reinterpret_cast(operand1); + __m512i* v_result_ptr = reinterpret_cast<__m512i*>(result); + for (size_t i = n / 8; i > 0; --i) { + __m512i v_op = _mm512_loadu_si512(v_op_ptr); + __m512i v_add_diff = _mm512_hexl_cmp_epi64(v_op, v_bound, cmp, diff); + v_op = _mm512_add_epi64(v_op, v_add_diff); + _mm512_storeu_si512(v_result_ptr, v_op); + + ++v_result_ptr; + ++v_op_ptr; + } +} 
+#endif + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-cmp-sub-mod-avx512.hpp b/intel-hexl/eltwise/eltwise-cmp-sub-mod-avx512.hpp new file mode 100644 index 00000000..960f6e11 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-cmp-sub-mod-avx512.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "intel-hexl/util/util.hpp" + +namespace intel { +namespace hexl { + +/// @brief Computes element-wise conditional modular subtraction. +/// @param[out] result Stores the result +/// @param[in] operand1 Vector of elements to compare +/// @param[in] cmp Comparison function +/// @param[in] bound Scalar to compare against +/// @param[in] diff Scalar to subtract by +/// @param[in] modulus Modulus to reduce by +/// @param[in] n Number of elements in \p operand1 +/// @details Computes \p result[i] = (\p cmp(\p operand1, \p bound)) ? (\p +/// operand1 - \p diff) mod \p modulus : \p operand1 for all i=0, ..., n-1 +void EltwiseCmpSubModAVX512(uint64_t* result, const uint64_t* operand1, + CMPINT cmp, uint64_t bound, uint64_t diff, + uint64_t modulus, uint64_t n); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-cmp-sub-mod-internal.hpp b/intel-hexl/eltwise/eltwise-cmp-sub-mod-internal.hpp new file mode 100644 index 00000000..47b12236 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-cmp-sub-mod-internal.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "intel-hexl/util/util.hpp" + +namespace intel { +namespace hexl { + +/// @brief Computes element-wise conditional modular subtraction. 
+/// @param[out] result Stores the result +/// @param[in] operand1 Vector of elements to compare +/// @param[in] cmp Comparison function +/// @param[in] bound Scalar to compare against +/// @param[in] diff Scalar to subtract by +/// @param[in] modulus Modulus to reduce by +/// @param[in] n Number of elements in \p operand1 +/// @details Computes \p result[i] = (\p cmp(\p operand1, \p bound)) ? (\p +/// operand1 - \p diff) mod \p modulus : \p operand1 for all i=0, ..., n-1 +void EltwiseCmpSubModNative(uint64_t* result, const uint64_t* operand1, + CMPINT cmp, uint64_t bound, uint64_t diff, + uint64_t modulus, uint64_t n); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-cmp-sub-mod.cpp b/intel-hexl/eltwise/eltwise-cmp-sub-mod.cpp new file mode 100644 index 00000000..c89a1d30 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-cmp-sub-mod.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp" + +#include "eltwise/eltwise-cmp-sub-mod-avx512.hpp" +#include "eltwise/eltwise-cmp-sub-mod-internal.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "util/avx512-util.hpp" +#include "util/check.hpp" +#include "util/cpu-features.hpp" +#include "util/util-internal.hpp" + +namespace intel { +namespace hexl { + +void EltwiseCmpSubMod(uint64_t* result, const uint64_t* operand1, CMPINT cmp, + uint64_t bound, uint64_t diff, uint64_t modulus, + uint64_t n) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(diff != 0, "Require diff != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK(n != 0, "Require n != 0"); + +#ifdef HEXL_HAS_AVX512DQ + if (has_avx512dq) { + EltwiseCmpSubModAVX512(result, operand1, cmp, bound, diff, modulus, n); + return; + } +#endif + EltwiseCmpSubModNative(result, operand1, cmp, 
bound, diff, modulus, n); +} + +void EltwiseCmpSubModNative(uint64_t* result, const uint64_t* operand1, + CMPINT cmp, uint64_t bound, uint64_t diff, + uint64_t modulus, uint64_t n) { + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(diff != 0, "Require diff != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK(n != 0, "Require n != 0") + + HEXL_CHECK(diff < modulus, "Diff " << diff << " >= modulus " << modulus); + for (size_t i = 0; i < n; ++i) { + uint64_t op = operand1[i]; + + bool op_cmp = Compare(cmp, op, bound); + op %= modulus; + + if (op_cmp) { + op = SubUIntMod(op, diff, modulus); + } + result[i] = op; + } +} + +#ifdef HEXL_HAS_AVX512DQ +void EltwiseCmpSubModAVX512(uint64_t* result, const uint64_t* operand1, + CMPINT cmp, uint64_t bound, uint64_t diff, + uint64_t modulus, uint64_t n) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(diff != 0, "Require diff != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK(n != 0, "Require n != 0") + + uint64_t n_mod_8 = n % 8; + if (n_mod_8 != 0) { + EltwiseCmpSubModNative(result, operand1, cmp, bound, diff, modulus, + n_mod_8); + operand1 += n_mod_8; + result += n_mod_8; + n -= n_mod_8; + } + HEXL_CHECK(diff < modulus, "Diff " << diff << " >= modulus " << modulus); + + const __m512i* v_op_ptr = reinterpret_cast(operand1); + __m512i* v_result_ptr = reinterpret_cast<__m512i*>(result); + __m512i v_bound = _mm512_set1_epi64(bound); + __m512i v_diff = _mm512_set1_epi64(diff); + __m512i v_modulus = _mm512_set1_epi64(modulus); + + uint64_t mu = MultiplyFactor(1, 64, modulus).BarrettFactor(); + __m512i v_mu = _mm512_set1_epi64(mu); + + for (size_t i = n / 8; i > 0; --i) { + __m512i v_op = _mm512_loadu_si512(v_op_ptr); + __mmask8 op_le_cmp = _mm512_hexl_cmp_epu64_mask(v_op, v_bound, Not(cmp)); + + v_op = _mm512_hexl_barrett_reduce64(v_op, v_modulus, v_mu); + + __m512i 
v_to_add = _mm512_hexl_cmp_epi64(v_op, v_diff, CMPINT::LT, modulus); + v_to_add = _mm512_sub_epi64(v_to_add, v_diff); + v_to_add = _mm512_mask_set1_epi64(v_to_add, op_le_cmp, 0); + + v_op = _mm512_add_epi64(v_op, v_to_add); + _mm512_storeu_si512(v_result_ptr, v_op); + ++v_op_ptr; + ++v_result_ptr; + } +} +#endif + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-fma-mod-avx512.cpp b/intel-hexl/eltwise/eltwise-fma-mod-avx512.cpp new file mode 100644 index 00000000..c5273e4c --- /dev/null +++ b/intel-hexl/eltwise/eltwise-fma-mod-avx512.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "eltwise/eltwise-fma-mod-avx512.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512IFMA +template void EltwiseFMAModAVX512<52, 1>(uint64_t* result, const uint64_t* arg1, + uint64_t arg2, const uint64_t* arg3, + uint64_t n, uint64_t modulus); +template void EltwiseFMAModAVX512<52, 2>(uint64_t* result, const uint64_t* arg1, + uint64_t arg2, const uint64_t* arg3, + uint64_t n, uint64_t modulus); +template void EltwiseFMAModAVX512<52, 4>(uint64_t* result, const uint64_t* arg1, + uint64_t arg2, const uint64_t* arg3, + uint64_t n, uint64_t modulus); +template void EltwiseFMAModAVX512<52, 8>(uint64_t* result, const uint64_t* arg1, + uint64_t arg2, const uint64_t* arg3, + uint64_t n, uint64_t modulus); +#endif + +#ifdef HEXL_HAS_AVX512DQ +template void EltwiseFMAModAVX512<64, 1>(uint64_t* result, const uint64_t* arg1, + uint64_t arg2, const uint64_t* arg3, + uint64_t n, uint64_t modulus); +template void EltwiseFMAModAVX512<64, 2>(uint64_t* result, const uint64_t* arg1, + uint64_t arg2, const uint64_t* arg3, + uint64_t n, uint64_t modulus); +template void EltwiseFMAModAVX512<64, 4>(uint64_t* result, const uint64_t* arg1, + uint64_t arg2, const uint64_t* arg3, + uint64_t n, uint64_t modulus); +template void EltwiseFMAModAVX512<64, 8>(uint64_t* result, const uint64_t* arg1, + 
#ifdef HEXL_HAS_AVX512DQ

/// @brief AVX512 implementation of element-wise fused multiply-add with
/// modular reduction
/// @tparam BitShift Width used for the Barrett factor and the high multiply;
/// must be 52 or 64
/// @tparam InputModFactor Inputs are required to be < InputModFactor * modulus
/// @param[out] result Stores the result
/// @param[in] arg1 Vector to multiply
/// @param[in] arg2 Scalar to multiply
/// @param[in] arg3 Vector to add, or nullptr to skip the addition
/// @param[in] n Number of elements in each vector
/// @param[in] modulus Modulus with which to perform modular reduction
/// @details The leading n % 8 elements are handled by EltwiseFMAModNative; the
/// rest are processed eight 64-bit lanes at a time.
/// NOTE(review): the template header and the explicit template arguments
/// below were reconstructed from the identifiers used in the body and the
/// explicit instantiations (<52, 1> ... <64, 8>); `int` as the parameter type
/// is an assumption -- confirm against the helper declarations.
template <int BitShift, int InputModFactor>
void EltwiseFMAModAVX512(uint64_t* result, const uint64_t* arg1, uint64_t arg2,
                         const uint64_t* arg3, uint64_t n, uint64_t modulus) {
  HEXL_CHECK(modulus < MaximumValue(BitShift),
             "Modulus " << modulus << " exceeds bit shift bound "
                        << MaximumValue(BitShift));
  HEXL_CHECK(modulus != 0, "Require modulus != 0");

  HEXL_CHECK(arg1, "arg1 == nullptr");
  HEXL_CHECK(result, "result == nullptr");

  HEXL_CHECK_BOUNDS(arg1, n, InputModFactor * modulus,
                    "arg1 exceeds bound " << (InputModFactor * modulus));
  HEXL_CHECK_BOUNDS(&arg2, 1, InputModFactor * modulus,
                    "arg2 exceeds bound " << (InputModFactor * modulus));
  HEXL_CHECK(BitShift == 52 || BitShift == 64,
             "Invalid bitshift " << BitShift << "; need 52 or 64");

  // Peel off the elements that do not fill a whole 512-bit vector.
  uint64_t n_mod_8 = n % 8;
  if (n_mod_8 != 0) {
    EltwiseFMAModNative<InputModFactor>(result, arg1, arg2, arg3, n_mod_8,
                                        modulus);
    arg1 += n_mod_8;
    if (arg3 != nullptr) {
      arg3 += n_mod_8;
    }
    result += n_mod_8;
    n -= n_mod_8;
  }

  uint64_t twice_modulus = 2 * modulus;
  uint64_t four_times_modulus = 4 * modulus;
  arg2 = ReduceMod<InputModFactor>(arg2, modulus, &twice_modulus,
                                   &four_times_modulus);
  uint64_t arg2_barr = MultiplyFactor(arg2, BitShift, modulus).BarrettFactor();

  __m512i varg2_barr = _mm512_set1_epi64(arg2_barr);

  __m512i vmodulus = _mm512_set1_epi64(modulus);
  __m512i v2_modulus = _mm512_set1_epi64(2 * modulus);
  __m512i v4_modulus = _mm512_set1_epi64(4 * modulus);
  const __m512i* vp_arg1 = reinterpret_cast<const __m512i*>(arg1);
  __m512i varg2 = _mm512_set1_epi64(arg2);
  varg2 = _mm512_hexl_small_mod_epu64<InputModFactor>(varg2, vmodulus,
                                                      &v2_modulus, &v4_modulus);

  __m512i* vp_result = reinterpret_cast<__m512i*>(result);

  if (arg3) {
    const __m512i* vp_arg3 = reinterpret_cast<const __m512i*>(arg3);
    HEXL_LOOP_UNROLL_4
    for (size_t i = n / 8; i > 0; --i) {
      __m512i varg1 = _mm512_loadu_si512(vp_arg1);
      __m512i varg3 = _mm512_loadu_si512(vp_arg3);

      varg1 = _mm512_hexl_small_mod_epu64<InputModFactor>(
          varg1, vmodulus, &v2_modulus, &v4_modulus);
      varg3 = _mm512_hexl_small_mod_epu64<InputModFactor>(
          varg3, vmodulus, &v2_modulus, &v4_modulus);

      __m512i va_times_b = _mm512_hexl_mullo_epi<64>(varg1, varg2);

      // Barrett: q = hi(arg1 * arg2_barr); product - q * modulus
      __m512i vq = _mm512_hexl_mulhi_epi<BitShift>(varg1, varg2_barr);
      __m512i vq_times_mod = _mm512_mullo_epi64(vq, vmodulus);
      vq = _mm512_sub_epi64(va_times_b, vq_times_mod);
      // Conditional Barrett subtraction
      vq = _mm512_hexl_small_mod_epu64(vq, vmodulus);

      vq = _mm512_add_epi64(vq, varg3);
      vq = _mm512_hexl_small_mod_epu64(vq, vmodulus);

      _mm512_storeu_si512(vp_result, vq);

      ++vp_arg1;
      ++vp_result;
      ++vp_arg3;
    }
  } else {  // arg3 == nullptr
    HEXL_LOOP_UNROLL_4
    for (size_t i = n / 8; i > 0; --i) {
      __m512i varg1 = _mm512_loadu_si512(vp_arg1);
      varg1 = _mm512_hexl_small_mod_epu64<InputModFactor>(
          varg1, vmodulus, &v2_modulus, &v4_modulus);

      __m512i vq = _mm512_hexl_mulhi_epi<BitShift>(varg1, varg2_barr);
      __m512i vq_times_mod = _mm512_mullo_epi64(vq, vmodulus);
      __m512i va_times_b = _mm512_hexl_mullo_epi<64>(varg1, varg2);
      vq = _mm512_sub_epi64(va_times_b, vq_times_mod);
      // Conditional Barrett subtraction
      vq = _mm512_hexl_small_mod_epu64(vq, vmodulus);
      _mm512_storeu_si512(vp_result, vq);

      ++vp_arg1;
      ++vp_result;
    }
  }
}

#endif  // HEXL_HAS_AVX512DQ
// namespace intel diff --git a/intel-hexl/eltwise/eltwise-fma-mod-internal.hpp b/intel-hexl/eltwise/eltwise-fma-mod-internal.hpp new file mode 100644 index 00000000..83bf3aef --- /dev/null +++ b/intel-hexl/eltwise/eltwise-fma-mod-internal.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "number-theory/number-theory.hpp" + +namespace intel { +namespace hexl { + +template +void EltwiseFMAModNative(uint64_t* result, const uint64_t* arg1, uint64_t arg2, + const uint64_t* arg3, uint64_t n, uint64_t modulus) { + uint64_t twice_modulus = 2 * modulus; + uint64_t four_times_modulus = 4 * modulus; + arg2 = ReduceMod(arg2, modulus, &twice_modulus, + &four_times_modulus); + + MultiplyFactor mf(arg2, 64, modulus); + if (arg3) { + for (size_t i = 0; i < n; ++i) { + uint64_t arg1_val = ReduceMod( + *arg1++, modulus, &twice_modulus, &four_times_modulus); + uint64_t arg3_val = ReduceMod( + *arg3++, modulus, &twice_modulus, &four_times_modulus); + + uint64_t result_val = + MultiplyMod(arg1_val, arg2, mf.BarrettFactor(), modulus); + *result = AddUIntMod(result_val, arg3_val, modulus); + result++; + } + } else { // arg3 == nullptr + for (size_t i = 0; i < n; ++i) { + uint64_t arg1_val = ReduceMod( + *arg1++, modulus, &twice_modulus, &four_times_modulus); + *result++ = MultiplyMod(arg1_val, arg2, mf.BarrettFactor(), modulus); + } + } +} + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-fma-mod.cpp b/intel-hexl/eltwise/eltwise-fma-mod.cpp new file mode 100644 index 00000000..99042a7d --- /dev/null +++ b/intel-hexl/eltwise/eltwise-fma-mod.cpp @@ -0,0 +1,104 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "intel-hexl/eltwise/eltwise-fma-mod.hpp" + +#include + +#include "eltwise/eltwise-fma-mod-avx512.hpp" +#include "eltwise/eltwise-fma-mod-internal.hpp" +#include "logging/logging.hpp" +#include 
"number-theory/number-theory.hpp" +#include "util/cpu-features.hpp" + +namespace intel { +namespace hexl { + +void EltwiseFMAMod(uint64_t* result, const uint64_t* arg1, uint64_t arg2, + const uint64_t* arg3, uint64_t n, uint64_t modulus, + uint64_t input_mod_factor) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(arg1 != nullptr, "Require arg1 != nullptr"); + HEXL_CHECK(n != 0, "Require n != 0") + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK(modulus < (1ULL << 61), "Require modulus < (1ULL << 61)"); + HEXL_CHECK( + input_mod_factor == 1 || input_mod_factor == 2 || input_mod_factor == 4 || + input_mod_factor == 8, + "input_mod_factor must be 1, 2, 4, or 8. Got " << input_mod_factor); + HEXL_CHECK( + arg2 < input_mod_factor * modulus, + "arg2 " << arg2 << " exceeds bound " << (input_mod_factor * modulus)); + + HEXL_CHECK_BOUNDS(arg1, n, input_mod_factor * modulus, + "arg1 value " << (*std::max_element(arg1, arg1 + n)) + << " in EltwiseFMAMod exceeds bound " + << (input_mod_factor * modulus)); + HEXL_CHECK(arg3 == nullptr || (*std::max_element(arg3, arg3 + n) < + (input_mod_factor * modulus)), + "arg3 value in EltwiseFMAMod exceeds bound " + << (input_mod_factor * modulus)); + +#ifdef HEXL_HAS_AVX512IFMA + if (has_avx512ifma && input_mod_factor * modulus < (1ULL << 52)) { + HEXL_VLOG(3, "Calling 52-bit EltwiseFMAModAVX512"); + + switch (input_mod_factor) { + case 1: + EltwiseFMAModAVX512<52, 1>(result, arg1, arg2, arg3, n, modulus); + break; + case 2: + EltwiseFMAModAVX512<52, 2>(result, arg1, arg2, arg3, n, modulus); + break; + case 4: + EltwiseFMAModAVX512<52, 4>(result, arg1, arg2, arg3, n, modulus); + break; + case 8: + EltwiseFMAModAVX512<52, 8>(result, arg1, arg2, arg3, n, modulus); + break; + } + return; + } +#endif + +#ifdef HEXL_HAS_AVX512DQ + if (has_avx512dq) { + HEXL_VLOG(3, "Calling 64-bit EltwiseFMAModAVX512"); + + switch (input_mod_factor) { + case 1: + EltwiseFMAModAVX512<64, 1>(result, arg1, arg2, arg3, n, 
modulus); + break; + case 2: + EltwiseFMAModAVX512<64, 2>(result, arg1, arg2, arg3, n, modulus); + break; + case 4: + EltwiseFMAModAVX512<64, 4>(result, arg1, arg2, arg3, n, modulus); + break; + case 8: + EltwiseFMAModAVX512<64, 8>(result, arg1, arg2, arg3, n, modulus); + break; + } + return; + } +#endif + + HEXL_VLOG(3, "Calling EltwiseFMAModNative"); + switch (input_mod_factor) { + case 1: + EltwiseFMAModNative<1>(result, arg1, arg2, arg3, n, modulus); + break; + case 2: + EltwiseFMAModNative<2>(result, arg1, arg2, arg3, n, modulus); + break; + case 4: + EltwiseFMAModNative<4>(result, arg1, arg2, arg3, n, modulus); + break; + case 8: + EltwiseFMAModNative<8>(result, arg1, arg2, arg3, n, modulus); + break; + } +} + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-mult-mod-avx512.cpp b/intel-hexl/eltwise/eltwise-mult-mod-avx512.cpp new file mode 100644 index 00000000..4eb11c96 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-mult-mod-avx512.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "eltwise/eltwise-mult-mod-avx512.hpp" + +#include +#include + +#include "eltwise/eltwise-mult-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-mult-mod.hpp" +#include "number-theory/number-theory.hpp" +#include "util/avx512-util.hpp" +#include "util/check.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512DQ + +template void EltwiseMultModAVX512Float<1>(uint64_t* result, + const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus); +template void EltwiseMultModAVX512Float<2>(uint64_t* result, + const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus); +template void EltwiseMultModAVX512Float<4>(uint64_t* result, + const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus); + +template void EltwiseMultModAVX512Int<1>(uint64_t* result, + const uint64_t* operand1, + const 
uint64_t* operand2, uint64_t n, + uint64_t modulus); +template void EltwiseMultModAVX512Int<2>(uint64_t* result, + const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus); +template void EltwiseMultModAVX512Int<4>(uint64_t* result, + const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus); + +#endif + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-mult-mod-avx512.hpp b/intel-hexl/eltwise/eltwise-mult-mod-avx512.hpp new file mode 100644 index 00000000..b7c45432 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-mult-mod-avx512.hpp @@ -0,0 +1,633 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +#include + +#include "eltwise/eltwise-mult-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-mult-mod.hpp" +#include "number-theory/number-theory.hpp" +#include "util/avx512-util.hpp" +#include "util/check.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512DQ + +template +void EltwiseMultModAVX512IntLoop8192(__m512i* vp_result, + const __m512i* vp_operand1, + const __m512i* vp_operand2, + __m512i vbarr_lo, __m512i v_modulus, + __m512i v_twice_mod) { + (void)v_twice_mod; // Avoid unused variable + HEXL_LOOP_UNROLL_4 + for (size_t i = 64; i > 0; --i) { + __m512i x1 = _mm512_loadu_si512(vp_operand1++); + __m512i y1 = _mm512_loadu_si512(vp_operand2++); + __m512i x2 = _mm512_loadu_si512(vp_operand1++); + __m512i y2 = _mm512_loadu_si512(vp_operand2++); + __m512i x3 = _mm512_loadu_si512(vp_operand1++); + __m512i y3 = _mm512_loadu_si512(vp_operand2++); + __m512i x4 = _mm512_loadu_si512(vp_operand1++); + __m512i y4 = _mm512_loadu_si512(vp_operand2++); + __m512i x5 = _mm512_loadu_si512(vp_operand1++); + __m512i y5 = _mm512_loadu_si512(vp_operand2++); + __m512i x6 = _mm512_loadu_si512(vp_operand1++); + __m512i y6 = _mm512_loadu_si512(vp_operand2++); + __m512i x7 = 
_mm512_loadu_si512(vp_operand1++); + __m512i y7 = _mm512_loadu_si512(vp_operand2++); + __m512i x8 = _mm512_loadu_si512(vp_operand1++); + __m512i y8 = _mm512_loadu_si512(vp_operand2++); + __m512i x9 = _mm512_loadu_si512(vp_operand1++); + __m512i y9 = _mm512_loadu_si512(vp_operand2++); + __m512i x10 = _mm512_loadu_si512(vp_operand1++); + __m512i y10 = _mm512_loadu_si512(vp_operand2++); + __m512i x11 = _mm512_loadu_si512(vp_operand1++); + __m512i y11 = _mm512_loadu_si512(vp_operand2++); + __m512i x12 = _mm512_loadu_si512(vp_operand1++); + __m512i y12 = _mm512_loadu_si512(vp_operand2++); + __m512i x13 = _mm512_loadu_si512(vp_operand1++); + __m512i y13 = _mm512_loadu_si512(vp_operand2++); + __m512i x14 = _mm512_loadu_si512(vp_operand1++); + __m512i y14 = _mm512_loadu_si512(vp_operand2++); + __m512i x15 = _mm512_loadu_si512(vp_operand1++); + __m512i y15 = _mm512_loadu_si512(vp_operand2++); + __m512i x16 = _mm512_loadu_si512(vp_operand1++); + __m512i y16 = _mm512_loadu_si512(vp_operand2++); + + x1 = _mm512_hexl_small_mod_epu64(x1, v_modulus, + &v_twice_mod); + x2 = _mm512_hexl_small_mod_epu64(x2, v_modulus, + &v_twice_mod); + x3 = _mm512_hexl_small_mod_epu64(x3, v_modulus, + &v_twice_mod); + x4 = _mm512_hexl_small_mod_epu64(x4, v_modulus, + &v_twice_mod); + x5 = _mm512_hexl_small_mod_epu64(x5, v_modulus, + &v_twice_mod); + x6 = _mm512_hexl_small_mod_epu64(x6, v_modulus, + &v_twice_mod); + x7 = _mm512_hexl_small_mod_epu64(x7, v_modulus, + &v_twice_mod); + x8 = _mm512_hexl_small_mod_epu64(x8, v_modulus, + &v_twice_mod); + x9 = _mm512_hexl_small_mod_epu64(x9, v_modulus, + &v_twice_mod); + x10 = _mm512_hexl_small_mod_epu64(x10, v_modulus, + &v_twice_mod); + x11 = _mm512_hexl_small_mod_epu64(x11, v_modulus, + &v_twice_mod); + x12 = _mm512_hexl_small_mod_epu64(x12, v_modulus, + &v_twice_mod); + x13 = _mm512_hexl_small_mod_epu64(x13, v_modulus, + &v_twice_mod); + x14 = _mm512_hexl_small_mod_epu64(x14, v_modulus, + &v_twice_mod); + x15 = _mm512_hexl_small_mod_epu64(x15, 
v_modulus, + &v_twice_mod); + x16 = _mm512_hexl_small_mod_epu64(x16, v_modulus, + &v_twice_mod); + + y1 = _mm512_hexl_small_mod_epu64(y1, v_modulus, + &v_twice_mod); + y2 = _mm512_hexl_small_mod_epu64(y2, v_modulus, + &v_twice_mod); + y3 = _mm512_hexl_small_mod_epu64(y3, v_modulus, + &v_twice_mod); + y4 = _mm512_hexl_small_mod_epu64(y4, v_modulus, + &v_twice_mod); + y5 = _mm512_hexl_small_mod_epu64(y5, v_modulus, + &v_twice_mod); + y6 = _mm512_hexl_small_mod_epu64(y6, v_modulus, + &v_twice_mod); + y7 = _mm512_hexl_small_mod_epu64(y7, v_modulus, + &v_twice_mod); + y8 = _mm512_hexl_small_mod_epu64(y8, v_modulus, + &v_twice_mod); + y9 = _mm512_hexl_small_mod_epu64(y9, v_modulus, + &v_twice_mod); + y10 = _mm512_hexl_small_mod_epu64(y10, v_modulus, + &v_twice_mod); + y11 = _mm512_hexl_small_mod_epu64(y11, v_modulus, + &v_twice_mod); + y12 = _mm512_hexl_small_mod_epu64(y12, v_modulus, + &v_twice_mod); + y13 = _mm512_hexl_small_mod_epu64(y13, v_modulus, + &v_twice_mod); + y14 = _mm512_hexl_small_mod_epu64(y14, v_modulus, + &v_twice_mod); + y15 = _mm512_hexl_small_mod_epu64(y15, v_modulus, + &v_twice_mod); + y16 = _mm512_hexl_small_mod_epu64(y16, v_modulus, + &v_twice_mod); + + __m512i zhi1 = _mm512_hexl_mulhi_epi<64>(x1, y1); + __m512i zhi2 = _mm512_hexl_mulhi_epi<64>(x2, y2); + __m512i zhi3 = _mm512_hexl_mulhi_epi<64>(x3, y3); + __m512i zhi4 = _mm512_hexl_mulhi_epi<64>(x4, y4); + __m512i zhi5 = _mm512_hexl_mulhi_epi<64>(x5, y5); + __m512i zhi6 = _mm512_hexl_mulhi_epi<64>(x6, y6); + __m512i zhi7 = _mm512_hexl_mulhi_epi<64>(x7, y7); + __m512i zhi8 = _mm512_hexl_mulhi_epi<64>(x8, y8); + __m512i zhi9 = _mm512_hexl_mulhi_epi<64>(x9, y9); + __m512i zhi10 = _mm512_hexl_mulhi_epi<64>(x10, y10); + __m512i zhi11 = _mm512_hexl_mulhi_epi<64>(x11, y11); + __m512i zhi12 = _mm512_hexl_mulhi_epi<64>(x12, y12); + __m512i zhi13 = _mm512_hexl_mulhi_epi<64>(x13, y13); + __m512i zhi14 = _mm512_hexl_mulhi_epi<64>(x14, y14); + __m512i zhi15 = _mm512_hexl_mulhi_epi<64>(x15, y15); + __m512i 
zhi16 = _mm512_hexl_mulhi_epi<64>(x16, y16); + + __m512i zlo1 = _mm512_hexl_mullo_epi<64>(x1, y1); + __m512i zlo2 = _mm512_hexl_mullo_epi<64>(x2, y2); + __m512i zlo3 = _mm512_hexl_mullo_epi<64>(x3, y3); + __m512i zlo4 = _mm512_hexl_mullo_epi<64>(x4, y4); + __m512i zlo5 = _mm512_hexl_mullo_epi<64>(x5, y5); + __m512i zlo6 = _mm512_hexl_mullo_epi<64>(x6, y6); + __m512i zlo7 = _mm512_hexl_mullo_epi<64>(x7, y7); + __m512i zlo8 = _mm512_hexl_mullo_epi<64>(x8, y8); + __m512i zlo9 = _mm512_hexl_mullo_epi<64>(x9, y9); + __m512i zlo10 = _mm512_hexl_mullo_epi<64>(x10, y10); + __m512i zlo11 = _mm512_hexl_mullo_epi<64>(x11, y11); + __m512i zlo12 = _mm512_hexl_mullo_epi<64>(x12, y12); + __m512i zlo13 = _mm512_hexl_mullo_epi<64>(x13, y13); + __m512i zlo14 = _mm512_hexl_mullo_epi<64>(x14, y14); + __m512i zlo15 = _mm512_hexl_mullo_epi<64>(x15, y15); + __m512i zlo16 = _mm512_hexl_mullo_epi<64>(x16, y16); + + __m512i c1 = _mm512_hexl_shrdi_epi64(zlo1, zhi1); + __m512i c2 = _mm512_hexl_shrdi_epi64(zlo2, zhi2); + __m512i c3 = _mm512_hexl_shrdi_epi64(zlo3, zhi3); + __m512i c4 = _mm512_hexl_shrdi_epi64(zlo4, zhi4); + __m512i c5 = _mm512_hexl_shrdi_epi64(zlo5, zhi5); + __m512i c6 = _mm512_hexl_shrdi_epi64(zlo6, zhi6); + __m512i c7 = _mm512_hexl_shrdi_epi64(zlo7, zhi7); + __m512i c8 = _mm512_hexl_shrdi_epi64(zlo8, zhi8); + __m512i c9 = _mm512_hexl_shrdi_epi64(zlo9, zhi9); + __m512i c10 = _mm512_hexl_shrdi_epi64(zlo10, zhi10); + __m512i c11 = _mm512_hexl_shrdi_epi64(zlo11, zhi11); + __m512i c12 = _mm512_hexl_shrdi_epi64(zlo12, zhi12); + __m512i c13 = _mm512_hexl_shrdi_epi64(zlo13, zhi13); + __m512i c14 = _mm512_hexl_shrdi_epi64(zlo14, zhi14); + __m512i c15 = _mm512_hexl_shrdi_epi64(zlo15, zhi15); + __m512i c16 = _mm512_hexl_shrdi_epi64(zlo16, zhi16); + + c1 = _mm512_hexl_mulhi_epi<64>(c1, vbarr_lo); + c2 = _mm512_hexl_mulhi_epi<64>(c2, vbarr_lo); + c3 = _mm512_hexl_mulhi_epi<64>(c3, vbarr_lo); + c4 = _mm512_hexl_mulhi_epi<64>(c4, vbarr_lo); + c5 = _mm512_hexl_mulhi_epi<64>(c5, vbarr_lo); + 
c6 = _mm512_hexl_mulhi_epi<64>(c6, vbarr_lo); + c7 = _mm512_hexl_mulhi_epi<64>(c7, vbarr_lo); + c8 = _mm512_hexl_mulhi_epi<64>(c8, vbarr_lo); + c9 = _mm512_hexl_mulhi_epi<64>(c9, vbarr_lo); + c10 = _mm512_hexl_mulhi_epi<64>(c10, vbarr_lo); + c11 = _mm512_hexl_mulhi_epi<64>(c11, vbarr_lo); + c12 = _mm512_hexl_mulhi_epi<64>(c12, vbarr_lo); + c13 = _mm512_hexl_mulhi_epi<64>(c13, vbarr_lo); + c14 = _mm512_hexl_mulhi_epi<64>(c14, vbarr_lo); + c15 = _mm512_hexl_mulhi_epi<64>(c15, vbarr_lo); + c16 = _mm512_hexl_mulhi_epi<64>(c16, vbarr_lo); + + __m512i vr1 = _mm512_hexl_mullo_epi<64>(c1, v_modulus); + __m512i vr2 = _mm512_hexl_mullo_epi<64>(c2, v_modulus); + __m512i vr3 = _mm512_hexl_mullo_epi<64>(c3, v_modulus); + __m512i vr4 = _mm512_hexl_mullo_epi<64>(c4, v_modulus); + __m512i vr5 = _mm512_hexl_mullo_epi<64>(c5, v_modulus); + __m512i vr6 = _mm512_hexl_mullo_epi<64>(c6, v_modulus); + __m512i vr7 = _mm512_hexl_mullo_epi<64>(c7, v_modulus); + __m512i vr8 = _mm512_hexl_mullo_epi<64>(c8, v_modulus); + __m512i vr9 = _mm512_hexl_mullo_epi<64>(c9, v_modulus); + __m512i vr10 = _mm512_hexl_mullo_epi<64>(c10, v_modulus); + __m512i vr11 = _mm512_hexl_mullo_epi<64>(c11, v_modulus); + __m512i vr12 = _mm512_hexl_mullo_epi<64>(c12, v_modulus); + __m512i vr13 = _mm512_hexl_mullo_epi<64>(c13, v_modulus); + __m512i vr14 = _mm512_hexl_mullo_epi<64>(c14, v_modulus); + __m512i vr15 = _mm512_hexl_mullo_epi<64>(c15, v_modulus); + __m512i vr16 = _mm512_hexl_mullo_epi<64>(c16, v_modulus); + + vr1 = _mm512_sub_epi64(zlo1, vr1); + vr2 = _mm512_sub_epi64(zlo2, vr2); + vr3 = _mm512_sub_epi64(zlo3, vr3); + vr4 = _mm512_sub_epi64(zlo4, vr4); + vr5 = _mm512_sub_epi64(zlo5, vr5); + vr6 = _mm512_sub_epi64(zlo6, vr6); + vr7 = _mm512_sub_epi64(zlo7, vr7); + vr8 = _mm512_sub_epi64(zlo8, vr8); + vr9 = _mm512_sub_epi64(zlo9, vr9); + vr10 = _mm512_sub_epi64(zlo10, vr10); + vr11 = _mm512_sub_epi64(zlo11, vr11); + vr12 = _mm512_sub_epi64(zlo12, vr12); + vr13 = _mm512_sub_epi64(zlo13, vr13); + vr14 = 
_mm512_sub_epi64(zlo14, vr14); + vr15 = _mm512_sub_epi64(zlo15, vr15); + vr16 = _mm512_sub_epi64(zlo16, vr16); + + vr1 = _mm512_hexl_small_mod_epu64(vr1, v_modulus); + vr2 = _mm512_hexl_small_mod_epu64(vr2, v_modulus); + vr3 = _mm512_hexl_small_mod_epu64(vr3, v_modulus); + vr4 = _mm512_hexl_small_mod_epu64(vr4, v_modulus); + vr5 = _mm512_hexl_small_mod_epu64(vr5, v_modulus); + vr6 = _mm512_hexl_small_mod_epu64(vr6, v_modulus); + vr7 = _mm512_hexl_small_mod_epu64(vr7, v_modulus); + vr8 = _mm512_hexl_small_mod_epu64(vr8, v_modulus); + vr9 = _mm512_hexl_small_mod_epu64(vr9, v_modulus); + vr10 = _mm512_hexl_small_mod_epu64(vr10, v_modulus); + vr11 = _mm512_hexl_small_mod_epu64(vr11, v_modulus); + vr12 = _mm512_hexl_small_mod_epu64(vr12, v_modulus); + vr13 = _mm512_hexl_small_mod_epu64(vr13, v_modulus); + vr14 = _mm512_hexl_small_mod_epu64(vr14, v_modulus); + vr15 = _mm512_hexl_small_mod_epu64(vr15, v_modulus); + vr16 = _mm512_hexl_small_mod_epu64(vr16, v_modulus); + + _mm512_storeu_si512(vp_result++, vr1); + _mm512_storeu_si512(vp_result++, vr2); + _mm512_storeu_si512(vp_result++, vr3); + _mm512_storeu_si512(vp_result++, vr4); + _mm512_storeu_si512(vp_result++, vr5); + _mm512_storeu_si512(vp_result++, vr6); + _mm512_storeu_si512(vp_result++, vr7); + _mm512_storeu_si512(vp_result++, vr8); + _mm512_storeu_si512(vp_result++, vr9); + _mm512_storeu_si512(vp_result++, vr10); + _mm512_storeu_si512(vp_result++, vr11); + _mm512_storeu_si512(vp_result++, vr12); + _mm512_storeu_si512(vp_result++, vr13); + _mm512_storeu_si512(vp_result++, vr14); + _mm512_storeu_si512(vp_result++, vr15); + _mm512_storeu_si512(vp_result++, vr16); + } +} + +template +void EltwiseMultModAVX512IntLoop16384(__m512i* vp_result, + const __m512i* vp_operand1, + const __m512i* vp_operand2, + __m512i vbarr_lo, __m512i v_modulus, + __m512i v_twice_mod) { + EltwiseMultModAVX512IntLoop8192( + vp_result, vp_operand1, vp_operand2, vbarr_lo, v_modulus, v_twice_mod); + vp_operand1 += 1024; + vp_operand2 += 1024; + 
vp_result += 1024; + EltwiseMultModAVX512IntLoop8192( + vp_result, vp_operand1, vp_operand2, vbarr_lo, v_modulus, v_twice_mod); +} + +// Algorithm 1 from +// https://hal.archives-ouvertes.fr/hal-01215845/document +template +void EltwiseMultModAVX512IntLoopDefault(__m512i* vp_result, + const __m512i* vp_operand1, + const __m512i* vp_operand2, + __m512i vbarr_lo, __m512i v_modulus, + __m512i v_twice_mod, uint64_t n) { + (void)v_twice_mod; // Avoid unused variable + HEXL_LOOP_UNROLL_4 + for (size_t i = n / 8; i > 0; --i) { + __m512i v_operand1 = _mm512_loadu_si512(vp_operand1); + __m512i v_operand2 = _mm512_loadu_si512(vp_operand2); + + v_operand1 = _mm512_hexl_small_mod_epu64( + v_operand1, v_modulus, &v_twice_mod); + + v_operand2 = _mm512_hexl_small_mod_epu64( + v_operand2, v_modulus, &v_twice_mod); + + __m512i vprod_hi = _mm512_hexl_mulhi_epi<64>(v_operand1, v_operand2); + __m512i vprod_lo = _mm512_hexl_mullo_epi<64>(v_operand1, v_operand2); + __m512i c1 = _mm512_hexl_shrdi_epi64(vprod_lo, vprod_hi); + __m512i c3 = _mm512_hexl_mulhi_epi<64>(c1, vbarr_lo); + __m512i vresult = _mm512_hexl_mullo_epi<64>(c3, v_modulus); + vresult = _mm512_sub_epi64(vprod_lo, vresult); + vresult = _mm512_hexl_small_mod_epu64(vresult, v_modulus); + _mm512_storeu_si512(vp_result, vresult); + + ++vp_operand1; + ++vp_operand2; + ++vp_result; + } +} + +template +void EltwiseMultModAVX512IntLoop(__m512i* vp_result, const __m512i* vp_operand1, + const __m512i* vp_operand2, __m512i vbarr_lo, + __m512i v_modulus, __m512i v_twice_mod, + uint64_t n) { + if (n == 8192) { + EltwiseMultModAVX512IntLoop8192( + vp_result, vp_operand1, vp_operand2, vbarr_lo, v_modulus, v_twice_mod); + } else if (n == 16384) { + EltwiseMultModAVX512IntLoop16384( + vp_result, vp_operand1, vp_operand2, vbarr_lo, v_modulus, v_twice_mod); + } else { + EltwiseMultModAVX512IntLoopDefault( + vp_result, vp_operand1, vp_operand2, vbarr_lo, v_modulus, v_twice_mod, + n); + } +} + +// Algorithm 1 from 
https://hal.archives-ouvertes.fr/hal-01215845/document +template +void EltwiseMultModAVX512Int(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus) { + HEXL_CHECK(InputModFactor == 1 || InputModFactor == 2 || InputModFactor == 4, + "Require InputModFactor = 1, 2, or 4") + HEXL_CHECK(InputModFactor * modulus > (1ULL << 50), + "Require InputModFactor * modulus > (1ULL << 50)") + HEXL_CHECK(InputModFactor * modulus < (1ULL << 63), + "Require InputModFactor * modulus < (1ULL << 63)"); + HEXL_CHECK(modulus < (1ULL << 62), "Require modulus < (1ULL << 62)"); + HEXL_CHECK_BOUNDS( + operand1, n, InputModFactor * modulus, + "pre-mult value in operand1 exceeds bound " << InputModFactor * modulus); + HEXL_CHECK_BOUNDS( + operand2, n, InputModFactor * modulus, + "Value in operand2 exceeds bound " << InputModFactor * modulus); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + uint64_t n_mod_8 = n % 8; + if (n_mod_8 != 0) { + EltwiseMultModNative(result, operand1, operand2, n_mod_8, + modulus); + operand1 += n_mod_8; + operand2 += n_mod_8; + result += n_mod_8; + n -= n_mod_8; + } + + const uint64_t logmod = uint64_t(floorl(std::log2l(modulus)) - 1); + uint64_t log2_input_mod_factor = 0; + if (InputModFactor == 2) { + log2_input_mod_factor = 1; + } else if (InputModFactor == 4) { + log2_input_mod_factor = 2; + } + + // modulus < 2**N + const uint64_t N = logmod + 1; + uint64_t L = 63 + N; // Ensures L-N+1 == 64 + uint64_t op_hi = uint64_t(1) << (L - 64); + uint64_t op_lo = uint64_t(0); + uint64_t barr_lo = DivideUInt128UInt64Lo(op_hi, op_lo, modulus); + + // Let d be the product operand1 * operand2. + // To ensure d >> (N - 1) < (1ULL << 64), we need + // (input_mod_factor * modulus)^2 >> (N-1) < (1ULL << 64) + // This happens when 2 * log_2(input_mod_factor) + N < 63 + // If not, we need to reduce the inputs to be less than modulus for + // correctness. This is less efficient, so we avoid it when possible. 
+ bool reduce_mod = 2 * log2_input_mod_factor + N >= 63; + + __m512i vbarr_lo = _mm512_set1_epi64(barr_lo); + __m512i v_modulus = _mm512_set1_epi64(modulus); + __m512i v_twice_mod = _mm512_set1_epi64(2 * modulus); + const __m512i* vp_operand1 = reinterpret_cast(operand1); + const __m512i* vp_operand2 = reinterpret_cast(operand2); + __m512i* vp_result = reinterpret_cast<__m512i*>(result); + + if (reduce_mod) { + // This case happens only when N >= 63 - 2 * log2(input_mod_factor) = 59 + // Additionally, modulus < (1ULL << 62) implies N <= 62. + // So N == 59, 60, 61, 62 are the only cases here. + switch (N) { + case 59: { + EltwiseMultModAVX512IntLoop<59, InputModFactor>( + vp_result, vp_operand1, vp_operand2, vbarr_lo, v_modulus, + v_twice_mod, n); + break; + } + case 60: { + EltwiseMultModAVX512IntLoop<60, InputModFactor>( + vp_result, vp_operand1, vp_operand2, vbarr_lo, v_modulus, + v_twice_mod, n); + break; + } + case 61: { + EltwiseMultModAVX512IntLoop<61, InputModFactor>( + vp_result, vp_operand1, vp_operand2, vbarr_lo, v_modulus, + v_twice_mod, n); + break; + } + case 62: { + EltwiseMultModAVX512IntLoop<62, InputModFactor>( + vp_result, vp_operand1, vp_operand2, vbarr_lo, v_modulus, + v_twice_mod, n); + break; + } + default: { + HEXL_CHECK(false, "Bad value for N: " << N); + } + } + } else { // Input mod reduction not required; pass InputModFactor == 1. + // For N < 50, we should prefer EltwiseMultModAVX512Float, so we don't + // generate a special case for it here + + // The template arguments are required for use of _mm512_hexl_shrdi_epi64, + // which requires a compile-time constant for the shift. 
+ switch (N) { + case 50: { + EltwiseMultModAVX512IntLoop<50, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 51: { + EltwiseMultModAVX512IntLoop<51, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 52: { + EltwiseMultModAVX512IntLoop<52, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 53: { + EltwiseMultModAVX512IntLoop<53, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 54: { + EltwiseMultModAVX512IntLoop<54, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 55: { + EltwiseMultModAVX512IntLoop<55, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 56: { + EltwiseMultModAVX512IntLoop<56, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 57: { + EltwiseMultModAVX512IntLoop<57, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 58: { + EltwiseMultModAVX512IntLoop<58, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 59: { + EltwiseMultModAVX512IntLoop<59, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 60: { + EltwiseMultModAVX512IntLoop<60, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + case 61: { + EltwiseMultModAVX512IntLoop<61, 1>(vp_result, vp_operand1, vp_operand2, + vbarr_lo, v_modulus, v_twice_mod, n); + break; + } + default: { + // Algorithm 1 from + // https://hal.archives-ouvertes.fr/hal-01215845/document + HEXL_LOOP_UNROLL_4 + for (size_t i = n / 8; i > 0; --i) { + __m512i v_operand1 = _mm512_loadu_si512(vp_operand1); + __m512i v_operand2 = _mm512_loadu_si512(vp_operand2); + + // Compute product + __m512i 
vprod_hi = _mm512_hexl_mulhi_epi<64>(v_operand1, v_operand2); + __m512i vprod_lo = _mm512_hexl_mullo_epi<64>(v_operand1, v_operand2); + + __m512i c1 = _mm512_hexl_shrdi_epi64(vprod_lo, vprod_hi, N - 1); + + // L - N + 1 == 64, so we only need high 64 bits + __m512i c3 = _mm512_hexl_mulhi_epi<64>(c1, vbarr_lo); + + // C4 = prod_lo - (p * c3)_lo + __m512i vresult = _mm512_hexl_mullo_epi<64>(c3, v_modulus); + vresult = _mm512_sub_epi64(vprod_lo, vresult); + + // Conditional subtraction + vresult = _mm512_hexl_small_mod_epu64(vresult, v_modulus); + _mm512_storeu_si512(vp_result, vresult); + + ++vp_operand1; + ++vp_operand2; + ++vp_result; + } + } + } + } + HEXL_CHECK_BOUNDS(result, n, modulus, + "post-mult value in result exceeds bound " << modulus); +} + +// From Function 18, page 19 of https://arxiv.org/pdf/1407.3383.pdf +// See also Algorithm 2/3 of +// https://hal.archives-ouvertes.fr/hal-02552673/document +template +inline void EltwiseMultModAVX512FloatLoop(__m512i* vp_result, + const __m512i* vp_operand1, + const __m512i* vp_operand2, __m512d u, + __m512d p, __m512i v_modulus, + __m512i v_twice_mod, uint64_t n) { + (void)v_twice_mod; // Avoid unused variable + + HEXL_LOOP_UNROLL_4 + for (size_t i = n / 8; i > 0; --i) { + __m512i v_operand1 = _mm512_loadu_si512(vp_operand1); + v_operand1 = _mm512_hexl_small_mod_epu64( + v_operand1, v_modulus, &v_twice_mod); + + __m512i v_operand2 = _mm512_loadu_si512(vp_operand2); + v_operand2 = _mm512_hexl_small_mod_epu64( + v_operand2, v_modulus, &v_twice_mod); + + __m512i v_result; + __m512d x = _mm512_cvt_roundepu64_pd( + v_operand1, (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); + __m512d y = _mm512_cvt_roundepu64_pd( + v_operand2, (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); + + __m512d h = _mm512_mul_pd(x, y); + __m512d l = _mm512_fmsub_pd(x, y, h); // rounding error; h + l == x * y + __m512d b = _mm512_mul_pd(h, u); // ~ (x * y) / p + __m512d c = _mm512_floor_pd(b); // ~ floor(x * y / p) + __m512d d = _mm512_fnmadd_pd(c, 
p, h); + __m512d g = _mm512_add_pd(d, l); + __mmask8 m = _mm512_cmp_pd_mask(g, _mm512_setzero_pd(), _CMP_LT_OQ); + g = _mm512_mask_add_pd(g, m, g, p); + + v_result = _mm512_cvt_roundpd_epu64( + g, (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); + + _mm512_storeu_si512(vp_result, v_result); + + ++vp_operand1; + ++vp_operand2; + ++vp_result; + } +} + +// From Function 18, page 19 of https://arxiv.org/pdf/1407.3383.pdf +// See also Algorithm 2/3 of +// https://hal.archives-ouvertes.fr/hal-02552673/document +template +void EltwiseMultModAVX512Float(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus) { + HEXL_CHECK(modulus < MaximumValue(50), + " modulus " << modulus << " exceeds bound " << MaximumValue(50)); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + + HEXL_CHECK_BOUNDS( + operand1, n, InputModFactor * modulus, + "pre-mult value in operand1 exceeds bound " << InputModFactor * modulus); + HEXL_CHECK_BOUNDS( + operand2, n, InputModFactor * modulus, + "Value in operand2 exceeds bound " << InputModFactor * modulus); + uint64_t n_mod_8 = n % 8; + if (n_mod_8 != 0) { + EltwiseMultModNative(result, operand1, operand2, n_mod_8, + modulus); + operand1 += n_mod_8; + operand2 += n_mod_8; + result += n_mod_8; + n -= n_mod_8; + } + __m512d p = _mm512_set1_pd(static_cast(modulus)); + __m512i v_modulus = _mm512_set1_epi64(modulus); + __m512i v_twice_mod = _mm512_set1_epi64(modulus * 2); + + // Add epsilon to ensure u * p >= 1.0 + // See Proposition 13 of https://arxiv.org/pdf/1407.3383.pdf + double ubar = (1.0 + std::numeric_limits::epsilon()) / modulus; + __m512d u = _mm512_set1_pd(ubar); + + const __m512i* vp_operand1 = reinterpret_cast(operand1); + const __m512i* vp_operand2 = reinterpret_cast(operand2); + __m512i* vp_result = reinterpret_cast<__m512i*>(result); + + bool no_reduce_mod = (InputModFactor * modulus) < MaximumValue(50); + if (no_reduce_mod) { // No input modulus reduction necessary + 
EltwiseMultModAVX512FloatLoop<1>(vp_result, vp_operand1, vp_operand2, u, p, + v_modulus, v_twice_mod, n); + } else { + EltwiseMultModAVX512FloatLoop( + vp_result, vp_operand1, vp_operand2, u, p, v_modulus, v_twice_mod, n); + } + + HEXL_CHECK_BOUNDS(result, n, modulus, + "post-mult value in operand1 exceeds bound " << modulus); +} + +#endif // HEXL_HAS_AVX512DQ + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-mult-mod-internal.hpp b/intel-hexl/eltwise/eltwise-mult-mod-internal.hpp new file mode 100644 index 00000000..1081ff84 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-mult-mod-internal.hpp @@ -0,0 +1,87 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "eltwise/eltwise-mult-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-reduce-mod.hpp" +#include "number-theory/number-theory.hpp" +#include "util/aligned-allocator.hpp" +#include "util/check.hpp" + +namespace intel { +namespace hexl { + +/// @brief Multiplies two vectors elementwise with modular reduction +/// @param[in] result Result of element-wise multiplication +/// @param[in] operand1 Vector of elements to multiply. Each element must be +/// less than the modulus. +/// @param[in] operand2 Vector of elements to multiply. Each element must be +/// less than the modulus. +/// @param[in] n Number of elements in each vector +/// @param[in] modulus Modulus with which to perform modular reduction +/// @param[in] input_mod_factor Assumes input elements are in [0, +/// input_mod_factor * p) Must be 1, 2 or 4. 
+/// @details Computes \p result[i] = (\p operand1[i] * \p operand2[i]) mod \p +/// modulus for i=0, ..., \p n - 1 +/// @details Algorithm 1 of +/// https://hal.archives-ouvertes.fr/hal-01215845/document +template +void EltwiseMultModNative(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, + uint64_t modulus) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(operand2 != nullptr, "Require operand2 != nullptr"); + HEXL_CHECK(n != 0, "Require n != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK(modulus < (1ULL << 62), "Require modulus < (1ULL << 62)"); + HEXL_CHECK_BOUNDS(operand1, n, InputModFactor * modulus); + HEXL_CHECK_BOUNDS(operand2, n, InputModFactor * modulus); + + const uint64_t logmod = uint64_t(floorl(std::log2l(modulus)) - 1); + // modulus < 2**N + const uint64_t N = logmod + 1; + uint64_t L = 63 + N; // Ensures L - N + 1 == 64 + uint64_t op_hi = uint64_t(1) << (L - 64); + uint64_t op_lo = uint64_t(0); + uint64_t barr_lo = DivideUInt128UInt64Lo(op_hi, op_lo, modulus); + + const uint64_t twice_modulus = 2 * modulus; + + HEXL_LOOP_UNROLL_4 + for (size_t i = 0; i < n; ++i) { + uint64_t prod_hi, prod_lo, c2_hi, c2_lo, c4; + + uint64_t x = ReduceMod(*operand1, modulus, &twice_modulus); + uint64_t y = ReduceMod(*operand2, modulus, &twice_modulus); + + // Multiply inputs + MultiplyUInt64(x, y, &prod_hi, &prod_lo); + // C1 = D >> (N-1) + + uint64_t c1 = (prod_lo >> (N - 1)) + (prod_hi << (64 - (N - 1))); + + // C2 = C1 * barr_lo + MultiplyUInt64(c1, barr_lo, &c2_hi, &c2_lo); + + // C3 = C2 >> (L - N + 1) + // L - N + 1 == 64, so we only need high 64 bits + uint64_t c3 = c2_hi; + + // C4 = prod_lo - (p * c3)_lo + c4 = prod_lo - c3 * modulus; + + // Conditional subtraction + *result = (c4 >= modulus) ? 
(c4 - modulus) : c4; + + ++operand1; + ++operand2; + ++result; + } +} + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-mult-mod.cpp b/intel-hexl/eltwise/eltwise-mult-mod.cpp new file mode 100644 index 00000000..f4e5617d --- /dev/null +++ b/intel-hexl/eltwise/eltwise-mult-mod.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "intel-hexl/eltwise/eltwise-mult-mod.hpp" + +#include "eltwise/eltwise-mult-mod-avx512.hpp" +#include "eltwise/eltwise-mult-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-reduce-mod.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "util/aligned-allocator.hpp" +#include "util/check.hpp" +#include "util/cpu-features.hpp" + +namespace intel { +namespace hexl { + +void EltwiseMultMod(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, uint64_t modulus, + uint64_t input_mod_factor) { + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(operand1 != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(operand2 != nullptr, "Require operand2 != nullptr"); + HEXL_CHECK(n != 0, "Require n != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK(input_mod_factor * modulus < (1ULL << 63), + "Require input_mod_factor * modulus < (1ULL << 63)"); + HEXL_CHECK( + input_mod_factor == 1 || input_mod_factor == 2 || input_mod_factor == 4, + "Require input_mod_factor = 1, 2, or 4") + HEXL_CHECK_BOUNDS(operand1, n, input_mod_factor * modulus) + HEXL_CHECK_BOUNDS(operand2, n, input_mod_factor * modulus) + +#ifdef HEXL_HAS_AVX512DQ + if (has_avx512dq) { + if (modulus < (1ULL << 50)) { + switch (input_mod_factor) { + case 1: + EltwiseMultModAVX512Float<1>(result, operand1, operand2, n, modulus); + break; + case 2: + EltwiseMultModAVX512Float<2>(result, operand1, operand2, n, modulus); + break; + case 4: + EltwiseMultModAVX512Float<4>(result, 
operand1, operand2, n, modulus); + break; + } + return; + } else { + switch (input_mod_factor) { + case 1: + EltwiseMultModAVX512Int<1>(result, operand1, operand2, n, modulus); + break; + case 2: + EltwiseMultModAVX512Int<2>(result, operand1, operand2, n, modulus); + break; + case 4: + EltwiseMultModAVX512Int<4>(result, operand1, operand2, n, modulus); + break; + } + return; + } + } +#endif + + HEXL_VLOG(3, "Calling EltwiseMultModNative"); + switch (input_mod_factor) { + case 1: + EltwiseMultModNative<1>(result, operand1, operand2, n, modulus); + break; + case 2: + EltwiseMultModNative<2>(result, operand1, operand2, n, modulus); + break; + case 4: + EltwiseMultModNative<4>(result, operand1, operand2, n, modulus); + break; + } + return; +} +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-reduce-mod-avx512.cpp b/intel-hexl/eltwise/eltwise-reduce-mod-avx512.cpp new file mode 100644 index 00000000..907d4792 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-reduce-mod-avx512.cpp @@ -0,0 +1,108 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "eltwise/eltwise-reduce-mod-avx512.hpp" + +#include +#include + +#include "eltwise/eltwise-reduce-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-reduce-mod.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "util/avx512-util.hpp" +#include "util/check.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512DQ + +void EltwiseReduceModAVX512(uint64_t* result, const uint64_t* operand, + uint64_t modulus, uint64_t n, + uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(operand != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(n != 0, "Require n != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK( + input_mod_factor == 0 || input_mod_factor == 2 || input_mod_factor == 4, + "input_mod_factor must be 0 or 2 or 4" << input_mod_factor); + 
HEXL_CHECK(output_mod_factor == 1 || output_mod_factor == 2, + "output_mod_factor must be 1 or 2 " << output_mod_factor); + HEXL_CHECK(input_mod_factor != output_mod_factor, + "input_mod_factor must not be equal to output_mod_factor "); + + uint64_t n_tmp = n; + uint64_t barrett_factor = MultiplyFactor(1, 64, modulus).BarrettFactor(); + __m512i v_bf = _mm512_set1_epi64(barrett_factor); + + // Deals with n not divisible by 8 + uint64_t n_mod_8 = n_tmp % 8; + if (n_mod_8 != 0) { + EltwiseReduceModNative(result, operand, modulus, n_mod_8, input_mod_factor, + output_mod_factor); + operand += n_mod_8; + result += n_mod_8; + n_tmp -= n_mod_8; + } + + uint64_t twice_mod = modulus << 1; + const __m512i* v_operand = reinterpret_cast(operand); + __m512i* v_result = reinterpret_cast<__m512i*>(result); + __m512i v_modulus = _mm512_set1_epi64(modulus); + __m512i v_twice_mod = _mm512_set1_epi64(twice_mod); + + switch (input_mod_factor) { + case 0: + for (size_t i = 0; i < n_tmp; i += 8) { + __m512i v_op = _mm512_loadu_si512(v_operand); + v_op = _mm512_hexl_barrett_reduce64(v_op, v_modulus, v_bf); + HEXL_CHECK_BOUNDS(ExtractValues(v_op).data(), 8, modulus); + _mm512_storeu_si512(v_result, v_op); + ++v_operand; + ++v_result; + } + break; + + case 2: + for (size_t i = 0; i < n_tmp; i += 8) { + __m512i v_op = _mm512_loadu_si512(v_operand); + v_op = _mm512_hexl_small_mod_epu64(v_op, v_modulus); + HEXL_CHECK_BOUNDS(ExtractValues(v_op).data(), 8, modulus); + _mm512_storeu_si512(v_result, v_op); + ++v_operand; + ++v_result; + } + break; + + case 4: + if (output_mod_factor == 1) { + for (size_t i = 0; i < n_tmp; i += 8) { + __m512i v_op = _mm512_loadu_si512(v_operand); + v_op = _mm512_hexl_small_mod_epu64(v_op, v_twice_mod); + v_op = _mm512_hexl_small_mod_epu64(v_op, v_modulus); + HEXL_CHECK_BOUNDS(ExtractValues(v_op).data(), 8, modulus); + _mm512_storeu_si512(v_result, v_op); + ++v_operand; + ++v_result; + } + } + if (output_mod_factor == 2) { + for (size_t i = 0; i < n_tmp; i += 8) { + 
__m512i v_op = _mm512_loadu_si512(v_operand); + v_op = _mm512_hexl_small_mod_epu64(v_op, v_twice_mod); + HEXL_CHECK_BOUNDS(ExtractValues(v_op).data(), 8, twice_mod); + _mm512_storeu_si512(v_result, v_op); + ++v_operand; + ++v_result; + } + } + break; + } +} + +#endif + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-reduce-mod-avx512.hpp b/intel-hexl/eltwise/eltwise-reduce-mod-avx512.hpp new file mode 100644 index 00000000..9b770d79 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-reduce-mod-avx512.hpp @@ -0,0 +1,16 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +namespace intel { +namespace hexl { +void EltwiseReduceModAVX512(uint64_t* result, const uint64_t* operand, + uint64_t modulus, uint64_t n, + uint64_t input_mod_factor, + uint64_t output_mod_factor); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-reduce-mod-internal.hpp b/intel-hexl/eltwise/eltwise-reduce-mod-internal.hpp new file mode 100644 index 00000000..2af82f08 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-reduce-mod-internal.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +namespace intel { +namespace hexl { + +// @brief Performs elementwise modular reduction +// @param[out] result Stores result +// @param[in] operand Vector of elements +// @param[in] n Number of elements in operand +// @param[in] modulus Modulus with which to perform modular reduction +// @param[in] input_mod_factor Assumes input elements are in [0, +// input_mod_factor * p) Must be 0, 2 or 4. input_mod_factor=0 means, no +// knowledge of input range. Barrett reduction will be used in this case +// input_mod_factor > output_mod_factor unless input_mod_factor == 0 +// @param[in] output_mod_factor output elements will be in [0, output_mod_factor +// * p) Must be 1 or 2. 
for input_mod_factor=0, output_mod_factor will be set +// to 1. + +void EltwiseReduceModNative(uint64_t* result, const uint64_t* operand, + uint64_t modulus, uint64_t n, + uint64_t input_mod_factor, + uint64_t output_mod_factor); +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/eltwise/eltwise-reduce-mod.cpp b/intel-hexl/eltwise/eltwise-reduce-mod.cpp new file mode 100644 index 00000000..98744150 --- /dev/null +++ b/intel-hexl/eltwise/eltwise-reduce-mod.cpp @@ -0,0 +1,117 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "intel-hexl/eltwise/eltwise-reduce-mod.hpp" + +#include "eltwise/eltwise-reduce-mod-avx512.hpp" +#include "eltwise/eltwise-reduce-mod-internal.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "util/check.hpp" +#include "util/cpu-features.hpp" + +namespace intel { +namespace hexl { + +void EltwiseReduceModNative(uint64_t* result, const uint64_t* operand, + uint64_t modulus, uint64_t n, + uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(operand != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(n != 0, "Require n != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK( + input_mod_factor == 0 || input_mod_factor == 2 || input_mod_factor == 4, + "input_mod_factor must be 0 or 2 or 4" << input_mod_factor); + HEXL_CHECK(output_mod_factor == 1 || output_mod_factor == 2, + "output_mod_factor must be 1 or 2 " << output_mod_factor); + HEXL_CHECK(input_mod_factor != output_mod_factor, + "input_mod_factor must not be equal to output_mod_factor "); + + uint64_t barrett_factor = MultiplyFactor(1, 64, modulus).BarrettFactor(); + + uint64_t twice_mod = modulus << 1; + switch (input_mod_factor) { + case 0: + for (size_t i = 0; i < n; ++i) { + if (operand[i] >= modulus) { + result[i] = BarrettReduce64(operand[i], modulus, barrett_factor); + } else { + 
result[i] = operand[i]; + } + } + HEXL_CHECK_BOUNDS(result, n, modulus); + break; + + case 2: + for (size_t i = 0; i < n; ++i) { + if (operand[i] >= modulus) { + result[i] = operand[i] - modulus; + } else { + result[i] = operand[i]; + } + } + HEXL_CHECK_BOUNDS(result, n, modulus); + break; + + case 4: + if (output_mod_factor == 1) { + for (size_t i = 0; i < n; ++i) { + if (operand[i] >= twice_mod) { + result[i] = operand[i] - twice_mod; + } else { + result[i] = operand[i]; + } + if (result[i] >= modulus) { + result[i] -= modulus; + } + } + HEXL_CHECK_BOUNDS(result, n, modulus); + } + if (output_mod_factor == 2) { + for (size_t i = 0; i < n; ++i) { + if (operand[i] >= twice_mod) { + result[i] = operand[i] - twice_mod; + } else { + result[i] = operand[i]; + } + } + HEXL_CHECK_BOUNDS(result, n, twice_mod); + } + break; + } +} + +void EltwiseReduceMod(uint64_t* result, const uint64_t* operand, + uint64_t modulus, uint64_t n, uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(operand != nullptr, "Require operand1 != nullptr"); + HEXL_CHECK(result != nullptr, "Require result != nullptr"); + HEXL_CHECK(n != 0, "Require n != 0"); + HEXL_CHECK(modulus > 1, "Require modulus > 1"); + HEXL_CHECK( + input_mod_factor == 0 || input_mod_factor == 2 || input_mod_factor == 4, + "input_mod_factor must be 0 or 2 or 4" << input_mod_factor); + HEXL_CHECK(output_mod_factor == 1 || output_mod_factor == 2, + "output_mod_factor must be 1 or 2 " << output_mod_factor); + + if (input_mod_factor == output_mod_factor && (operand != result)) { + for (size_t i = 0; i < n; ++i) { + result[i] = operand[i]; + } + return; + } +#ifdef HEXL_HAS_AVX512DQ + if (has_avx512dq) { + EltwiseReduceModAVX512(result, operand, modulus, n, input_mod_factor, + output_mod_factor); + return; + } +#endif + HEXL_VLOG(3, "Calling EltwiseReduceModNative"); + EltwiseReduceModNative(result, operand, modulus, n, input_mod_factor, + output_mod_factor); +} +} // namespace hexl +} // namespace intel diff 
--git a/intel-hexl/include/intel-hexl/eltwise/eltwise-add-mod.hpp b/intel-hexl/include/intel-hexl/eltwise/eltwise-add-mod.hpp new file mode 100644 index 00000000..dfd8ce2a --- /dev/null +++ b/intel-hexl/include/intel-hexl/eltwise/eltwise-add-mod.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +namespace intel { +namespace hexl { + +/// @brief Adds two vectors elementwise with modular reduction +/// @param[out] result Stores result +/// @param[in] operand1 Vector of elements to add. Each element must be less +/// than the modulus +/// @param[in] operand2 Vector of elements to add. Each element must be less +/// than the modulus +/// @param[in] n Number of elements in each vector +/// @param[in] modulus Modulus with which to perform modular reduction. Must be +/// in the range \f$[2, 2^{63} - 1]\f$ +/// @details Computes \f$ result[i] = (operand1[i] + operand2[i]) \mod modulus +/// \f$ for \f$ i=0, ..., n-1\f$. +void EltwiseAddMod(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, uint64_t modulus); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-add.hpp b/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-add.hpp new file mode 100644 index 00000000..66edf6eb --- /dev/null +++ b/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-add.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "intel-hexl/util/util.hpp" + +namespace intel { +namespace hexl { + +/// @brief Computes element-wise conditional addition.
+/// @param[out] result Stores the result +/// @param[in] operand1 Vector of elements to compare +/// @param[in] cmp Comparison operation +/// @param[in] bound Scalar to compare against +/// @param[in] diff Scalar to conditionally add +/// @param[in] n Number of elements in \p operand1 +/// @details Computes result[i] = cmp(operand1[i], bound) ? operand1[i] + +/// diff : operand1[i] for all \f$i=0, ..., n-1\f$. +void EltwiseCmpAdd(uint64_t* result, const uint64_t* operand1, CMPINT cmp, + uint64_t bound, uint64_t diff, uint64_t n); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp b/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp new file mode 100644 index 00000000..77ca7d04 --- /dev/null +++ b/intel-hexl/include/intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "intel-hexl/util/util.hpp" + +namespace intel { +namespace hexl { + +/// @brief Computes element-wise conditional modular subtraction. +/// @param[out] result Stores the result +/// @param[in] operand1 Vector of elements to compare +/// @param[in] cmp Comparison function +/// @param[in] bound Scalar to compare against +/// @param[in] diff Scalar to subtract by +/// @param[in] modulus Modulus to reduce by +/// @param[in] n Number of elements in \p operand1 +/// @details Computes \p result[i] = (\p cmp(\p operand1, \p bound)) ?
(\p +/// operand1 - \p diff) mod \p modulus : \p operand1 for all i=0, ..., n-1 +void EltwiseCmpSubMod(uint64_t* result, const uint64_t* operand1, CMPINT cmp, + uint64_t bound, uint64_t diff, uint64_t modulus, + uint64_t n); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/include/intel-hexl/eltwise/eltwise-fma-mod.hpp b/intel-hexl/include/intel-hexl/eltwise/eltwise-fma-mod.hpp new file mode 100644 index 00000000..8decc280 --- /dev/null +++ b/intel-hexl/include/intel-hexl/eltwise/eltwise-fma-mod.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +namespace intel { +namespace hexl { + +/// @brief Computes fused multiply-add (\p arg1 * \p arg2 + \p arg3) mod \p +/// modulus element-wise, broadcasting scalars to vectors. +/// @param[out] result Stores the result +/// @param[in] arg1 Vector to multiply +/// @param[in] arg2 Scalar to multiply +/// @param[in] arg3 Vector to add. Will not add if \p arg3 == nullptr +/// @param[in] n Number of elements in each vector +/// @param[in] modulus Modulus with which to perform modular reduction. Must be +/// in the range \f$ [2, 2^{61} - 1]\f$ +/// @param[in] input_mod_factor Assumes input elements are in [0, +/// input_mod_factor * p). Must be 1, 2, 4, or 8. 
+void EltwiseFMAMod(uint64_t* result, const uint64_t* arg1, uint64_t arg2, + const uint64_t* arg3, uint64_t n, uint64_t modulus, + uint64_t input_mod_factor); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/include/intel-hexl/eltwise/eltwise-mult-mod.hpp b/intel-hexl/include/intel-hexl/eltwise/eltwise-mult-mod.hpp new file mode 100644 index 00000000..1dee97e7 --- /dev/null +++ b/intel-hexl/include/intel-hexl/eltwise/eltwise-mult-mod.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +namespace intel { +namespace hexl { + +/// @brief Multiplies two vectors elementwise with modular reduction +/// @param[out] result Result of element-wise multiplication +/// @param[in] operand1 Vector of elements to multiply. Each element must be +/// less than \p input_mod_factor * \p modulus. +/// @param[in] operand2 Vector of elements to multiply. Each element must be +/// less than \p input_mod_factor * \p modulus. +/// @param[in] n Number of elements in each vector +/// @param[in] modulus Modulus with which to perform modular reduction +/// @param[in] input_mod_factor Assumes input elements are in [0, +/// input_mod_factor * p) Must be 1, 2 or 4.
+/// @details Computes \p result[i] = (\p operand1[i] * \p operand2[i]) mod \p +/// modulus for i=0, ..., \p n - 1 +void EltwiseMultMod(uint64_t* result, const uint64_t* operand1, + const uint64_t* operand2, uint64_t n, uint64_t modulus, + uint64_t input_mod_factor); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/include/intel-hexl/eltwise/eltwise-reduce-mod.hpp b/intel-hexl/include/intel-hexl/eltwise/eltwise-reduce-mod.hpp new file mode 100644 index 00000000..98042651 --- /dev/null +++ b/intel-hexl/include/intel-hexl/eltwise/eltwise-reduce-mod.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +namespace intel { +namespace hexl { + +/// @brief Performs elementwise modular reduction +/// @param[out] result Stores the result +/// @param[in] operand Vector of elements to reduce +/// @param[in] n Number of elements in operand +/// @param[in] modulus Modulus with which to perform modular reduction +/// @param[in] input_mod_factor Assumes input elements are in [0, +/// input_mod_factor * p) Must be 0, 2 or 4. input_mod_factor=0 means, no +/// knowledge of input range. Barrett reduction will be used in this case. +/// input_mod_factor >= output_mod_factor unless input_mod_factor == 0 +/// @param[in] output_mod_factor output elements will be in [0, +/// output_mod_factor +/// * p) Must be 1 or 2. for input_mod_factor=0, output_mod_factor will be set +/// to 1.
+void EltwiseReduceMod(uint64_t* result, const uint64_t* operand, + uint64_t modulus, uint64_t n, uint64_t input_mod_factor, + uint64_t output_mod_factor); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/include/intel-hexl/intel-hexl.hpp b/intel-hexl/include/intel-hexl/intel-hexl.hpp new file mode 100644 index 00000000..7bfc2c5c --- /dev/null +++ b/intel-hexl/include/intel-hexl/intel-hexl.hpp @@ -0,0 +1,13 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "intel-hexl/eltwise/eltwise-add-mod.hpp" +#include "intel-hexl/eltwise/eltwise-cmp-add.hpp" +#include "intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp" +#include "intel-hexl/eltwise/eltwise-fma-mod.hpp" +#include "intel-hexl/eltwise/eltwise-mult-mod.hpp" +#include "intel-hexl/eltwise/eltwise-reduce-mod.hpp" +#include "intel-hexl/ntt/ntt.hpp" +#include "intel-hexl/util/util.hpp" diff --git a/intel-hexl/include/intel-hexl/ntt/ntt.hpp b/intel-hexl/include/intel-hexl/ntt/ntt.hpp new file mode 100644 index 00000000..fe7cc6e5 --- /dev/null +++ b/intel-hexl/include/intel-hexl/ntt/ntt.hpp @@ -0,0 +1,72 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include +#include + +namespace intel { +namespace hexl { + +/// @brief Performs negacyclic forward and inverse number-theoretic transform +/// (NTT), commonly used in RLWE cryptography. +/// @details The number-theoretic transform (NTT) specializes the discrete +/// Fourier transform (DFT) to the finite field \f$ \mathbb{Z}_p / (X^N + 1) +/// \f$. +class NTT { + public: + /// Initializes an empty NTT object + NTT(); + + /// Destructs the NTT object + ~NTT(); + + /// Initializes an NTT object with degree \p degree and modulus \p p. + /// @param[in] degree a.k.a. N. Size of the NTT transform. Must be a power of + /// 2 + /// @param[in] p Prime modulus. 
Must satisfy \f$ p == 1 \mod 2N \f$ + /// @brief Performs pre-computation necessary for forward and inverse + /// transforms + NTT(uint64_t degree, uint64_t p); + + /// @brief Initializes an NTT object with degree \p degree and modulus + /// \p p. + /// @param[in] degree a.k.a. N. Size of the NTT transform. Must be a power of + /// 2 + /// @param[in] p Prime modulus. Must satisfy \f$ p == 1 \mod 2N \f$ + /// @param[in] root_of_unity 2N'th root of unity in \f$ \mathbb{Z_p} \f$. + /// @details Performs pre-computation necessary for forward and inverse + /// transforms + NTT(uint64_t degree, uint64_t p, uint64_t root_of_unity); + + /// @brief Compute forward NTT. Results are bit-reversed. + /// @param[out] result Stores the result + /// @param[in] operand Data on which to compute the NTT + /// @param[in] input_mod_factor Assume input \p operand are in [0, + /// input_mod_factor * p). Must be 1, 2 or 4. + /// @param[in] output_mod_factor Returns output \p operand in [0, + /// output_mod_factor * p). Must be 1 or 4. + void ComputeForward(uint64_t* result, const uint64_t* operand, + uint64_t input_mod_factor, uint64_t output_mod_factor); + + /// Compute inverse NTT. Results are bit-reversed. + /// @param[out] result Stores the result + /// @param[in] operand Data on which to compute the NTT + /// @param[in] input_mod_factor Assume input \p operand are in [0, + /// input_mod_factor * p). Must be 1 or 2. + /// @param[in] output_mod_factor Returns output \p operand in [0, + /// output_mod_factor * p). Must be 1 or 2. 
+ void ComputeInverse(uint64_t* result, const uint64_t* operand, + uint64_t input_mod_factor, uint64_t output_mod_factor); + + class NTTImpl; ///< Class implementing the NTT + + private: + std::shared_ptr m_impl; +}; + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/include/intel-hexl/util/util.hpp b/intel-hexl/include/intel-hexl/util/util.hpp new file mode 100644 index 00000000..25585bbb --- /dev/null +++ b/intel-hexl/include/intel-hexl/util/util.hpp @@ -0,0 +1,51 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +namespace intel { +namespace hexl { + +#undef TRUE // MSVC defines TRUE +#undef FALSE // MSVC defines FALSE + +/// @enum CMPINT +/// @brief Represents comparison predicates between two integer values +enum class CMPINT { + EQ = 0, ///< Equal + LT = 1, ///< Less than + LE = 2, ///< Less than or equal + FALSE = 3, ///< False + NE = 4, ///< Not equal + NLT = 5, ///< Not less than + NLE = 6, ///< Not less than or equal + TRUE = 7 ///< True +}; + +/// @brief Returns the logical negation of a binary operation +/// @param[in] cmp The binary operation to negate +inline CMPINT Not(CMPINT cmp) { + switch (cmp) { + case CMPINT::EQ: + return CMPINT::NE; + case CMPINT::LT: + return CMPINT::NLT; + case CMPINT::LE: + return CMPINT::NLE; + case CMPINT::FALSE: + return CMPINT::TRUE; + case CMPINT::NE: + return CMPINT::EQ; + case CMPINT::NLT: + return CMPINT::LT; + case CMPINT::NLE: + return CMPINT::LE; + case CMPINT::TRUE: + return CMPINT::FALSE; + default: + return CMPINT::FALSE; + } +} + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/logging/logging.cpp b/intel-hexl/logging/logging.cpp new file mode 100644 index 00000000..f10dd322 --- /dev/null +++ b/intel-hexl/logging/logging.cpp @@ -0,0 +1,29 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "logging/logging.hpp" + +#ifdef HEXL_DEBUG + +#include + +INITIALIZE_EASYLOGGINGPP; +
+DEFINE_int32(v, 0, + "enable verbose (DEBUG) logging. Increasing verbosity from 1 to 5 " + "(maximum debugging)"); + +el::Configurations LogConfigurationFromFlags() { + el::Configurations conf; + conf.setToDefault(); + conf.set(el::Level::Global, el::ConfigurationType::ToFile, "false"); + + if (FLAGS_v) { + el::Loggers::setVerboseLevel(FLAGS_v); + } else { + conf.set(el::Level::Debug, el::ConfigurationType::Enabled, "false"); + } + return conf; +} + +#endif diff --git a/intel-hexl/logging/logging.hpp b/intel-hexl/logging/logging.hpp new file mode 100644 index 00000000..72aa1413 --- /dev/null +++ b/intel-hexl/logging/logging.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +// Wrap HEXL_VLOG with HEXL_DEBUG; this ensures no logging overhead in +// release mode +#ifdef HEXL_DEBUG + +// TODO(fboemer) Enable if needed +// #define ELPP_THREAD_SAFE +#define ELPP_CUSTOM_COUT std::cerr +#define ELPP_STL_LOGGING +#define ELPP_LOG_STD_ARRAY +#define ELPP_LOG_UNORDERED_MAP +#define ELPP_LOG_UNORDERED_SET +#define ELPP_NO_LOG_TO_FILE +#define ELPP_DISABLE_DEFAULT_CRASH_HANDLING +#define ELPP_WINSOCK2 + +#include + +#define HEXL_VLOG(N, rest) \ + do { \ + if (VLOG_IS_ON(N)) { \ + VLOG(N) << rest; \ + } \ + } while (0); + +#else + +#define HEXL_VLOG(N, rest) \ + {} + +#define START_EASYLOGGINGPP(X, Y) \ + {} + +#endif diff --git a/intel-hexl/ntt/fwd-ntt-avx512.cpp b/intel-hexl/ntt/fwd-ntt-avx512.cpp new file mode 100644 index 00000000..6fcd4674 --- /dev/null +++ b/intel-hexl/ntt/fwd-ntt-avx512.cpp @@ -0,0 +1,31 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "ntt/fwd-ntt-avx512.hpp" + +#include "intel-hexl/ntt/ntt.hpp" +#include "ntt/ntt-internal.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512IFMA +template void +ForwardTransformToBitReverseAVX512( + uint64_t* operand, uint64_t degree, uint64_t mod, + const 
uint64_t* root_of_unity_powers, + const uint64_t* precon_root_of_unity_powers, uint64_t input_mod_factor, + uint64_t output_mod_factor); +#endif + +#ifdef HEXL_HAS_AVX512DQ +template void +ForwardTransformToBitReverseAVX512( + uint64_t* operand, uint64_t degree, uint64_t mod, + const uint64_t* root_of_unity_powers, + const uint64_t* precon_root_of_unity_powers, uint64_t input_mod_factor, + uint64_t output_mod_factor); +#endif + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/ntt/fwd-ntt-avx512.hpp b/intel-hexl/ntt/fwd-ntt-avx512.hpp new file mode 100644 index 00000000..286e6fc8 --- /dev/null +++ b/intel-hexl/ntt/fwd-ntt-avx512.hpp @@ -0,0 +1,278 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include +#include + +#include "intel-hexl/ntt/ntt.hpp" +#include "logging/logging.hpp" +#include "ntt/ntt-avx512-util.hpp" +#include "ntt/ntt-internal.hpp" +#include "number-theory/number-theory.hpp" +#include "util/avx512-util.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512DQ + +/// @brief The Harvey butterfly: assume \p X, \p Y in [0, 4p), and return X', Y' +/// in [0, 4p) such that X', Y' = X + WY, X - WY (mod p). +/// @param[in,out] X Input representing 8 64-bit signed integers in SIMD form +/// @param[in,out] Y Input representing 8 64-bit signed integers in SIMD form +/// @param[in] W_op Input representing 8 64-bit signed integers in SIMD form +/// @param[in] W_precon Preconditioned \p W_op for BitShift-bit Barrett +/// reduction +/// @param[in] neg_modulus Negative modulus, i.e. (-p) represented as 8 64-bit +/// signed integers in SIMD form +/// @param[in] twice_modulus Twice the modulus, i.e. 2*p represented as 8 64-bit +/// signed integers in SIMD form +/// @param InputLessThanMod If true, assumes \p X, \p Y < \p p. 
Otherwise, +/// assumes \p X, \p Y < 4*\p p +/// @details See Algorithm 4 of https://arxiv.org/pdf/1205.2926.pdf +template +inline void FwdButterfly(__m512i* X, __m512i* Y, __m512i W_op, __m512i W_precon, + __m512i neg_modulus, __m512i twice_modulus) { + if (!InputLessThanMod) { + *X = _mm512_hexl_small_mod_epu64(*X, twice_modulus); + } + __m512i Q = _mm512_hexl_mulhi_epi(W_precon, *Y); + __m512i W_Y = _mm512_hexl_mullo_epi(W_op, *Y); + __m512i T = _mm512_hexl_mullo_add_epi(W_Y, Q, neg_modulus); + + // Discard high 12 bits if BitShift == 52; deals with case when + // W*Y < Q*p in the low BitShift bits. + if (BitShift == 52) { + T = _mm512_and_epi64(T, _mm512_set1_epi64((1ULL << 52) - 1)); + } + __m512i twice_mod_minus_T = _mm512_sub_epi64(twice_modulus, T); + *Y = _mm512_add_epi64(*X, twice_mod_minus_T); + *X = _mm512_add_epi64(*X, T); +} + +template +void FwdT1(uint64_t* operand, __m512i v_neg_modulus, __m512i v_twice_mod, + uint64_t m, const uint64_t* W_op, const uint64_t* W_precon) { + const __m512i* v_W_op_pt = reinterpret_cast(W_op); + const __m512i* v_W_precon_pt = reinterpret_cast(W_precon); + size_t j1 = 0; + + // 8 | m guaranteed by n >= 16 + HEXL_LOOP_UNROLL_8 + for (size_t i = m / 8; i > 0; --i) { + uint64_t* X = operand + j1; + __m512i* v_X_pt = reinterpret_cast<__m512i*>(X); + + __m512i v_X; + __m512i v_Y; + LoadFwdInterleavedT1(X, &v_X, &v_Y); + __m512i v_W_op = _mm512_loadu_si512(v_W_op_pt++); + __m512i v_W_precon = _mm512_loadu_si512(v_W_precon_pt++); + + FwdButterfly(&v_X, &v_Y, v_W_op, v_W_precon, v_neg_modulus, + v_twice_mod); + WriteFwdInterleavedT1(v_X, v_Y, v_X_pt); + + j1 += 16; + } +} + +template +void FwdT2(uint64_t* operand, __m512i v_neg_modulus, __m512i v_twice_mod, + uint64_t m, const uint64_t* W_op, const uint64_t* W_precon) { + size_t j1 = 0; + // 4 | m guaranteed by n >= 16 + HEXL_LOOP_UNROLL_4 + for (size_t i = m / 4; i > 0; --i) { + uint64_t* X = operand + j1; + __m512i* v_X_pt = reinterpret_cast<__m512i*>(X); + + __m512i v_X; + 
__m512i v_Y; + LoadFwdInterleavedT2(X, &v_X, &v_Y); + + __m512i v_W_op = LoadWOpT2(static_cast(W_op)); + __m512i v_W_precon = LoadWOpT2(static_cast(W_precon)); + + FwdButterfly(&v_X, &v_Y, v_W_op, v_W_precon, v_neg_modulus, + v_twice_mod); + + _mm512_storeu_si512(v_X_pt++, v_X); + _mm512_storeu_si512(v_X_pt, v_Y); + + W_op += 4; + W_precon += 4; + + j1 += 16; + } +} + +template +void FwdT4(uint64_t* operand, __m512i v_neg_modulus, __m512i v_twice_mod, + uint64_t m, const uint64_t* W_op, const uint64_t* W_precon) { + size_t j1 = 0; + + // 2 | m guaranteed by n >= 16 + HEXL_LOOP_UNROLL_4 + for (size_t i = m / 2; i > 0; --i) { + uint64_t* X = operand + j1; + __m512i* v_X_pt = reinterpret_cast<__m512i*>(X); + + __m512i v_X; + __m512i v_Y; + LoadFwdInterleavedT4(X, &v_X, &v_Y); + + __m512i v_W_op = LoadWOpT4(static_cast(W_op)); + __m512i v_W_precon = LoadWOpT4(static_cast(W_precon)); + + FwdButterfly(&v_X, &v_Y, v_W_op, v_W_precon, v_neg_modulus, + v_twice_mod); + + _mm512_storeu_si512(v_X_pt++, v_X); + _mm512_storeu_si512(v_X_pt, v_Y); + + j1 += 16; + W_op += 2; + W_precon += 2; + } +} + +template +void FwdT8(uint64_t* operand, __m512i v_neg_modulus, __m512i v_twice_mod, + uint64_t t, uint64_t m, const uint64_t* W_op, + const uint64_t* W_precon) { + size_t j1 = 0; + + HEXL_LOOP_UNROLL_4 + for (size_t i = 0; i < m; i++) { + uint64_t* X = operand + j1; + uint64_t* Y = X + t; + + __m512i v_W_op = _mm512_set1_epi64(*W_op++); + __m512i v_W_precon = _mm512_set1_epi64(*W_precon++); + + __m512i* v_X_pt = reinterpret_cast<__m512i*>(X); + __m512i* v_Y_pt = reinterpret_cast<__m512i*>(Y); + + // assume 8 | t + for (size_t j = t / 8; j > 0; --j) { + __m512i v_X = _mm512_loadu_si512(v_X_pt); + __m512i v_Y = _mm512_loadu_si512(v_Y_pt); + + FwdButterfly(&v_X, &v_Y, v_W_op, v_W_precon, + v_neg_modulus, v_twice_mod); + + _mm512_storeu_si512(v_X_pt++, v_X); + _mm512_storeu_si512(v_Y_pt++, v_Y); + } + j1 += (t << 1); + } +} + +template +void ForwardTransformToBitReverseAVX512( + uint64_t* 
operand, uint64_t n, uint64_t mod, + const uint64_t* root_of_unity_powers, + const uint64_t* precon_root_of_unity_powers, uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(CheckNTTArguments(n, mod), ""); + HEXL_CHECK(mod < MaximumValue(BitShift) / 4, + "mod " << mod << " too large for BitShift " << BitShift + << " => maximum value " << MaximumValue(BitShift) / 4); + HEXL_CHECK_BOUNDS(precon_root_of_unity_powers, n, MaximumValue(BitShift), + "precon_root_of_unity_powers too large"); + HEXL_CHECK_BOUNDS(operand, n, MaximumValue(BitShift), "operand too large"); + HEXL_CHECK_BOUNDS(operand, n, input_mod_factor * mod, + "operand larger than input_mod_factor * modulus (" + << input_mod_factor << " * " << mod << ")"); + HEXL_CHECK(n >= 16, + "Don't support small transforms. Need n > 16, got n = " << n); + HEXL_CHECK( + input_mod_factor == 1 || input_mod_factor == 2 || input_mod_factor == 4, + "input_mod_factor must be 1, 2, or 4; got " << input_mod_factor); + HEXL_CHECK(output_mod_factor == 1 || output_mod_factor == 4, + "output_mod_factor must be 1 or 4; got " << output_mod_factor); + + uint64_t twice_mod = mod << 1; + + __m512i v_modulus = _mm512_set1_epi64(mod); + __m512i v_neg_modulus = _mm512_set1_epi64(-static_cast(mod)); + __m512i v_twice_mod = _mm512_set1_epi64(twice_mod); + + HEXL_VLOG(5, "root_of_unity_powers " << std::vector( + root_of_unity_powers, root_of_unity_powers + n)) + HEXL_VLOG(5, + "precon_root_of_unity_powers " << std::vector( + precon_root_of_unity_powers, precon_root_of_unity_powers + n)); + + HEXL_VLOG(5, "operand " << std::vector(operand, operand + n)); + + size_t t = (n >> 1); + size_t m = 1; + // First iteration assumes input in [0,p) + if (m < (n >> 3)) { + const uint64_t* W_op = &root_of_unity_powers[m]; + const uint64_t* W_precon = &precon_root_of_unity_powers[m]; + if (input_mod_factor <= 2) { + FwdT8(operand, v_neg_modulus, v_twice_mod, t, m, W_op, + W_precon); + } else { + FwdT8(operand, v_neg_modulus, v_twice_mod, 
t, m, W_op, + W_precon); + } + + t >>= 1; + m <<= 1; + } + for (; m < (n >> 3); m <<= 1) { + const uint64_t* W_op = &root_of_unity_powers[m]; + const uint64_t* W_precon = &precon_root_of_unity_powers[m]; + FwdT8(operand, v_neg_modulus, v_twice_mod, t, m, W_op, + W_precon); + t >>= 1; + } + + // Do T=1, T=2, T=4 separately + { + const uint64_t* W_op = &root_of_unity_powers[m]; + const uint64_t* W_precon = &precon_root_of_unity_powers[m]; + + FwdT4(operand, v_neg_modulus, v_twice_mod, m, W_op, W_precon); + m <<= 1; + W_op = &root_of_unity_powers[m]; + W_precon = &precon_root_of_unity_powers[m]; + FwdT2(operand, v_neg_modulus, v_twice_mod, m, W_op, W_precon); + m <<= 1; + W_op = &root_of_unity_powers[m]; + W_precon = &precon_root_of_unity_powers[m]; + FwdT1(operand, v_neg_modulus, v_twice_mod, m, W_op, W_precon); + } + + if (output_mod_factor == 1) { + // n power of two at least 8 => n divisible by 8 + HEXL_CHECK(n % 8 == 0, "n " << n << " not a power of 2"); + __m512i* v_X_pt = reinterpret_cast<__m512i*>(operand); + for (size_t i = 0; i < n; i += 8) { + __m512i v_X = _mm512_loadu_si512(v_X_pt); + + // Reduce from [0, 4p) to [0, p) + v_X = _mm512_hexl_small_mod_epu64(v_X, v_twice_mod); + v_X = _mm512_hexl_small_mod_epu64(v_X, v_modulus); + + HEXL_CHECK_BOUNDS(ExtractValues(v_X).data(), 8, mod); + + _mm512_storeu_si512(v_X_pt, v_X); + + ++v_X_pt; + } + } +} + +#endif // HEXL_HAS_AVX512DQ + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/ntt/inv-ntt-avx512.cpp b/intel-hexl/ntt/inv-ntt-avx512.cpp new file mode 100644 index 00000000..bac19172 --- /dev/null +++ b/intel-hexl/ntt/inv-ntt-avx512.cpp @@ -0,0 +1,31 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "ntt/inv-ntt-avx512.hpp" + +#include "intel-hexl/ntt/ntt.hpp" +#include "ntt/ntt-internal.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512IFMA +template void +InverseTransformFromBitReverseAVX512( + uint64_t* operand, uint64_t 
degree, uint64_t mod, + const uint64_t* inv_root_of_unity_powers, + const uint64_t* precon_inv_root_of_unity_powers, uint64_t input_mod_factor, + uint64_t output_mod_factor); +#endif + +#ifdef HEXL_HAS_AVX512DQ +template void +InverseTransformFromBitReverseAVX512( + uint64_t* operand, uint64_t degree, uint64_t mod, + const uint64_t* inv_root_of_unity_powers, + const uint64_t* precon_inv_root_of_unity_powers, uint64_t input_mod_factor, + uint64_t output_mod_factor); +#endif + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/ntt/inv-ntt-avx512.hpp b/intel-hexl/ntt/inv-ntt-avx512.hpp new file mode 100644 index 00000000..82fe74b7 --- /dev/null +++ b/intel-hexl/ntt/inv-ntt-avx512.hpp @@ -0,0 +1,331 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include +#include + +#include "intel-hexl/ntt/ntt.hpp" +#include "logging/logging.hpp" +#include "ntt/ntt-avx512-util.hpp" +#include "ntt/ntt-internal.hpp" +#include "number-theory/number-theory.hpp" +#include "util/avx512-util.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512DQ + +/// @brief The Harvey butterfly: assume X, Y in [0, 2p), and return X', Y' in +/// [0, 2p). such that X', Y' = X + Y (mod p), W(X - Y) (mod p). +/// @param[in,out] X Input representing 8 64-bit signed integers in SIMD form +/// @param[in,out] Y Input representing 8 64-bit signed integers in SIMD form +/// @param[in] W_op Root of unity representing 8 64-bit signed integers in SIMD +/// form +/// @param[in] W_precon Preconditioned \p W_op for BitShift-bit Barrett +/// reduction +/// @param[in] neg_modulus Negative modulus, i.e. (-p) represented as 8 64-bit +/// signed integers in SIMD form +/// @param[in] twice_modulus Twice the modulus, i.e. 2*p represented as 8 64-bit +/// signed integers in SIMD form +/// @param InputLessThanMod If true, assumes \p X, \p Y < \p p. 
Otherwise, +/// assumes \p X, \p Y < 2*\p p +/// @details See Algorithm 3 of https://arxiv.org/pdf/1205.2926.pdf +template +inline void InvButterfly(__m512i* X, __m512i* Y, __m512i W_op, __m512i W_precon, + __m512i neg_modulus, __m512i twice_modulus) { + __m512i Y_minus_2p = _mm512_sub_epi64(*Y, twice_modulus); + __m512i T = _mm512_sub_epi64(*X, Y_minus_2p); + + if (InputLessThanMod) { + // No need for modulus reduction, since inputs are in [0,p) + *X = _mm512_add_epi64(*X, *Y); + } else { + *X = _mm512_add_epi64(*X, Y_minus_2p); + __mmask8 sign_bits = _mm512_movepi64_mask(*X); + *X = _mm512_mask_add_epi64(*X, sign_bits, *X, twice_modulus); + } + __m512i Q = _mm512_hexl_mulhi_epi(W_precon, T); + __m512i Q_p = _mm512_hexl_mullo_epi(Q, neg_modulus); + *Y = _mm512_hexl_mullo_add_epi(Q_p, W_op, T); + + if (BitShift == 52) { + // Discard high 12 bits; deals with case when W*T < Q*p in the low BitShift + // bits. + *Y = _mm512_and_epi64(*Y, _mm512_set1_epi64((1ULL << 52) - 1)); + } +} + +template +void InvT1(uint64_t* operand, __m512i v_neg_modulus, __m512i v_twice_mod, + uint64_t m, const uint64_t* W_op, const uint64_t* W_precon) { + const __m512i* v_W_op_pt = reinterpret_cast(W_op); + const __m512i* v_W_precon_pt = reinterpret_cast(W_precon); + size_t j1 = 0; + + // 8 | m guaranteed by n >= 16 + HEXL_LOOP_UNROLL_8 + for (size_t i = m / 8; i > 0; --i) { + uint64_t* X = operand + j1; + __m512i* v_X_pt = reinterpret_cast<__m512i*>(X); + + __m512i v_X; + __m512i v_Y; + LoadInvInterleavedT1(X, &v_X, &v_Y); + + __m512i v_W_op = _mm512_loadu_si512(v_W_op_pt++); + __m512i v_W_precon = _mm512_loadu_si512(v_W_precon_pt++); + + InvButterfly(&v_X, &v_Y, v_W_op, v_W_precon, + v_neg_modulus, v_twice_mod); + + _mm512_storeu_si512(v_X_pt++, v_X); + _mm512_storeu_si512(v_X_pt, v_Y); + + j1 += 16; + } +} + +template +void InvT2(uint64_t* X, __m512i v_neg_modulus, __m512i v_twice_mod, uint64_t m, + const uint64_t* W_op, const uint64_t* W_precon) { + // 4 | m guaranteed by n >= 16 + 
HEXL_LOOP_UNROLL_4 + for (size_t i = m / 4; i > 0; --i) { + __m512i* v_X_pt = reinterpret_cast<__m512i*>(X); + + __m512i v_X; + __m512i v_Y; + LoadInvInterleavedT2(X, &v_X, &v_Y); + + __m512i v_W_op = LoadWOpT2(static_cast(W_op)); + __m512i v_W_precon = LoadWOpT2(static_cast(W_precon)); + + InvButterfly(&v_X, &v_Y, v_W_op, v_W_precon, v_neg_modulus, + v_twice_mod); + + _mm512_storeu_si512(v_X_pt++, v_X); + _mm512_storeu_si512(v_X_pt, v_Y); + X += 16; + + W_op += 4; + W_precon += 4; + } +} + +template +void InvT4(uint64_t* operand, __m512i v_neg_modulus, __m512i v_twice_mod, + uint64_t m, const uint64_t* W_op, const uint64_t* W_precon) { + uint64_t* X = operand; + + // 2 | m guaranteed by n >= 16 + HEXL_LOOP_UNROLL_4 + for (size_t i = m / 2; i > 0; --i) { + __m512i* v_X_pt = reinterpret_cast<__m512i*>(X); + + __m512i v_X; + __m512i v_Y; + LoadInvInterleavedT4(X, &v_X, &v_Y); + + __m512i v_W_op = LoadWOpT4(static_cast(W_op)); + __m512i v_W_precon = LoadWOpT4(static_cast(W_precon)); + + InvButterfly(&v_X, &v_Y, v_W_op, v_W_precon, v_neg_modulus, + v_twice_mod); + + WriteInvInterleavedT4(v_X, v_Y, v_X_pt); + X += 16; + + W_op += 2; + W_precon += 2; + } +} + +template +void InvT8(uint64_t* operand, __m512i v_neg_modulus, __m512i v_twice_mod, + uint64_t t, uint64_t m, const uint64_t* W_op, + const uint64_t* W_precon) { + size_t j1 = 0; + + HEXL_LOOP_UNROLL_4 + for (size_t i = 0; i < m; i++) { + uint64_t* X = operand + j1; + uint64_t* Y = X + t; + + __m512i v_W_op = _mm512_set1_epi64(*W_op++); + __m512i v_W_precon = _mm512_set1_epi64(*W_precon++); + + __m512i* v_X_pt = reinterpret_cast<__m512i*>(X); + __m512i* v_Y_pt = reinterpret_cast<__m512i*>(Y); + + // assume 8 | t + for (size_t j = t / 8; j > 0; --j) { + __m512i v_X = _mm512_loadu_si512(v_X_pt); + __m512i v_Y = _mm512_loadu_si512(v_Y_pt); + + InvButterfly(&v_X, &v_Y, v_W_op, v_W_precon, + v_neg_modulus, v_twice_mod); + + _mm512_storeu_si512(v_X_pt++, v_X); + _mm512_storeu_si512(v_Y_pt++, v_Y); + } + j1 += (t << 1); + 
} +} + +template +void InverseTransformFromBitReverseAVX512( + uint64_t* operand, uint64_t n, uint64_t mod, + const uint64_t* inv_root_of_unity_powers, + const uint64_t* precon_inv_root_of_unity_powers, uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(CheckNTTArguments(n, mod), ""); + HEXL_CHECK(mod < MaximumValue(BitShift) / 2, + "mod " << mod << " too large for BitShift " << BitShift + << " => maximum value " << MaximumValue(BitShift) / 2); + HEXL_CHECK_BOUNDS(precon_inv_root_of_unity_powers, n, MaximumValue(BitShift)); + HEXL_CHECK_BOUNDS(operand, n, MaximumValue(BitShift), "operand too large"); + HEXL_CHECK_BOUNDS(operand, n, input_mod_factor * mod, + "operand larger than input_mod_factor * modulus (" + << input_mod_factor << " * " << mod << ")"); + HEXL_CHECK(input_mod_factor == 1 || input_mod_factor == 2, + "input_mod_factor must be 1 or 2; got " << input_mod_factor); + HEXL_CHECK(output_mod_factor == 1 || output_mod_factor == 2, + "output_mod_factor must be 1 or 2; got " << output_mod_factor); + + uint64_t twice_mod = mod << 1; + __m512i v_modulus = _mm512_set1_epi64(mod); + __m512i v_neg_modulus = _mm512_set1_epi64(-static_cast(mod)); + __m512i v_twice_mod = _mm512_set1_epi64(twice_mod); + + size_t t = 1; + size_t root_index = 1; + size_t m = (n >> 1); + + // Extract t=1, t=2, t=4 loops separately + { + // t = 1 + const uint64_t* W_op = &inv_root_of_unity_powers[root_index]; + const uint64_t* W_precon = &precon_inv_root_of_unity_powers[root_index]; + if (input_mod_factor == 1) { + InvT1(operand, v_neg_modulus, v_twice_mod, m, W_op, + W_precon); + } else { + InvT1(operand, v_neg_modulus, v_twice_mod, m, W_op, + W_precon); + } + t <<= 1; + root_index += m; + m >>= 1; + + // t = 2 + W_op = &inv_root_of_unity_powers[root_index]; + W_precon = &precon_inv_root_of_unity_powers[root_index]; + InvT2(operand, v_neg_modulus, v_twice_mod, m, W_op, W_precon); + + t <<= 1; + root_index += m; + m >>= 1; + + // t = 4 + W_op = 
&inv_root_of_unity_powers[root_index]; + W_precon = &precon_inv_root_of_unity_powers[root_index]; + InvT4(operand, v_neg_modulus, v_twice_mod, m, W_op, W_precon); + t <<= 1; + root_index += m; + m >>= 1; + } + + // t >= 8 + for (; m > 1; m >>= 1) { + const uint64_t* W_op = &inv_root_of_unity_powers[root_index]; + const uint64_t* W_precon = &precon_inv_root_of_unity_powers[root_index]; + InvT8(operand, v_neg_modulus, v_twice_mod, t, m, W_op, W_precon); + t <<= 1; + root_index += m; + } + + HEXL_VLOG(4, "AVX512 intermediate operand " + << std::vector(operand, operand + n)); + + const uint64_t W_op = inv_root_of_unity_powers[root_index]; + MultiplyFactor mf_inv_n(InverseUIntMod(n, mod), BitShift, mod); + const uint64_t inv_n = mf_inv_n.Operand(); + const uint64_t inv_n_prime = mf_inv_n.BarrettFactor(); + + MultiplyFactor mf_inv_n_w(MultiplyUIntMod(inv_n, W_op, mod), BitShift, mod); + const uint64_t inv_n_w = mf_inv_n_w.Operand(); + const uint64_t inv_n_w_prime = mf_inv_n_w.BarrettFactor(); + + HEXL_VLOG(4, "inv_n_w " << inv_n_w); + + uint64_t* X = operand; + uint64_t* Y = X + (n >> 1); + + __m512i v_inv_n = _mm512_set1_epi64(inv_n); + __m512i v_inv_n_prime = _mm512_set1_epi64(inv_n_prime); + __m512i v_inv_n_w = _mm512_set1_epi64(inv_n_w); + __m512i v_inv_n_w_prime = _mm512_set1_epi64(inv_n_w_prime); + + __m512i* v_X_pt = reinterpret_cast<__m512i*>(X); + __m512i* v_Y_pt = reinterpret_cast<__m512i*>(Y); + + const __m512i two_pow52_min1 = _mm512_set1_epi64((1ULL << 52) - 1); + + // Merge final InvNTT loop with modulus reduction baked-in + HEXL_LOOP_UNROLL_4 + for (size_t j = n / 16; j > 0; --j) { + __m512i v_X = _mm512_loadu_si512(v_X_pt); + __m512i v_Y = _mm512_loadu_si512(v_Y_pt); + + // Slightly different from regular InvButterfly because different W is used + // for X and Y + + __m512i Y_minus_2p = _mm512_sub_epi64(v_Y, v_twice_mod); + __m512i X_plus_Y_mod2p = + _mm512_hexl_small_add_mod_epi64(v_X, v_Y, v_twice_mod); + // T = *X + twice_mod - *Y + __m512i T = 
_mm512_sub_epi64(v_X, Y_minus_2p); + + __m512i Q1 = _mm512_hexl_mulhi_epi(v_inv_n_prime, X_plus_Y_mod2p); + // X = inv_N * X_plus_Y_mod2p - Q1 * modulus; + __m512i inv_N_tx = _mm512_hexl_mullo_epi(v_inv_n, X_plus_Y_mod2p); + v_X = _mm512_hexl_mullo_add_epi(inv_N_tx, Q1, v_neg_modulus); + if (BitShift == 52) { + // Discard high 12 bits; deals with case when W*T < Q1*p in the low + // BitShift bits. + v_X = _mm512_and_epi64(v_X, two_pow52_min1); + } + + __m512i Q2 = _mm512_hexl_mulhi_epi(v_inv_n_w_prime, T); + // Y = inv_N_W * T - Q2 * modulus; + __m512i inv_N_W_T = _mm512_hexl_mullo_epi(v_inv_n_w, T); + v_Y = _mm512_hexl_mullo_add_epi(inv_N_W_T, Q2, v_neg_modulus); + if (BitShift == 52) { + // Discard high 12 bits; deals with case when W*T < Q2*p in the low + // BitShift bits. + v_Y = _mm512_and_epi64(v_Y, two_pow52_min1); + } + + if (output_mod_factor == 1) { + // Modulus reduction from [0,2p), to [0,p) + v_X = _mm512_hexl_small_mod_epu64(v_X, v_modulus); + v_Y = _mm512_hexl_small_mod_epu64(v_Y, v_modulus); + } + + _mm512_storeu_si512(v_X_pt++, v_X); + _mm512_storeu_si512(v_Y_pt++, v_Y); + } + + HEXL_VLOG(5, "AVX512 returning operand " + << std::vector(operand, operand + n)); +} + +#endif // HEXL_HAS_AVX512DQ + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/ntt/ntt-avx512-util.hpp b/intel-hexl/ntt/ntt-avx512-util.hpp new file mode 100644 index 00000000..fa7539a0 --- /dev/null +++ b/intel-hexl/ntt/ntt-avx512-util.hpp @@ -0,0 +1,216 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include +#include + +#include "intel-hexl/ntt/ntt.hpp" +#include "ntt/ntt-internal.hpp" +#include "number-theory/number-theory.hpp" +#include "util/avx512-util.hpp" + +namespace intel { +namespace hexl { + +// Given input: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +// Returns +// *out1 = _mm512_set_epi64(14, 6, 12, 4, 10, 2, 8, 0); +// *out2 = _mm512_set_epi64(15, 7, 13, 5, 11, 3, 9, 1); 
+inline void LoadFwdInterleavedT1(const uint64_t* arg, __m512i* out1, + __m512i* out2) { + const __m512i* arg_512 = reinterpret_cast(arg); + + // 0, 1, 2, 3, 4, 5, 6, 7 + __m512i v1 = _mm512_loadu_si512(arg_512++); + // 8, 9, 10, 11, 12, 13, 14, 15 + __m512i v2 = _mm512_loadu_si512(arg_512); + + const __m512i perm_idx = _mm512_set_epi64(6, 7, 4, 5, 2, 3, 0, 1); + + // 1, 0, 3, 2, 5, 4, 7, 6 + __m512i v1_perm = _mm512_permutexvar_epi64(perm_idx, v1); + // 9, 8, 11, 10, 13, 12, 15, 14 + __m512i v2_perm = _mm512_permutexvar_epi64(perm_idx, v2); + + *out1 = _mm512_mask_blend_epi64(0b10101010, v1, v2_perm); + *out2 = _mm512_mask_blend_epi64(0b10101010, v1_perm, v2); +} + +// Given input: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +// Returns +// *out1 = _mm512_set_epi64(14, 12, 10, 8, 6, 4, 2, 0); +// *out2 = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1); +inline void LoadInvInterleavedT1(const uint64_t* arg, __m512i* out1, + __m512i* out2) { + const __m512i vperm_hi_idx = _mm512_set_epi64(6, 4, 2, 0, 7, 5, 3, 1); + const __m512i vperm_lo_idx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0); + const __m512i vperm2_idx = _mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4); + + const __m512i* arg_512 = reinterpret_cast(arg); + + // 7, 6, 5, 4, 3, 2, 1, 0 + __m512i v_7to0 = _mm512_loadu_si512(arg_512++); + // 15, 14, 13, 12, 11, 10, 9, 8 + __m512i v_15to8 = _mm512_loadu_si512(arg_512); + // 7, 5, 3, 1, 6, 4, 2, 0 + __m512i perm_lo = _mm512_permutexvar_epi64(vperm_lo_idx, v_7to0); + // 14, 12, 10, 8, 15, 13, 11, 9 + __m512i perm_hi = _mm512_permutexvar_epi64(vperm_hi_idx, v_15to8); + + *out1 = _mm512_mask_blend_epi64(0b00001111, perm_hi, perm_lo); + *out2 = _mm512_mask_blend_epi64(0b11110000, perm_hi, perm_lo); + *out2 = _mm512_permutexvar_epi64(vperm2_idx, *out2); +} + +// Given input: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +// Returns +// *out1 = _mm512_set_epi64(13, 12, 9, 8, 5, 4, 1, 0); +// *out2 = _mm512_set_epi64(15, 14, 11, 10, 7, 6, 3, 2) +inline void 
LoadFwdInterleavedT2(const uint64_t* arg, __m512i* out1, + __m512i* out2) { + const __m512i* arg_512 = reinterpret_cast(arg); + + // 11, 10, 9, 8, 3, 2, 1, 0 + __m512i v1 = _mm512_loadu_si512(arg_512++); + // 15, 14, 13, 12, 7, 6, 5, 4 + __m512i v2 = _mm512_loadu_si512(arg_512); + + const __m512i v1_perm_idx = _mm512_set_epi64(5, 4, 7, 6, 1, 0, 3, 2); + + __m512i v1_perm = _mm512_permutexvar_epi64(v1_perm_idx, v1); + __m512i v2_perm = _mm512_permutexvar_epi64(v1_perm_idx, v2); + + *out1 = _mm512_mask_blend_epi64(0b11001100, v1, v2_perm); + *out2 = _mm512_mask_blend_epi64(0b11001100, v1_perm, v2); +} + +// Given input: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +// Returns +// *out1 = _mm512_set_epi64(14, 12, 10, 8, 6, 4, 2, 0); +// *out2 = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1); +inline void LoadInvInterleavedT2(const uint64_t* arg, __m512i* out1, + __m512i* out2) { + const __m512i* arg_512 = reinterpret_cast(arg); + + __m512i v1 = _mm512_loadu_si512(arg_512++); + __m512i v2 = _mm512_loadu_si512(arg_512); + + const __m512i v1_perm_idx = _mm512_set_epi64(6, 7, 4, 5, 2, 3, 0, 1); + + __m512i v1_perm = _mm512_permutexvar_epi64(v1_perm_idx, v1); + __m512i v2_perm = _mm512_permutexvar_epi64(v1_perm_idx, v2); + + *out1 = _mm512_mask_blend_epi64(0b10101010, v1, v2_perm); + *out2 = _mm512_mask_blend_epi64(0b10101010, v1_perm, v2); +} + +// Returns +// *out1 = _mm512_set_epi64(arg[11], arg[10], arg[9], arg[8], +// arg[3], arg[2], arg[1], arg[0]); +// *out2 = _mm512_set_epi64(arg[15], arg[14], arg[13], arg[12], +// arg[7], arg[6], arg[5], arg[4]); +inline void LoadFwdInterleavedT4(const uint64_t* arg, __m512i* out1, + __m512i* out2) { + const __m512i* arg_512 = reinterpret_cast(arg); + + const __m512i vperm2_idx = _mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4); + __m512i v_7to0 = _mm512_loadu_si512(arg_512++); + __m512i v_15to8 = _mm512_loadu_si512(arg_512); + __m512i perm_hi = _mm512_permutexvar_epi64(vperm2_idx, v_15to8); + *out1 = 
_mm512_mask_blend_epi64(0b0001111, perm_hi, v_7to0); + *out2 = _mm512_mask_blend_epi64(0b11110000, perm_hi, v_7to0); + *out2 = _mm512_permutexvar_epi64(vperm2_idx, *out2); +} + +inline void LoadInvInterleavedT4(const uint64_t* arg, __m512i* out1, + __m512i* out2) { + const __m512i* arg_512 = reinterpret_cast(arg); + + // 0, 1, 2, 3, 4, 5, 6, 7 + __m512i v1 = _mm512_loadu_si512(arg_512++); + // 8, 9, 10, 11, 12, 13, 14, 15 + __m512i v2 = _mm512_loadu_si512(arg_512); + const __m512i perm_idx = _mm512_set_epi64(5, 4, 7, 6, 1, 0, 3, 2); + + // 1, 0, 3, 2, 5, 4, 7, 6 + __m512i v1_perm = _mm512_permutexvar_epi64(perm_idx, v1); + // 9, 8, 11, 10, 13, 12, 15, 14 + __m512i v2_perm = _mm512_permutexvar_epi64(perm_idx, v2); + + *out1 = _mm512_mask_blend_epi64(0b11001100, v1, v2_perm); + *out2 = _mm512_mask_blend_epi64(0b11001100, v1_perm, v2); +} + +// Given inputs +// @param arg1 = _mm512_set_epi64(15, 14, 13, 12, 11, 10, 9, 8); +// @param arg2 = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0); +// Writes out = {8, 0, 9, 1, 10, 2, 11, 3, +// 12, 4, 13, 5, 14, 6, 15, 7} +inline void WriteFwdInterleavedT1(__m512i arg1, __m512i arg2, __m512i* out) { + const __m512i vperm2_idx = _mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4); + const __m512i v_X_out_idx = _mm512_set_epi64(7, 3, 6, 2, 5, 1, 4, 0); + const __m512i v_Y_out_idx = _mm512_set_epi64(3, 7, 2, 6, 1, 5, 0, 4); + + // v_Y => (4, 5, 6, 7, 0, 1, 2, 3) + arg2 = _mm512_permutexvar_epi64(vperm2_idx, arg2); + // 4, 5, 6, 7, 12, 13, 14, 15 + __m512i perm_lo = _mm512_mask_blend_epi64(0b00001111, arg1, arg2); + + // 8, 9, 10, 11, 0, 1, 2, 3 + __m512i perm_hi = _mm512_mask_blend_epi64(0b11110000, arg1, arg2); + + arg1 = _mm512_permutexvar_epi64(v_X_out_idx, perm_hi); + arg2 = _mm512_permutexvar_epi64(v_Y_out_idx, perm_lo); + + _mm512_storeu_si512(out++, arg1); + _mm512_storeu_si512(out, arg2); +} + +// Given inputs +// @param arg1 = _mm512_set_epi64(15, 14, 13, 12, 11, 10, 9, 8); +// @param arg2 = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0); +// 
Writes out = {8, 9, 10, 11, 0, 1, 2, 3, +// 12, 13, 14, 15, 4, 5, 6, 7} +inline void WriteInvInterleavedT4(__m512i arg1, __m512i arg2, __m512i* out) { + __m256i x0 = _mm512_extracti64x4_epi64(arg1, 0); + __m256i x1 = _mm512_extracti64x4_epi64(arg1, 1); + __m256i y0 = _mm512_extracti64x4_epi64(arg2, 0); + __m256i y1 = _mm512_extracti64x4_epi64(arg2, 1); + __m256i* out_256 = reinterpret_cast<__m256i*>(out); + _mm256_storeu_si256(out_256++, x0); + _mm256_storeu_si256(out_256++, y0); + _mm256_storeu_si256(out_256++, x1); + _mm256_storeu_si256(out_256++, y1); +} + +// Returns _mm512_set_epi64(arg[3], arg[3], arg[2], arg[2], +// arg[1], arg[1], arg[0], arg[0]); +inline __m512i LoadWOpT2(const void* arg) { + const __m512i vperm_w_idx = _mm512_set_epi64(3, 3, 2, 2, 1, 1, 0, 0); + + __m256i v_W_op_256 = + _mm256_loadu_si256(reinterpret_cast(arg)); + __m512i v_W_op = _mm512_broadcast_i64x4(v_W_op_256); + v_W_op = _mm512_permutexvar_epi64(vperm_w_idx, v_W_op); + + return v_W_op; +} + +// Returns _mm512_set_epi64(arg[1], arg[1], arg[1], arg[1], +// arg[0], arg[0], arg[0], arg[0]); +inline __m512i LoadWOpT4(const void* arg) { + const __m512i vperm_w_idx = _mm512_set_epi64(1, 1, 1, 1, 0, 0, 0, 0); + + __m128i v_W_op_128 = _mm_loadu_si128(reinterpret_cast(arg)); + __m512i v_W_op = _mm512_broadcast_i64x2(v_W_op_128); + v_W_op = _mm512_permutexvar_epi64(vperm_w_idx, v_W_op); + + return v_W_op; +} + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/ntt/ntt-internal.cpp b/intel-hexl/ntt/ntt-internal.cpp new file mode 100644 index 00000000..ea72db73 --- /dev/null +++ b/intel-hexl/ntt/ntt-internal.cpp @@ -0,0 +1,480 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "ntt/ntt-internal.hpp" + +#include +#include +#include +#include + +#include "logging/logging.hpp" +#include "ntt/fwd-ntt-avx512.hpp" +#include "ntt/inv-ntt-avx512.hpp" +#include "number-theory/number-theory.hpp" +#include "util/aligned-allocator.hpp" 
+#include "util/check.hpp" +#include "util/cpu-features.hpp" + +namespace intel { +namespace hexl { + +NTT::NTTImpl::NTTImpl(uint64_t degree, uint64_t p, uint64_t root_of_unity) + : m_degree(degree), m_p(p), m_w(root_of_unity) { + HEXL_CHECK(CheckNTTArguments(degree, p), ""); + HEXL_CHECK(IsPrimitiveRoot(m_w, 2 * degree, p), + m_w << " is not a primitive 2*" << degree << "'th root of unity"); + +#ifdef HEXL_HAS_AVX512IFMA + if (m_p < s_max_fwd_ifma_modulus) { + HEXL_VLOG(3, "Setting m_fwd_bit_shift to " << s_ifma_shift_bits); + m_fwd_bit_shift = s_ifma_shift_bits; + } + if (m_p < s_max_inv_ifma_modulus) { + HEXL_VLOG(3, "Setting m_inv_bit_shift to " << s_ifma_shift_bits); + m_inv_bit_shift = s_ifma_shift_bits; + } +#endif + + m_degree_bits = Log2(m_degree); + m_winv = InverseUIntMod(m_w, m_p); + ComputeRootOfUnityPowers(); +} + +NTT::NTTImpl::NTTImpl(uint64_t degree, uint64_t p) + : NTTImpl(degree, p, MinimalPrimitiveRoot(2 * degree, p)) {} + +NTT::NTTImpl::~NTTImpl() = default; + +void NTT::NTTImpl::ComputeRootOfUnityPowers() { + AlignedVector64 root_of_unity_powers(m_degree); + AlignedVector64 inv_root_of_unity_powers(m_degree); + + // 64-bit precon + root_of_unity_powers[0] = 1; + inv_root_of_unity_powers[0] = InverseUIntMod(1, m_p); + uint64_t idx = 0; + uint64_t prev_idx = idx; + + for (size_t i = 1; i < m_degree; i++) { + idx = ReverseBitsUInt(i, m_degree_bits); + root_of_unity_powers[idx] = + MultiplyUIntMod(root_of_unity_powers[prev_idx], m_w, m_p); + inv_root_of_unity_powers[idx] = + InverseUIntMod(root_of_unity_powers[idx], m_p); + + prev_idx = idx; + } + + // Reordering inv_root_of_powers + AlignedVector64 temp(m_degree); + temp[0] = inv_root_of_unity_powers[0]; + idx = 1; + + for (size_t m = (m_degree >> 1); m > 0; m >>= 1) { + for (size_t i = 0; i < m; i++) { + temp[idx] = inv_root_of_unity_powers[m + i]; + idx++; + } + } + inv_root_of_unity_powers = temp; + + // 64-bit preconditioned root of unity powers + AlignedVector64 
precon64_root_of_unity_powers; + precon64_root_of_unity_powers.reserve(m_degree); + for (uint64_t root_of_unity : root_of_unity_powers) { + MultiplyFactor mf(root_of_unity, 64, m_p); + precon64_root_of_unity_powers.push_back(mf.BarrettFactor()); + } + + NTT::NTTImpl::GetPrecon64RootOfUnityPowers() = + std::move(precon64_root_of_unity_powers); + + // 52-bit preconditioned root of unity powers + AlignedVector64 precon52_root_of_unity_powers; + precon52_root_of_unity_powers.reserve(m_degree); + for (uint64_t root_of_unity : root_of_unity_powers) { + MultiplyFactor mf(root_of_unity, 52, m_p); + precon52_root_of_unity_powers.push_back(mf.BarrettFactor()); + } + + NTT::NTTImpl::GetPrecon52RootOfUnityPowers() = + std::move(precon52_root_of_unity_powers); + + NTT::NTTImpl::GetRootOfUnityPowers() = std::move(root_of_unity_powers); + + // 64-bit preconditioned inverse root of unity powers + AlignedVector64 precon64_inv_root_of_unity_powers; + precon64_inv_root_of_unity_powers.reserve(m_degree); + for (uint64_t inv_root_of_unity : inv_root_of_unity_powers) { + MultiplyFactor mf(inv_root_of_unity, 64, m_p); + precon64_inv_root_of_unity_powers.push_back(mf.BarrettFactor()); + } + + NTT::NTTImpl::GetPrecon64InvRootOfUnityPowers() = + std::move(precon64_inv_root_of_unity_powers); + + // 52-bit preconditioned inverse root of unity powers + AlignedVector64 precon52_inv_root_of_unity_powers; + precon52_inv_root_of_unity_powers.reserve(m_degree); + for (uint64_t inv_root_of_unity : inv_root_of_unity_powers) { + MultiplyFactor mf(inv_root_of_unity, 52, m_p); + precon52_inv_root_of_unity_powers.push_back(mf.BarrettFactor()); + } + + NTT::NTTImpl::GetPrecon52InvRootOfUnityPowers() = + std::move(precon52_inv_root_of_unity_powers); + + NTT::NTTImpl::GetInvRootOfUnityPowers() = std::move(inv_root_of_unity_powers); +} + +void NTT::NTTImpl::ComputeForward(uint64_t* result, const uint64_t* operand, + uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(m_fwd_bit_shift == 
s_ifma_shift_bits || + m_fwd_bit_shift == s_default_shift_bits, + "Bit shift " << m_fwd_bit_shift << " should be either " + << s_ifma_shift_bits << " or " + << s_default_shift_bits); + HEXL_CHECK(result != nullptr, "result == nullptr"); + HEXL_CHECK(operand != nullptr, "operand == nullptr"); + HEXL_CHECK_BOUNDS(operand, m_degree, m_p * input_mod_factor); + + if (result != operand) { + std::memcpy(result, operand, m_degree * sizeof(uint64_t)); + } + +#ifdef HEXL_HAS_AVX512IFMA + if (has_avx512ifma && m_fwd_bit_shift == s_ifma_shift_bits && + (m_p < s_max_fwd_ifma_modulus && (m_degree >= 16))) { + const uint64_t* root_of_unity_powers = GetRootOfUnityPowersPtr(); + const uint64_t* precon_root_of_unity_powers = + GetPrecon52RootOfUnityPowersPtr(); + + HEXL_VLOG(3, "Calling 52-bit AVX512-IFMA NTT"); + ForwardTransformToBitReverseAVX512( + result, m_degree, m_p, root_of_unity_powers, + precon_root_of_unity_powers, input_mod_factor, output_mod_factor); + return; + } +#endif + +#ifdef HEXL_HAS_AVX512DQ + if (has_avx512dq && m_degree >= 16) { + HEXL_VLOG(3, "Calling 64-bit AVX512 NTT"); + const uint64_t* root_of_unity_powers = GetRootOfUnityPowersPtr(); + const uint64_t* precon_root_of_unity_powers = + GetPrecon64RootOfUnityPowersPtr(); + + ForwardTransformToBitReverseAVX512( + result, m_degree, m_p, root_of_unity_powers, + precon_root_of_unity_powers, input_mod_factor, output_mod_factor); + return; + } +#endif + + HEXL_VLOG(3, "Calling 64-bit default NTT"); + const uint64_t* root_of_unity_powers = GetRootOfUnityPowersPtr(); + const uint64_t* precon_root_of_unity_powers = + GetPrecon64RootOfUnityPowersPtr(); + + ForwardTransformToBitReverse64(result, m_degree, m_p, root_of_unity_powers, + precon_root_of_unity_powers, input_mod_factor, + output_mod_factor); +} + +void NTT::NTTImpl::ComputeInverse(uint64_t* result, const uint64_t* operand, + uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(operand != nullptr, "operand == nullptr"); + HEXL_CHECK(operand 
!= nullptr, "operand == nullptr"); + + HEXL_CHECK_BOUNDS(operand, m_degree, m_p * input_mod_factor); + + if (operand != result) { + std::memcpy(result, operand, m_degree * sizeof(uint64_t)); + } + + HEXL_CHECK(m_inv_bit_shift == s_ifma_shift_bits || + m_inv_bit_shift == s_default_shift_bits, + "Bit shift " << m_inv_bit_shift << " should be either " + << s_ifma_shift_bits << " or " + << s_default_shift_bits); + +#ifdef HEXL_HAS_AVX512IFMA + if (has_avx512ifma && m_inv_bit_shift == s_ifma_shift_bits && + (m_p < s_max_inv_ifma_modulus) && (m_degree >= 16)) { + HEXL_VLOG(3, "Calling 52-bit AVX512-IFMA InvNTT"); + const uint64_t* inv_root_of_unity_powers = GetInvRootOfUnityPowersPtr(); + const uint64_t* precon_inv_root_of_unity_powers = + GetPrecon52InvRootOfUnityPowersPtr(); + InverseTransformFromBitReverseAVX512( + result, m_degree, m_p, inv_root_of_unity_powers, + precon_inv_root_of_unity_powers, input_mod_factor, output_mod_factor); + return; + } +#endif + +#ifdef HEXL_HAS_AVX512DQ + if (has_avx512dq && m_degree >= 16) { + HEXL_VLOG(3, "Calling 64-bit AVX512 InvNTT"); + const uint64_t* inv_root_of_unity_powers = GetInvRootOfUnityPowersPtr(); + const uint64_t* precon_inv_root_of_unity_powers = + GetPrecon64InvRootOfUnityPowersPtr(); + + InverseTransformFromBitReverseAVX512( + result, m_degree, m_p, inv_root_of_unity_powers, + precon_inv_root_of_unity_powers, input_mod_factor, output_mod_factor); + return; + } +#endif + + HEXL_VLOG(3, "Calling 64-bit default InvNTT"); + const uint64_t* inv_root_of_unity_powers = GetInvRootOfUnityPowersPtr(); + const uint64_t* precon_inv_root_of_unity_powers = + GetPrecon64InvRootOfUnityPowersPtr(); + InverseTransformFromBitReverse64( + result, m_degree, m_p, inv_root_of_unity_powers, + precon_inv_root_of_unity_powers, input_mod_factor, output_mod_factor); +} + +// NTT API +NTT::NTT() = default; + +NTT::NTT(uint64_t degree, uint64_t p) + : m_impl(std::make_shared(degree, p)) {} + +NTT::NTT(uint64_t degree, uint64_t p, uint64_t 
root_of_unity) + : m_impl(std::make_shared(degree, p, root_of_unity)) {} + +NTT::~NTT() = default; + +void NTT::ComputeForward(uint64_t* result, const uint64_t* operand, + uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(operand != nullptr, "operand == nullptr"); + HEXL_CHECK(result != nullptr, "result == nullptr"); + HEXL_CHECK( + input_mod_factor == 1 || input_mod_factor == 2 || input_mod_factor == 4, + "input_mod_factor must be 1, 2 or 4; got " << input_mod_factor); + HEXL_CHECK(output_mod_factor == 1 || output_mod_factor == 4, + "output_mod_factor must be 1 or 4; got " << output_mod_factor); + + m_impl->ComputeForward(result, operand, input_mod_factor, output_mod_factor); +} + +void NTT::ComputeInverse(uint64_t* result, const uint64_t* operand, + uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(operand != nullptr, "operand == nullptr"); + HEXL_CHECK(result != nullptr, "result == nullptr"); + HEXL_CHECK(input_mod_factor == 1 || input_mod_factor == 2, + "input_mod_factor must be 1 or 2; got " << input_mod_factor); + HEXL_CHECK(output_mod_factor == 1 || output_mod_factor == 2, + "output_mod_factor must be 1 or 2; got " << output_mod_factor); + + m_impl->ComputeInverse(result, operand, input_mod_factor, output_mod_factor); +} + +// Free functions + +void ForwardTransformToBitReverse64(uint64_t* operand, uint64_t n, uint64_t mod, + const uint64_t* root_of_unity_powers, + const uint64_t* precon_root_of_unity_powers, + uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(CheckNTTArguments(n, mod), ""); + HEXL_CHECK_BOUNDS(operand, n, mod * input_mod_factor); + HEXL_CHECK(root_of_unity_powers != nullptr, + "root_of_unity_powers == nullptr"); + HEXL_CHECK(precon_root_of_unity_powers != nullptr, + "precon_root_of_unity_powers == nullptr"); + HEXL_CHECK( + input_mod_factor == 1 || input_mod_factor == 2 || input_mod_factor == 4, + "input_mod_factor must be 1, 2, or 4; got " << input_mod_factor); + 
(void)(input_mod_factor); // Avoid unused parameter warning + HEXL_CHECK(output_mod_factor == 1 || output_mod_factor == 4, + "output_mod_factor must be 1 or 4; got " << output_mod_factor); + + uint64_t twice_mod = mod << 1; + size_t t = (n >> 1); + + for (size_t m = 1; m < n; m <<= 1) { + size_t j1 = 0; + for (size_t i = 0; i < m; i++) { + size_t j2 = j1 + t; + const uint64_t W_op = root_of_unity_powers[m + i]; + const uint64_t W_precon = precon_root_of_unity_powers[m + i]; + + uint64_t* X = operand + j1; + uint64_t* Y = X + t; + + uint64_t tx; + uint64_t T; + HEXL_LOOP_UNROLL_4 + for (size_t j = j1; j < j2; j++) { + // The Harvey butterfly: assume X, Y in [0, 4p), and return X', Y' + // in [0, 4p). Such that X', Y' = X + WY, X - WY (mod p). + // See Algorithm 4 of https://arxiv.org/pdf/1205.2926.pdf + HEXL_CHECK(*X < mod * 4, "input X " << (*X) << " too large"); + HEXL_CHECK(*Y < mod * 4, "input Y " << (*Y) << " too large"); + + tx = (*X >= twice_mod) ? (*X - twice_mod) : *X; + T = MultiplyUIntModLazy<64>(*Y, W_op, W_precon, mod); + + *X++ = tx + T; + *Y++ = tx + twice_mod - T; + + HEXL_CHECK(tx + T < mod * 4, "ouput X " << (tx + T) << " too large"); + HEXL_CHECK(tx + twice_mod - T < mod * 4, + "output Y " << (tx + twice_mod - T) << " too large"); + } + j1 += (t << 1); + } + t >>= 1; + } + if (output_mod_factor == 1) { + for (size_t i = 0; i < n; ++i) { + if (operand[i] >= twice_mod) { + operand[i] -= twice_mod; + } + if (operand[i] >= mod) { + operand[i] -= mod; + } + HEXL_CHECK(operand[i] < mod, "Incorrect modulus reduction in NTT " + << operand[i] << " >= " << mod); + } + } +} + +void ReferenceForwardTransformToBitReverse( + uint64_t* operand, uint64_t n, uint64_t mod, + const uint64_t* root_of_unity_powers) { + HEXL_CHECK(CheckNTTArguments(n, mod), ""); + HEXL_CHECK(root_of_unity_powers != nullptr, + "root_of_unity_powers == nullptr"); + HEXL_CHECK(operand != nullptr, "operand == nullptr"); + + size_t t = (n >> 1); + for (size_t m = 1; m < n; m <<= 1) { + 
size_t j1 = 0; + for (size_t i = 0; i < m; i++) { + size_t j2 = j1 + t; + const uint64_t W_op = root_of_unity_powers[m + i]; + + uint64_t* X = operand + j1; + uint64_t* Y = X + t; + for (size_t j = j1; j < j2; j++) { + uint64_t tx = *X; + // X', Y' = X + WY, X - WY (mod p). + uint64_t W_x_Y = MultiplyUIntMod(*Y, W_op, mod); + *X++ = AddUIntMod(tx, W_x_Y, mod); + *Y++ = SubUIntMod(tx, W_x_Y, mod); + } + j1 += (t << 1); + } + t >>= 1; + } +} + +void InverseTransformFromBitReverse64( + uint64_t* operand, uint64_t n, uint64_t mod, + const uint64_t* inv_root_of_unity_powers, + const uint64_t* precon_inv_root_of_unity_powers, uint64_t input_mod_factor, + uint64_t output_mod_factor) { + HEXL_CHECK(CheckNTTArguments(n, mod), ""); + HEXL_CHECK(inv_root_of_unity_powers != nullptr, + "inv_root_of_unity_powers == nullptr"); + HEXL_CHECK(precon_inv_root_of_unity_powers != nullptr, + "precon_inv_root_of_unity_powers == nullptr"); + HEXL_CHECK(operand != nullptr, "operand == nullptr"); + HEXL_CHECK(input_mod_factor == 1 || input_mod_factor == 2, + "input_mod_factor must be 1 or 2; got " << input_mod_factor); + (void)(input_mod_factor); // Avoid unused parameter warning + HEXL_CHECK(output_mod_factor == 1 || output_mod_factor == 2, + "output_mod_factor must be 1 or 2; got " << output_mod_factor); + + uint64_t twice_mod = mod << 1; + size_t t = 1; + size_t root_index = 1; + + for (size_t m = (n >> 1); m > 1; m >>= 1) { + size_t j1 = 0; + for (size_t i = 0; i < m; i++, root_index++) { + size_t j2 = j1 + t; + const uint64_t W_op = inv_root_of_unity_powers[root_index]; + const uint64_t W_op_precon = precon_inv_root_of_unity_powers[root_index]; + + HEXL_VLOG(4, "m = " << i << ", i = " << i); + HEXL_VLOG(4, "j1 = " << j1 << ", j2 = " << j2); + + uint64_t* X = operand + j1; + uint64_t* Y = X + t; + + uint64_t tx; + uint64_t ty; + + HEXL_LOOP_UNROLL_4 + for (size_t j = j1; j < j2; j++) { + HEXL_VLOG(4, "Loaded *X " << *X); + HEXL_VLOG(4, "Loaded *Y " << *Y); + // The Harvey butterfly: 
assume X, Y in [0, 2p), and return X', Y' + // in [0, 2p). X', Y' = X + Y (mod p), W(X - Y) (mod p). + tx = *X + *Y; + ty = *X + twice_mod - *Y; + + *X++ = (tx >= twice_mod) ? (tx - twice_mod) : tx; + *Y++ = MultiplyUIntModLazy<64>(ty, W_op, W_op_precon, mod); + } + j1 += (t << 1); + } + t <<= 1; + } + + const uint64_t W_op = inv_root_of_unity_powers[root_index]; + const uint64_t inv_n = InverseUIntMod(n, mod); + const uint64_t inv_n_w = MultiplyUIntMod(inv_n, W_op, mod); + + uint64_t* X = operand; + uint64_t* Y = X + (n >> 1); + uint64_t tx; + uint64_t ty; + + for (size_t j = (n >> 1); j < n; j++) { + tx = *X + *Y; + if (tx >= twice_mod) { + tx -= twice_mod; + } + ty = *X + twice_mod - *Y; + *X++ = MultiplyUIntModLazy<64>(tx, inv_n, mod); + *Y++ = MultiplyUIntModLazy<64>(ty, inv_n_w, mod); + } + + if (output_mod_factor == 1) { + // Reduce from [0, 2p) to [0,p) + for (size_t i = 0; i < n; ++i) { + if (operand[i] >= mod) { + operand[i] -= mod; + } + HEXL_CHECK(operand[i] < mod, "Incorrect modulus reduction in InvNTT" + << operand[i] << " >= " << mod); + } + } +} + +bool CheckNTTArguments(uint64_t degree, uint64_t p) { + // Avoid unused parameter warnings + (void)degree; + (void)p; + HEXL_CHECK(IsPowerOfTwo(degree), + "degree " << degree << " is not a power of 2"); + HEXL_CHECK(degree <= (1 << NTT::NTTImpl::s_max_degree_bits), + "degree should be less than 2^" << NTT::NTTImpl::s_max_degree_bits + << " got " << degree); + + HEXL_CHECK(p % (2 * degree) == 1, "p mod 2n != 1"); + return true; +} + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/ntt/ntt-internal.hpp b/intel-hexl/ntt/ntt-internal.hpp new file mode 100644 index 00000000..a1025221 --- /dev/null +++ b/intel-hexl/ntt/ntt-internal.hpp @@ -0,0 +1,171 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include + +#include "intel-hexl/ntt/ntt.hpp" +#include "intel-hexl/util/util.hpp" +#include "number-theory/number-theory.hpp" 
+#include "util/aligned-allocator.hpp" +#include "util/check.hpp" +#include "util/util-internal.hpp" + +namespace intel { +namespace hexl { + +class NTT::NTTImpl { + public: + NTTImpl(uint64_t degree, uint64_t p, uint64_t root_of_unity); + NTTImpl(uint64_t degree, uint64_t p); + + ~NTTImpl(); + + uint64_t GetMinimalRootOfUnity() const { return m_w; } + + uint64_t GetDegree() const { return m_degree; } + + uint64_t GetModulus() const { return m_p; } + + AlignedVector64& GetPrecon64RootOfUnityPowers() { + return m_precon64_root_of_unity_powers; + } + + uint64_t* GetPrecon64RootOfUnityPowersPtr() { + return GetPrecon64RootOfUnityPowers().data(); + } + + AlignedVector64& GetPrecon52RootOfUnityPowers() { + return m_precon52_root_of_unity_powers; + } + + uint64_t* GetPrecon52RootOfUnityPowersPtr() { + return GetPrecon52RootOfUnityPowers().data(); + } + + uint64_t* GetRootOfUnityPowersPtr() { return GetRootOfUnityPowers().data(); } + + // Returns the vector of pre-computed root of unity powers for the modulus + // and root of unity. + AlignedVector64& GetRootOfUnityPowers() { + return m_root_of_unity_powers; + } + + // Returns the root of unity at index i. + uint64_t GetRootOfUnityPower(size_t i) { return GetRootOfUnityPowers()[i]; } + + // Returns the vector of 64-bit pre-conditioned pre-computed root of unity + // powers for the modulus and root of unity. + AlignedVector64& GetPrecon64InvRootOfUnityPowers() { + return m_precon64_inv_root_of_unity_powers; + } + + uint64_t* GetPrecon64InvRootOfUnityPowersPtr() { + return GetPrecon64InvRootOfUnityPowers().data(); + } + + // Returns the vector of 52-bit pre-conditioned pre-computed root of unity + // powers for the modulus and root of unity. 
+ AlignedVector64& GetPrecon52InvRootOfUnityPowers() { + return m_precon52_inv_root_of_unity_powers; + } + + uint64_t* GetPrecon52InvRootOfUnityPowersPtr() { + return GetPrecon52InvRootOfUnityPowers().data(); + } + + AlignedVector64& GetInvRootOfUnityPowers() { + return m_inv_root_of_unity_powers; + } + + uint64_t* GetInvRootOfUnityPowersPtr() { + return GetInvRootOfUnityPowers().data(); + } + + uint64_t GetInvRootOfUnityPower(size_t i) { + return GetInvRootOfUnityPowers()[i]; + } + + void ComputeForward(uint64_t* result, const uint64_t* operand, + uint64_t input_mod_factor, uint64_t output_mod_factor); + + void ComputeInverse(uint64_t* result, const uint64_t* operand, + uint64_t input_mod_factor, uint64_t output_mod_factor); + + static const size_t s_max_degree_bits{20}; // Maximum power of 2 in degree + + // Maximum number of bits in modulus; + static const size_t s_max_modulus_bits{62}; + + // Default bit shift used in Barrett precomputation + static const size_t s_default_shift_bits{64}; + + // Bit shift used in Barrett precomputation when IFMA acceleration is enabled + static const size_t s_ifma_shift_bits{52}; + + // Maximum number of bits in modulus to use IFMA acceleration for the forward + // transform + static const size_t s_max_fwd_ifma_modulus{1ULL << (s_ifma_shift_bits - 2)}; + + // Maximum number of bits in modulus to use IFMA acceleration for the inverse + // transform + static const size_t s_max_inv_ifma_modulus{1ULL << (s_ifma_shift_bits - 1)}; + + private: + void ComputeRootOfUnityPowers(); + uint64_t m_degree; // N: size of NTT transform, should be power of 2 + uint64_t m_p; // prime modulus + + uint64_t m_degree_bits; // log_2(m_degree) + // Bit shift to use in computing Barrett reduction for forward transform + uint64_t m_fwd_bit_shift{s_default_shift_bits}; + // Bit shift to use in computing Barrett reduction for inverse transform + uint64_t m_inv_bit_shift{s_default_shift_bits}; + + uint64_t m_winv; // Inverse of minimal root of unity + 
uint64_t m_w; // A 2N'th root of unity + + // vector of floor(W * 2**52 / m_p), with W the root of unity powers + AlignedVector64 m_precon52_root_of_unity_powers; + // vector of floor(W * 2**64 / m_p), with W the root of unity powers + AlignedVector64 m_precon64_root_of_unity_powers; + // powers of the minimal root of unity + AlignedVector64 m_root_of_unity_powers; + + // vector of floor(W * 2**52 / m_p), with W the inverse root of unity powers + AlignedVector64 m_precon52_inv_root_of_unity_powers; + // vector of floor(W * 2**64 / m_p), with W the inverse root of unity powers + AlignedVector64 m_precon64_inv_root_of_unity_powers; + + AlignedVector64 m_inv_root_of_unity_powers; +}; + +void ForwardTransformToBitReverse64(uint64_t* operand, uint64_t n, uint64_t mod, + const uint64_t* root_of_unity_powers, + const uint64_t* precon_root_of_unity_powers, + uint64_t input_mod_factor = 1, + uint64_t output_mod_factor = 1); + +/// @brief Reference NTT which is written for clarity rather than performance +/// @param[in, out] operand Input data. Overwritten with NTT output +/// @param[in] n Size of the transfrom, a.k.a. degree. Must be a power of two. +/// @param[in] mod Prime modulus. Must satisfy Must satisfy p == 1 mod 2N +/// @param[in] root_of_unity_powers Powers of 2N'th root of unity in F_p. 
In +/// bit-reversed order +void ReferenceForwardTransformToBitReverse( + uint64_t* operand, uint64_t n, uint64_t mod, + const uint64_t* root_of_unity_powers); + +void InverseTransformFromBitReverse64( + uint64_t* operand, uint64_t n, uint64_t mod, + const uint64_t* inv_root_of_unity_powers, + const uint64_t* precon_inv_root_of_unity_powers, + uint64_t input_mod_factor = 1, uint64_t output_mod_factor = 1); + +bool CheckNTTArguments(uint64_t degree, uint64_t p); + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/number-theory/number-theory.cpp b/intel-hexl/number-theory/number-theory.cpp new file mode 100644 index 00000000..d07851d7 --- /dev/null +++ b/intel-hexl/number-theory/number-theory.cpp @@ -0,0 +1,251 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "number-theory/number-theory.hpp" + +#include + +#include "logging/logging.hpp" +#include "util/check.hpp" + +namespace intel { +namespace hexl { + +uint64_t InverseUIntMod(uint64_t input, uint64_t modulus) { + uint64_t a = input % modulus; + HEXL_CHECK(a != 0, input << " does not have a InverseMod"); + + if (modulus == 1) { + return 0; + } + + int64_t m0 = modulus; + int64_t y = 0; + int64_t x = 1; + while (a > 1) { + // q is quotient + int64_t q = a / modulus; + + int64_t t = modulus; + modulus = a % modulus; + a = t; + + // Update y and x + t = y; + y = x - q * y; + x = t; + } + + // Make x positive + if (x < 0) x += m0; + + return uint64_t(x); +} + +uint64_t BarrettReduce64(uint64_t input, uint64_t modulus, uint64_t p_barr) { + HEXL_CHECK(modulus != 0, "modulus == 0"); + uint64_t q = MultiplyUInt64Hi<64>(input, p_barr); + uint64_t q_times_input = input - q * modulus; + return q_times_input >= modulus ? 
q_times_input - modulus : q_times_input; +} + +uint64_t MultiplyUIntMod(uint64_t x, uint64_t y, uint64_t modulus) { + HEXL_CHECK(modulus != 0, "modulus == 0"); + HEXL_CHECK(x < modulus, "x " << x << " >= modulus " << modulus); + HEXL_CHECK(y < modulus, "y " << y << " >= modulus " << modulus); + uint64_t prod_hi, prod_lo; + MultiplyUInt64(x, y, &prod_hi, &prod_lo); + + return BarrettReduce128(prod_hi, prod_lo, modulus); +} + +uint64_t MultiplyMod(uint64_t x, uint64_t y, uint64_t y_precon, + uint64_t modulus) { + uint64_t q = MultiplyUInt64Hi<64>(x, y_precon); + q = x * y - q * modulus; + return q >= modulus ? q - modulus : q; +} + +uint64_t AddUIntMod(uint64_t x, uint64_t y, uint64_t modulus) { + HEXL_CHECK(x < modulus, "x " << x << " >= modulus " << modulus); + HEXL_CHECK(y < modulus, "y " << y << " >= modulus " << modulus); + uint64_t sum = x + y; + return (sum >= modulus) ? (sum - modulus) : sum; +} + +uint64_t SubUIntMod(uint64_t x, uint64_t y, uint64_t modulus) { + HEXL_CHECK(x < modulus, "x " << x << " >= modulus " << modulus); + HEXL_CHECK(y < modulus, "y " << y << " >= modulus " << modulus); + uint64_t diff = (x + modulus) - y; + return (diff >= modulus) ? (diff - modulus) : diff; +} + +// Returns base^exp mod modulus +uint64_t PowMod(uint64_t base, uint64_t exp, uint64_t modulus) { + base %= modulus; + uint64_t result = 1; + while (exp > 0) { + if (exp & 1) { + result = MultiplyUIntMod(result, base, modulus); + } + base = MultiplyUIntMod(base, base, modulus); + exp >>= 1; + } + return result; +} + +// Returns true whether root is a degree-th root of unity +// degree must be a power of two. 
+bool IsPrimitiveRoot(uint64_t root, uint64_t degree, uint64_t modulus) { + if (root == 0) { + return false; + } + HEXL_CHECK(IsPowerOfTwo(degree), degree << " not a power of 2"); + + HEXL_VLOG(4, "IsPrimitiveRoot root " << root << ", degree " << degree + << ", modulus " << modulus); + + // Check if root^(degree/2) == -1 mod modulus + return PowMod(root, degree / 2, modulus) == (modulus - 1); +} + +// Tries to return a primitive degree-th root of unity +// throw error if no root is found +uint64_t GeneratePrimitiveRoot(uint64_t degree, uint64_t modulus) { + std::default_random_engine generator; + std::uniform_int_distribution distribution(0, modulus - 1); + + // We need to divide modulus-1 by degree to get the size of the quotient group + uint64_t size_entire_group = modulus - 1; + + // Compute size of quotient group + uint64_t size_quotient_group = size_entire_group / degree; + + for (int trial = 0; trial < 200; ++trial) { + uint64_t root = distribution(generator); + root = PowMod(root, size_quotient_group, modulus); + + if (IsPrimitiveRoot(root, degree, modulus)) { + return root; + } + } + HEXL_CHECK(false, "no primitive root found for degree " + << degree << " modulus " << modulus); + return 0; +} + +// Returns true whether root is a degree-th root of unity +// degree must be a power of two. 
+uint64_t MinimalPrimitiveRoot(uint64_t degree, uint64_t modulus) { + HEXL_CHECK(IsPowerOfTwo(degree), + "Degere " << degree << " is not a power of 2"); + + uint64_t root = GeneratePrimitiveRoot(degree, modulus); + + uint64_t generator_sq = MultiplyUIntMod(root, root, modulus); + uint64_t current_generator = root; + + uint64_t min_root = root; + + // Check if root^(degree/2) == -1 mod modulus + for (size_t i = 0; i < degree; ++i) { + if (current_generator < min_root) { + min_root = current_generator; + } + current_generator = + MultiplyUIntMod(current_generator, generator_sq, modulus); + } + + return min_root; +} + +uint64_t ReverseBitsUInt(uint64_t x, uint64_t bit_width) { + if (bit_width == 0) { + return 0; + } + uint64_t rev = 0; + for (uint64_t i = bit_width; i > 0; i--) { + rev |= ((x & 1) << (i - 1)); + x >>= 1; + } + return rev; +} + +// Miller-Rabin primality test +bool IsPrime(uint64_t n) { + // n < 2^64, so it is enough to test a=2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, + // and 37. See + // https://en.wikipedia.org/wiki/Miller%E2%80%93Rabin_primality_test#Testing_against_small_sets_of_bases + static const std::vector as{2, 3, 5, 7, 11, 13, + 17, 19, 23, 29, 31, 37}; + + for (const uint64_t a : as) { + if (n == a) return true; + if (n % a == 0) return false; + } + + // Write n == 2**r * d + 1 with d odd. 
+ uint64_t r = 63; + while (r > 0) { + uint64_t two_pow_r = (1ULL << r); + if ((n - 1) % two_pow_r == 0) { + break; + } + --r; + } + HEXL_CHECK(r != 0, "Error factoring n " << n); + uint64_t d = (n - 1) / (1ULL << r); + + HEXL_CHECK(n == (1ULL << r) * d + 1, "Error factoring n " << n); + HEXL_CHECK(d % 2 == 1, "d is even"); + + for (const uint64_t a : as) { + uint64_t x = PowMod(a, d, n); + if ((x == 1) || (x == n - 1)) { + continue; + } + + bool prime = false; + for (uint64_t i = 1; i < r; ++i) { + x = PowMod(x, 2, n); + if (x == n - 1) { + prime = true; + break; + } + } + if (!prime) { + return false; + } + } + return true; +} + +std::vector GeneratePrimes(size_t num_primes, size_t bit_size, + size_t ntt_size) { + HEXL_CHECK(num_primes > 0, "num_primes == 0"); + HEXL_CHECK(IsPowerOfTwo(ntt_size), + "ntt_size " << ntt_size << " is not a power of two"); + HEXL_CHECK(Log2(ntt_size) < bit_size, + "log2(ntt_size) " << Log2(ntt_size) + << " should be less than bit_size " << bit_size); + + uint64_t value = (1ULL << bit_size) + 1; + + std::vector ret; + + while (value < (1ULL << (bit_size + 1))) { + if (IsPrime(value)) { + ret.emplace_back(value); + if (ret.size() == num_primes) { + return ret; + } + } + value += 2 * ntt_size; + } + + HEXL_CHECK(false, "Failed to find enough primes"); + return ret; +} + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/number-theory/number-theory.hpp b/intel-hexl/number-theory/number-theory.hpp new file mode 100644 index 00000000..2674120d --- /dev/null +++ b/intel-hexl/number-theory/number-theory.hpp @@ -0,0 +1,230 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include +#include +#include + +#include "util/check.hpp" +#include "util/compiler.hpp" + +namespace intel { +namespace hexl { + +// Stores an integer on which modular multiplication can be performed more +// efficiently, at the cost of some precomputation. 
+class MultiplyFactor { + public: + MultiplyFactor() = default; + + // Computes and stores the Barrett factor (operand << bit_shift) / modulus + MultiplyFactor(uint64_t operand, uint64_t bit_shift, uint64_t modulus) + : m_operand(operand) { + HEXL_CHECK(operand <= modulus, "operand " << operand + << " must be less than modulus " + << modulus); + HEXL_CHECK(bit_shift == 64 || bit_shift == 52, + "Unsupport BitShift " << bit_shift); + uint64_t op_hi{0}; + uint64_t op_lo{0}; + + if (bit_shift == 64) { + op_hi = operand; + op_lo = 0; + } else if (bit_shift == 52) { + op_hi = operand >> 12; + op_lo = operand << 52; + } + m_barrett_factor = DivideUInt128UInt64Lo(op_hi, op_lo, modulus); + } + + inline uint64_t BarrettFactor() const { return m_barrett_factor; } + inline uint64_t Operand() const { return m_operand; } + + private: + uint64_t m_operand; + uint64_t m_barrett_factor; +}; + +// Returns whether or not num is a power of two +inline bool IsPowerOfTwo(uint64_t num) { return num && !(num & (num - 1)); } + +// Returns log2(x) for x a power of 2 +inline uint64_t Log2(uint64_t x) { + HEXL_CHECK(IsPowerOfTwo(x), x << " not a power of 2"); + uint64_t ret = 0; + while (x >>= 1) ++ret; + return ret; +} + +// Returns the maximum value that can be represented using bits bits +inline uint64_t MaximumValue(uint64_t bits) { + HEXL_CHECK(bits <= 64, "MaximumValue requires bits <= 64; got " << bits); + if (bits == 64) { + return (std::numeric_limits::max)(); + } + return (1ULL << bits) - 1; +} + +// Reverses the bits +uint64_t ReverseBitsUInt(uint64_t x, uint64_t bits); + +// Returns a^{-1} mod modulus +uint64_t InverseUIntMod(uint64_t a, uint64_t modulus); + +//// Returns (x * y) mod modulus +//// Assumes x, y < modulus +uint64_t MultiplyUIntMod(uint64_t x, uint64_t y, uint64_t modulus); + +// Returns (x * y) mod modulus +// @param y_precon floor(2**64 / modulus) +uint64_t MultiplyMod(uint64_t x, uint64_t y, uint64_t y_precon, + uint64_t modulus); + +// Returns (x + y) mod modulus 
+// Assumes x, y < modulus +uint64_t AddUIntMod(uint64_t x, uint64_t y, uint64_t modulus); + +// Returns (x - y) mod modulus +// Assumes x, y < modulus +uint64_t SubUIntMod(uint64_t x, uint64_t y, uint64_t modulus); + +// Returns base^exp mod modulus +uint64_t PowMod(uint64_t base, uint64_t exp, uint64_t modulus); + +// Returns true whether root is a degree-th root of unity +// degree must be a power of two. +bool IsPrimitiveRoot(uint64_t root, uint64_t degree, uint64_t modulus); + +// Tries to return a primtiive degree-th root of unity +// Returns -1 if no root is found +uint64_t GeneratePrimitiveRoot(uint64_t degree, uint64_t modulus); + +// Returns true whether root is a degree-th root of unity +// degree must be a power of two. +uint64_t MinimalPrimitiveRoot(uint64_t degree, uint64_t modulus); + +// Computes (x * y) mod modulus, except that the output is in [0, 2 * modulus] +// @param modulus_precon Pre-computed Barrett reduction factor +template +inline uint64_t MultiplyUIntModLazy(uint64_t x, uint64_t y_operand, + uint64_t y_barrett_factor, + uint64_t modulus) { + HEXL_CHECK(y_operand < modulus, "y_operand " << y_operand + << " must be less than modulus " + << modulus); + HEXL_CHECK( + modulus <= MaximumValue(BitShift), + "Modulus " << modulus << " exceeds bound " << MaximumValue(BitShift)); + HEXL_CHECK(x <= MaximumValue(BitShift), + "Operand " << x << " exceeds bound " << MaximumValue(BitShift)); + + uint64_t Q = MultiplyUInt64Hi(x, y_barrett_factor); + return y_operand * x - Q * modulus; +} + +// Computes (x * y) mod modulus, except that the output is in [0, 2 * modulus] +template +inline uint64_t MultiplyUIntModLazy(uint64_t x, uint64_t y, uint64_t modulus) { + HEXL_CHECK(BitShift == 64 || BitShift == 52, + "Unsupport BitShift " << BitShift); + HEXL_CHECK(x <= MaximumValue(BitShift), + "Operand " << x << " exceeds bound " << MaximumValue(BitShift)); + HEXL_CHECK(y < modulus, + "y " << y << " must be less than modulus " << modulus); + HEXL_CHECK( + modulus 
<= MaximumValue(BitShift), + "Modulus " << modulus << " exceeds bound " << MaximumValue(BitShift)); + uint64_t y_hi{0}; + uint64_t y_lo{0}; + if (BitShift == 64) { + y_hi = y; + y_lo = 0; + } else if (BitShift == 52) { + y_hi = y >> 12; + y_lo = y << 52; + } + uint64_t y_barrett = DivideUInt128UInt64Lo(y_hi, y_lo, modulus); + return MultiplyUIntModLazy(x, y, y_barrett, modulus); +} + +// Adds two unsigned 64-bit integers +// @param operand1 Number to add +// @param operand2 Number to add +// @param result Stores the sum +// @return The carry bit +inline unsigned char AddUInt64(uint64_t operand1, uint64_t operand2, + uint64_t* result) { + *result = operand1 + operand2; + return static_cast(*result < operand1); +} + +// Returns whether or not the input is prime +bool IsPrime(uint64_t n); + +// Generates a list of num_primes primes in the range [2^(bit_size, +// 2^(bit_size+1)]. Ensures each prime p satisfies +// p % (2*ntt_size+1)) == 1 +// @param num_primes Number of primes to generate +// @param bit_size Bit size of each prime +// @param ntt_size N such that each prime p satisfies p % (2N) == 1. 
N must be +// a power of two +std::vector GeneratePrimes(size_t num_primes, size_t bit_size, + size_t ntt_size = 1); + +// returns input mod modulus, computed via Barrett reduction +// @param p_barr floor(2^64 / p) +uint64_t BarrettReduce64(uint64_t input, uint64_t modulus, uint64_t p_barr); + +template +uint64_t ReduceMod(uint64_t x, uint64_t modulus, + const uint64_t* twice_modulus = nullptr, + const uint64_t* four_times_modulus = nullptr) { + HEXL_CHECK(InputModFactor == 1 || InputModFactor == 2 || + InputModFactor == 4 || InputModFactor == 8, + "InputModFactor should be 1, 2, 4, or 8"); + if (InputModFactor == 1) { + return x; + } + if (InputModFactor == 2) { + if (x >= modulus) { + x -= modulus; + } + return x; + } + if (InputModFactor == 4) { + HEXL_CHECK(twice_modulus != nullptr, "twice_modulus should not be nullptr"); + if (x >= *twice_modulus) { + x -= *twice_modulus; + } + if (x >= modulus) { + x -= modulus; + } + return x; + } + if (InputModFactor == 8) { + HEXL_CHECK(twice_modulus != nullptr, "twice_modulus should not be nullptr"); + HEXL_CHECK(four_times_modulus != nullptr, + "four_times_modulus should not be nullptr"); + + if (x >= *four_times_modulus) { + x -= *four_times_modulus; + } + if (x >= *twice_modulus) { + x -= *twice_modulus; + } + if (x >= modulus) { + x -= modulus; + } + return x; + } + HEXL_CHECK(false, "Should be unreachable"); + return x; +} + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/util/aligned-allocator.hpp b/intel-hexl/util/aligned-allocator.hpp new file mode 100644 index 00000000..b5774760 --- /dev/null +++ b/intel-hexl/util/aligned-allocator.hpp @@ -0,0 +1,82 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include +#include + +#include "number-theory/number-theory.hpp" + +namespace intel { +namespace hexl { + +template +class AlignedAllocator { + public: + using value_type = T; + + AlignedAllocator() noexcept {} + + 
AlignedAllocator(const AlignedAllocator&) {} + + template + AlignedAllocator(const AlignedAllocator&) {} + + ~AlignedAllocator() {} + + template + struct rebind { + using other = AlignedAllocator; + }; + + bool operator==(const AlignedAllocator&) { return true; } + + bool operator!=(const AlignedAllocator&) { return false; } + + T* allocate(std::size_t n) { + if (!IsPowerOfTwo(Alignment)) { + return nullptr; + } + // Allocate enough space to ensure the alignment can be satisfied + size_t buffer_size = sizeof(T) * n + Alignment; + // Additionally, allocate a prefix to store the memory location of the + // unaligned buffer + size_t alloc_size = buffer_size + sizeof(void*); + void* buffer = std::malloc(alloc_size); + if (!buffer) { + return nullptr; + } + + // Reserve first location for pointer to originally-allocated space + void* aligned_buffer = static_cast(buffer) + sizeof(void*); + std::align(Alignment, sizeof(T) * n, aligned_buffer, buffer_size); + if (!aligned_buffer) { + return nullptr; + } + + // Store allocated buffer address at aligned_buffer - sizeof(void*). 
+ void* store_buffer_addr = + static_cast(aligned_buffer) - sizeof(void*); + *(static_cast(store_buffer_addr)) = buffer; + + return static_cast(aligned_buffer); + } + + void deallocate(T* p, std::size_t n) { + if (!p) { + return; + } + void* store_buffer_addr = (reinterpret_cast(p) - sizeof(void*)); + void* free_address = *(static_cast(store_buffer_addr)); + (void)n; // Avoid unused variable + std::free(free_address); + } +}; + +template +using AlignedVector64 = std::vector >; + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/util/avx512-util.hpp b/intel-hexl/util/avx512-util.hpp new file mode 100644 index 00000000..6f4b1939 --- /dev/null +++ b/intel-hexl/util/avx512-util.hpp @@ -0,0 +1,275 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include + +#include "intel-hexl/util/util.hpp" +#include "number-theory/number-theory.hpp" +#include "util/check.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512DQ + +/// @brief Returns the unsigned 64-bit integer values in x as a vector +inline std::vector ExtractValues(__m512i x) { + __m256i x0 = _mm512_extracti64x4_epi64(x, 0); + __m256i x1 = _mm512_extracti64x4_epi64(x, 1); + + std::vector xs{static_cast(_mm256_extract_epi64(x0, 0)), + static_cast(_mm256_extract_epi64(x0, 1)), + static_cast(_mm256_extract_epi64(x0, 2)), + static_cast(_mm256_extract_epi64(x0, 3)), + static_cast(_mm256_extract_epi64(x1, 0)), + static_cast(_mm256_extract_epi64(x1, 1)), + static_cast(_mm256_extract_epi64(x1, 2)), + static_cast(_mm256_extract_epi64(x1, 3))}; + + return xs; +} + +/// @brief Returns the signed 64-bit integer values in x as a vector +inline std::vector ExtractIntValues(__m512i x) { + __m256i x0 = _mm512_extracti64x4_epi64(x, 0); + __m256i x1 = _mm512_extracti64x4_epi64(x, 1); + + std::vector xs{static_cast(_mm256_extract_epi64(x0, 0)), + static_cast(_mm256_extract_epi64(x0, 1)), + static_cast(_mm256_extract_epi64(x0, 2)), + 
static_cast(_mm256_extract_epi64(x0, 3)), + static_cast(_mm256_extract_epi64(x1, 0)), + static_cast(_mm256_extract_epi64(x1, 1)), + static_cast(_mm256_extract_epi64(x1, 2)), + static_cast(_mm256_extract_epi64(x1, 3))}; + + return xs; +} + +// Returns the 64-bit floating-point values in x as a vector +inline std::vector ExtractValues(__m512d x) { + std::vector ret(8, 0); + double* x_data = reinterpret_cast(&x); + for (size_t i = 0; i < 8; ++i) { + ret[i] = x_data[i]; + } + return ret; +} + +// Multiply packed unsigned BitShift-bit integers in each 64-bit element of x +// and y to form a 2*BitShift-bit intermediate result. +// Returns the high BitShift-bit unsigned integer from the intermediate result +template +inline __m512i _mm512_hexl_mulhi_epi(__m512i x, __m512i y); + +template <> +inline __m512i _mm512_hexl_mulhi_epi<64>(__m512i x, __m512i y) { + // https://stackoverflow.com/questions/28807341/simd-signed-with-unsigned-multiplication-for-64-bit-64-bit-to-128-bit + __m512i lomask = _mm512_set1_epi64(0x00000000ffffffff); + __m512i xh = + _mm512_shuffle_epi32(x, (_MM_PERM_ENUM)0xB1); // x0l, x0h, x1l, x1h + __m512i yh = + _mm512_shuffle_epi32(y, (_MM_PERM_ENUM)0xB1); // y0l, y0h, y1l, y1h + __m512i w0 = _mm512_mul_epu32(x, y); // x0l*y0l, x1l*y1l + __m512i w1 = _mm512_mul_epu32(x, yh); // x0l*y0h, x1l*y1h + __m512i w2 = _mm512_mul_epu32(xh, y); // x0h*y0l, x1h*y0l + __m512i w3 = _mm512_mul_epu32(xh, yh); // x0h*y0h, x1h*y1h + __m512i w0h = _mm512_srli_epi64(w0, 32); + __m512i s1 = _mm512_add_epi64(w1, w0h); + __m512i s1l = _mm512_and_si512(s1, lomask); + __m512i s1h = _mm512_srli_epi64(s1, 32); + __m512i s2 = _mm512_add_epi64(w2, s1l); + __m512i s2h = _mm512_srli_epi64(s2, 32); + __m512i hi1 = _mm512_add_epi64(w3, s1h); + return _mm512_add_epi64(hi1, s2h); +} + +#ifdef HEXL_HAS_AVX512IFMA +template <> +inline __m512i _mm512_hexl_mulhi_epi<52>(__m512i x, __m512i y) { + __m512i zero = _mm512_set1_epi64(0); + return _mm512_madd52hi_epu64(zero, x, y); +} +#endif + +// 
Multiply packed unsigned BitShift-bit integers in each 64-bit element of x +// and y to form a 104-bit intermediate result. +// Returns the low BitShift-bit unsigned integer from the intermediate result +template +inline __m512i _mm512_hexl_mullo_epi(__m512i x, __m512i y); + +template <> +inline __m512i _mm512_hexl_mullo_epi<64>(__m512i x, __m512i y) { + return _mm512_mullo_epi64(x, y); +} + +#ifdef HEXL_HAS_AVX512IFMA +template <> +inline __m512i _mm512_hexl_mullo_epi<52>(__m512i x, __m512i y) { + __m512i zero = _mm512_set1_epi64(0); + return _mm512_madd52lo_epu64(zero, x, y); +} +#endif + +// Multiply packed unsigned BitShift-bit integers in each 64-bit element of y +// and z to form a 2*BitShift-bit intermediate result. The low BitShift bits of +// the result are added to x, then the result is returned. +template +inline __m512i _mm512_hexl_mullo_add_epi(__m512i x, __m512i y, __m512i z); + +#ifdef HEXL_HAS_AVX512IFMA +template <> +inline __m512i _mm512_hexl_mullo_add_epi<52>(__m512i x, __m512i y, __m512i z) { + return _mm512_madd52lo_epu64(x, y, z); +} +#endif + +template <> +inline __m512i _mm512_hexl_mullo_add_epi<64>(__m512i x, __m512i y, __m512i z) { + __m512i prod = _mm512_mullo_epi64(y, z); + return _mm512_add_epi64(x, prod); +} + +template +inline __m512i _mm512_hexl_small_mod_epu64(__m512i x, __m512i p, + __m512i* p_times_2 = nullptr, + __m512i* p_times_4 = nullptr) { + HEXL_CHECK(InputModFactor == 1 || InputModFactor == 2 || + InputModFactor == 4 || InputModFactor == 8, + "InputModFactor must be 1, 2, 4, or 8"); + if (InputModFactor == 1) { + return x; + } + if (InputModFactor == 2) { + return _mm512_min_epu64(x, _mm512_sub_epi64(x, p)); + } + if (InputModFactor == 4) { + HEXL_CHECK(p_times_2 != nullptr, "p_times_2 must not be nullptr"); + x = _mm512_min_epu64(x, _mm512_sub_epi64(x, *p_times_2)); + return _mm512_min_epu64(x, _mm512_sub_epi64(x, p)); + } + if (InputModFactor == 8) { + HEXL_CHECK(p_times_2 != nullptr, "p_times_2 must not be nullptr"); + 
HEXL_CHECK(p_times_4 != nullptr, "p_times_4 must not be nullptr"); + x = _mm512_min_epu64(x, _mm512_sub_epi64(x, *p_times_4)); + x = _mm512_min_epu64(x, _mm512_sub_epi64(x, *p_times_2)); + return _mm512_min_epu64(x, _mm512_sub_epi64(x, p)); + } +} + +// Returns (x + y) mod p; assumes 0 < x, y < p +// x += y - p; +// if (x < 0) x+= p +// return x +inline __m512i _mm512_hexl_small_add_mod_epi64(__m512i x, __m512i y, + __m512i p) { + HEXL_CHECK_BOUNDS(ExtractValues(x).data(), 8, ExtractValues(p)[0]); + HEXL_CHECK_BOUNDS(ExtractValues(y).data(), 8, ExtractValues(p)[0]); + return _mm512_hexl_small_mod_epu64(_mm512_add_epi64(x, y), p); + + // __m512i v_diff = _mm512_sub_epi64(y, p); + // x = _mm512_add_epi64(x, v_diff); + // __mmask8 sign_bits = _mm512_movepi64_mask(x); + // return _mm512_mask_add_epi64(x, sign_bits, x, p); +} + +inline __mmask8 _mm512_hexl_cmp_epu64_mask(__m512i a, __m512i b, CMPINT cmp) { + switch (cmp) { + case CMPINT::EQ: + return _mm512_cmp_epu64_mask(a, b, static_cast(CMPINT::EQ)); + case CMPINT::LT: + return _mm512_cmp_epu64_mask(a, b, static_cast(CMPINT::LT)); + case CMPINT::LE: + return _mm512_cmp_epu64_mask(a, b, static_cast(CMPINT::LE)); + case CMPINT::FALSE: + return _mm512_cmp_epu64_mask(a, b, static_cast(CMPINT::FALSE)); + case CMPINT::NE: + return _mm512_cmp_epu64_mask(a, b, static_cast(CMPINT::NE)); + case CMPINT::NLT: + return _mm512_cmp_epu64_mask(a, b, static_cast(CMPINT::NLT)); + case CMPINT::NLE: + return _mm512_cmp_epu64_mask(a, b, static_cast(CMPINT::NLE)); + case CMPINT::TRUE: + return _mm512_cmp_epu64_mask(a, b, static_cast(CMPINT::TRUE)); + } + __mmask8 dummy = 0; // Avoid end of non-void function warning + return dummy; +} + +// Returns c[i] = a[i] CMP b[i] ? 
match_value : 0 +inline __m512i _mm512_hexl_cmp_epi64(__m512i a, __m512i b, CMPINT cmp, + uint64_t match_value) { + __mmask8 mask = _mm512_hexl_cmp_epu64_mask(a, b, cmp); + return _mm512_maskz_broadcastq_epi64(mask, _mm_set1_epi64x(match_value)); +} + +// Returns c[i] = a[i] CMP b[i] ? match_value : 0 +inline __m512i _mm512_hexl_cmp_epi64(__m512i a, __m512i b, int cmp, + uint64_t match_value) { + return _mm512_hexl_cmp_epi64(a, b, static_cast(cmp), match_value); +} + +// Returns c[i] = a[i] >= b[i] ? match_value : 0 +inline __m512i _mm512_hexl_cmpge_epu64(__m512i a, __m512i b, + uint64_t match_value) { + return _mm512_hexl_cmp_epi64(a, b, CMPINT::NLT, match_value); +} + +// Returns c[i] = a[i] < b[i] ? match_value : 0 +inline __m512i _mm512_hexl_cmplt_epu64(__m512i a, __m512i b, + uint64_t match_value) { + return _mm512_hexl_cmp_epi64(a, b, CMPINT::LT, match_value); +} + +// Returns c[i] = a[i] <= b[i] ? match_value : 0 +inline __m512i _mm512_hexl_cmple_epu64(__m512i a, __m512i b, + uint64_t match_value) { + return _mm512_hexl_cmp_epi64(a, b, CMPINT::LE, match_value); +} + +// returns x mod p, computed via Barrett reduction +// @param p_barr floor(2^BitShift / p) +template +inline __m512i _mm512_hexl_barrett_reduce64(__m512i x, __m512i p, + __m512i p_barr) { + __m512i rnd1_hi = _mm512_hexl_mulhi_epi(x, p_barr); + + // Barrett subtraction + // tmp[0] = input - tmp[1] * modulus; + __m512i tmp1_times_mod = _mm512_hexl_mullo_epi<64>(rnd1_hi, p); + x = _mm512_sub_epi64(x, tmp1_times_mod); + // Correction + x = _mm512_hexl_small_mod_epu64(x, p); + return x; +} + +// Concatenate packed 64-bit integers in x and y, producing an intermediate +// 128-bit result. Shift the result right by bit_shift bits, and return the +// lower 64 bits. 
The bit_shift is a run-time argument, rather than a +// compile-time template parameter, so we can't use _mm512_shrdi_epi64 +inline __m512i _mm512_hexl_shrdi_epi64(__m512i x, __m512i y, int bit_shift) { + __m512i c_lo = _mm512_srli_epi64(x, bit_shift); + __m512i c_hi = _mm512_slli_epi64(y, 64 - bit_shift); + return _mm512_add_epi64(c_lo, c_hi); +} + +// Concatenate packed 64-bit integers in x and y, producing an intermediate +// 128-bit result. Shift the result right by BitShift bits, and return the lower +// 64 bits. +template +inline __m512i _mm512_hexl_shrdi_epi64(__m512i x, __m512i y) { +#ifdef HEXL_HAS_AVX512IFMA + return _mm512_shrdi_epi64(x, y, BitShift); +#else + return _mm512_hexl_shrdi_epi64(x, y, BitShift); +#endif +} + +#endif // HEXL_HAS_AVX512DQ + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/util/check.hpp b/intel-hexl/util/check.hpp new file mode 100644 index 00000000..1724f868 --- /dev/null +++ b/intel-hexl/util/check.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "util/types.hpp" + +// Create logging/debug macros with no run-time overhead unless HEXL_DEBUG is +// enabled +#ifdef HEXL_DEBUG +#include "logging/logging.hpp" + +#define HEXL_CHECK(cond, expr) \ + if (!(cond)) { \ + LOG(ERROR) << expr << " in fuction: " << __FUNCTION__ \ + << " in file: " __FILE__ << " at line: " << __LINE__; \ + throw std::runtime_error("Error. Check log output"); \ + } + +#define HEXL_CHECK_BOUNDS3(arg, n, bound) \ + for (size_t i = 0; i < n; ++i) { \ + HEXL_CHECK((arg)[i] < bound, "Arg[" << i << "] = " << (arg)[i] \ + << " exceeds bound " << bound); \ + } + +#define HEXL_CHECK_BOUNDS4(arg, n, bound, expr) \ + for (size_t i = 0; i < n; ++i) { \ + HEXL_CHECK((arg)[i] < bound, expr); \ + } + +// Dispatch HEXL_CHECK_BOUNDS to proper number of arguments +#define GET_MACRO(_1, _2, _3, _4, NAME, ...) NAME +#define HEXL_CHECK_BOUNDS(...) 
\ + GET_MACRO(__VA_ARGS__, HEXL_CHECK_BOUNDS4, HEXL_CHECK_BOUNDS3) \ + (__VA_ARGS__) + +#else // HEXL_DEBUG=OFF + +#define HEXL_CHECK(cond, expr) \ + {} +#define HEXL_CHECK_BOUNDS(...) \ + {} + +#endif // HEXL_DEBUG diff --git a/intel-hexl/util/clang.hpp b/intel-hexl/util/clang.hpp new file mode 100644 index 00000000..25f2700d --- /dev/null +++ b/intel-hexl/util/clang.hpp @@ -0,0 +1,60 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once +#include "util/check.hpp" +#include "util/types.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_USE_CLANG +// Return x * y as 128-bit integer +// Correctness if x * y < 128 bits +inline uint128_t MultiplyUInt64(uint64_t x, uint64_t y) { + return uint128_t(x) * uint128_t(y); +} + +inline uint64_t BarrettReduce128(uint64_t input_hi, uint64_t input_lo, + uint64_t modulus) { + HEXL_CHECK(modulus != 0, "modulus == 0") + uint128_t n = (static_cast(input_hi) << 64) | + (static_cast(input_lo)); + + return n % modulus; + // TODO(fboemer): actually use barrett reduction if performance-critical +} + +// Returns low 64bit of 128b/64b where x1=high 64b, x0=low 64b +inline uint64_t DivideUInt128UInt64Lo(uint64_t x1, uint64_t x0, uint64_t y) { + uint128_t n = + (static_cast(x1) << 64) | (static_cast(x0)); + uint128_t q = n / y; + + return static_cast(q); +} + +// Multiplies x * y as 128-bit integer. 
+// @param prod_hi Stores high 64 bits of product +// @param prod_lo Stores low 64 bits of product +inline void MultiplyUInt64(uint64_t x, uint64_t y, uint64_t* prod_hi, + uint64_t* prod_lo) { + uint128_t prod = MultiplyUInt64(x, y); + *prod_hi = static_cast(prod >> 64); + *prod_lo = static_cast(prod); +} + +// Return the high 128 minus BitShift bits of the 128-bit product x * y +template +inline uint64_t MultiplyUInt64Hi(uint64_t x, uint64_t y) { + uint128_t product = static_cast(x) * y; + return static_cast(product >> BitShift); +} + +#define HEXL_LOOP_UNROLL_4 _Pragma("clang loop unroll_count(4)") +#define HEXL_LOOP_UNROLL_8 _Pragma("clang loop unroll_count(8)") + +#endif + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/util/compiler.hpp b/intel-hexl/util/compiler.hpp new file mode 100644 index 00000000..517181fe --- /dev/null +++ b/intel-hexl/util/compiler.hpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#ifdef HEXL_USE_MSVC +#include "util/msvc.hpp" +#elif defined HEXL_USE_GNU +#include "util/gcc.hpp" +#elif defined HEXL_USE_CLANG +#include "util/clang.hpp" +#endif diff --git a/intel-hexl/util/cpu-features.hpp b/intel-hexl/util/cpu-features.hpp new file mode 100644 index 00000000..a6370b15 --- /dev/null +++ b/intel-hexl/util/cpu-features.hpp @@ -0,0 +1,20 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "cpuinfo_x86.h" // NOLINT(build/include_subdir) + +namespace intel { +namespace hexl { + +static const cpu_features::X86Features features = + cpu_features::GetX86Info().features; +static const bool has_avx512ifma = features.avx512ifma; +static const bool has_avx512dq = + features.avx512f && features.avx512dq && features.avx512vl; + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/util/gcc.hpp b/intel-hexl/util/gcc.hpp new file mode 100644 index 00000000..a767da06 --- 
/dev/null +++ b/intel-hexl/util/gcc.hpp @@ -0,0 +1,62 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once +#include + +#include "util/check.hpp" +#include "util/types.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_USE_GNU +// Return x * y as 128-bit integer +// Correctness if x * y < 128 bits +inline uint128_t MultiplyUInt64(uint64_t x, uint64_t y) { + return uint128_t(x) * uint128_t(y); +} + +inline uint64_t BarrettReduce128(uint64_t input_hi, uint64_t input_lo, + uint64_t modulus) { + HEXL_CHECK(modulus != 0, "modulus == 0") + uint128_t n = (static_cast(input_hi) << 64) | + (static_cast(input_lo)); + + return n % modulus; + // TODO(fboemer): actually use barrett reduction if performance-critical +} + +// Returns low 64bit of 128b/64b where x1=high 64b, x0=low 64b +inline uint64_t DivideUInt128UInt64Lo(uint64_t x1, uint64_t x0, uint64_t y) { + uint128_t n = + (static_cast(x1) << 64) | (static_cast(x0)); + uint128_t q = n / y; + + return static_cast(q); +} + +// Multiplies x * y as 128-bit integer. 
+// @param prod_hi Stores high 64 bits of product +// @param prod_lo Stores low 64 bits of product +inline void MultiplyUInt64(uint64_t x, uint64_t y, uint64_t* prod_hi, + uint64_t* prod_lo) { + uint128_t prod = MultiplyUInt64(x, y); + *prod_hi = static_cast(prod >> 64); + *prod_lo = static_cast(prod); +} + +// Return the high 128 minus BitShift bits of the 128-bit product x * y +template +inline uint64_t MultiplyUInt64Hi(uint64_t x, uint64_t y) { + uint128_t product = MultiplyUInt64(x, y); + return static_cast(product >> BitShift); +} + +#define HEXL_LOOP_UNROLL_4 _Pragma("GCC unroll 4") +#define HEXL_LOOP_UNROLL_8 _Pragma("GCC unroll 8") + +#endif + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/util/msvc.hpp b/intel-hexl/util/msvc.hpp new file mode 100644 index 00000000..43da2370 --- /dev/null +++ b/intel-hexl/util/msvc.hpp @@ -0,0 +1,87 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#ifdef HEXL_USE_MSVC +#include +#include +#include + +#include + +#pragma intrinsic(_udiv128, _umul128) + +#undef TRUE +#undef FALSE + +namespace intel { +namespace hexl { + +inline uint64_t BarrettReduce128(uint64_t input_hi, uint64_t input_lo, + uint64_t modulus) { + HEXL_CHECK(modulus != 0, "modulus == 0") + uint64_t remainder; + _udiv128(input_hi, input_lo, modulus, &remainder); + + return remainder; +} + +// Returns low 64bit of 128b/64b where x1=high 64b, x0=low 64b +inline uint64_t DivideUInt128UInt64Lo(uint64_t x1, uint64_t x0, uint64_t y) { + uint64_t remainder; + uint64_t result = _udiv128(x1, x0, y, &remainder); + return result; +} + +// Multiplies x * y as 128-bit integer. 
+// @param prod_hi Stores high 64 bits of product +// @param prod_lo Stores low 64 bits of product +inline void MultiplyUInt64(uint64_t x, uint64_t y, uint64_t* prod_hi, + uint64_t* prod_lo) { + *prod_lo = _umul128(x, y, prod_hi); +} + +inline void RightShift128(uint64_t* result_hi, uint64_t* result_lo, + uint64_t op_hi, uint64_t op_lo, + uint64_t shift_value) { + if (shift_value == 0) { + *result_hi = op_hi; + *result_lo = op_lo; + } else if (shift_value == 64) { + *result_hi = 0ULL; + *result_lo = op_hi; + } else if (shift_value == 128) { + *result_hi = 0ULL; + *result_lo = 0ULL; + } else if (shift_value >= 1 && shift_value <= 63) { + *result_hi = op_hi >> shift_value; + *result_lo = (op_hi << (64 - shift_value)) | (op_lo >> shift_value); + } else if (shift_value >= 65 && shift_value < 128) { + *result_hi = 0ULL; + *result_lo = op_hi >> (shift_value - 64); + } +} + +// Return the high 128 minus BitShift bits of the 128-bit product x * y +template +inline uint64_t MultiplyUInt64Hi(uint64_t x, uint64_t y) { + HEXL_CHECK(BitShift == 52 || BitShift == 64, + "Invalid BitShift " << BitShift << "; expected 52 or 64"); + uint64_t prod_hi; + uint64_t prod_lo = _umul128(x, y, &prod_hi); + uint64_t result_hi; + uint64_t result_lo; + RightShift128(&result_hi, &result_lo, prod_hi, prod_lo, BitShift); + return result_lo; +} + +#define HEXL_LOOP_UNROLL_4 \ + {} +#define HEXL_LOOP_UNROLL_8 \ + {} + +#endif + +} // namespace hexl +} // namespace intel diff --git a/intel-hexl/util/types.hpp b/intel-hexl/util/types.hpp new file mode 100644 index 00000000..7d4b0fac --- /dev/null +++ b/intel-hexl/util/types.hpp @@ -0,0 +1,11 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#if defined(HEXL_USE_GNU) || defined(HEXL_USE_CLANG) +__extension__ typedef __int128 int128_t; +__extension__ typedef unsigned __int128 uint128_t; +#endif diff --git a/intel-hexl/util/util-internal.hpp b/intel-hexl/util/util-internal.hpp new 
file mode 100644 index 00000000..5ad1b69e --- /dev/null +++ b/intel-hexl/util/util-internal.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "intel-hexl/util/util.hpp" + +namespace intel { +namespace hexl { + +inline bool Compare(CMPINT cmp, uint64_t lhs, uint64_t rhs) { + switch (cmp) { + case CMPINT::EQ: + return lhs == rhs; + case CMPINT::LT: + return lhs < rhs; + break; + case CMPINT::LE: + return lhs <= rhs; + break; + case CMPINT::FALSE: + return false; + break; + case CMPINT::NE: + return lhs != rhs; + break; + case CMPINT::NLT: + return lhs >= rhs; + break; + case CMPINT::NLE: + return lhs > rhs; + case CMPINT::TRUE: + return true; + default: + return true; + } +} + +} // namespace hexl +} // namespace intel diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 00000000..63a3e964 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set(SRC main.cpp + test-aligned-vector.cpp + test-avx512-util.cpp + test-number-theory.cpp + test-ntt.cpp + test-eltwise-mult-mod.cpp + test-eltwise-reduce-mod.cpp + test-eltwise-add-mod.cpp + test-eltwise-fma-mod.cpp + test-eltwise-cmp-add.cpp + test-eltwise-cmp-sub-mod.cpp +) + +add_executable(unit-test ${SRC}) + +if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + target_compile_options(unit-test PRIVATE -Wall -Wextra -march=native) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + # Workaround for unresolved symbols, shouldn't be needed + if (HEXL_DEBUG) + target_link_libraries(unit-test PRIVATE gflags) + endif() + # Disable inline, due to incorrect optimization in ExtractValues, causing failing tests in Windows AVX512 in Release mode with HEXL_DEBUG=OFF + target_compile_options(unit-test PRIVATE /Wall /W4 /Zc:preprocessor /Ob0) +endif() + +target_include_directories(unit-test PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + 
${HEXL_SRC_ROOT_DIR} # Private headers +) +target_link_libraries(unit-test PRIVATE intel_hexl gtest Threads::Threads) +if (HEXL_DEBUG) + target_link_libraries(unit-test PRIVATE easyloggingpp) +endif() + +# Make sure that public include folder doesn't use private headers +# and that public headers are self-contained +add_executable(test-public-api test-public-api.cpp) +add_dependencies(test-public-api intel_hexl) +target_include_directories(test-public-api PRIVATE ${HEXL_INC_ROOT_DIR}) diff --git a/test/main.cpp b/test/main.cpp new file mode 100644 index 00000000..42d32350 --- /dev/null +++ b/test/main.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "logging/logging.hpp" + +int main(int argc, char** argv) { + START_EASYLOGGINGPP(argc, argv); + + ::testing::InitGoogleTest(&argc, argv); + int rc = RUN_ALL_TESTS(); + return rc; +} diff --git a/test/test-aligned-vector.cpp b/test/test-aligned-vector.cpp new file mode 100644 index 00000000..97562c7c --- /dev/null +++ b/test/test-aligned-vector.cpp @@ -0,0 +1,51 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "gtest/gtest.h" +#include "logging/logging.hpp" +#include "test-util.hpp" +#include "util/aligned-allocator.hpp" +#include "util/types.hpp" + +namespace intel { +namespace hexl { + +TEST(AlignedVector64, alloc) { + AlignedVector64 x{1, 2, 3, 4}; + ASSERT_EQ(reinterpret_cast(x.data()) % 64, 0); +} + +TEST(AlignedVector64, assignment) { + AlignedVector64 x{1, 2, 3, 4}; + AlignedVector64 y = x; + ASSERT_EQ(reinterpret_cast(x.data()) % 64, 0); + ASSERT_EQ(reinterpret_cast(y.data()) % 64, 0); + ASSERT_EQ(x, y); +} + +TEST(AlignedVector64, move_assignment) { + AlignedVector64 x{1, 2, 3, 4}; + AlignedVector64 y = std::move(x); + ASSERT_EQ(reinterpret_cast(x.data()) % 64, 0); + ASSERT_EQ(reinterpret_cast(y.data()) % 64, 0); + ASSERT_EQ(y, (AlignedVector64{1, 2, 3, 4})); +} + 
+TEST(AlignedVector64, copy_constructor) { + AlignedVector64 x{1, 2, 3, 4}; + AlignedVector64 y{x}; + ASSERT_EQ(reinterpret_cast(x.data()) % 64, 0); + ASSERT_EQ(reinterpret_cast(y.data()) % 64, 0); + ASSERT_EQ(y, (AlignedVector64{1, 2, 3, 4})); +} + +TEST(AlignedVector64, move_constructor) { + AlignedVector64 x{1, 2, 3, 4}; + AlignedVector64 y{std::move(x)}; + ASSERT_EQ(reinterpret_cast(x.data()) % 64, 0); + ASSERT_EQ(reinterpret_cast(y.data()) % 64, 0); + ASSERT_EQ(y, (AlignedVector64{1, 2, 3, 4})); +} + +} // namespace hexl +} // namespace intel diff --git a/test/test-avx512-util.cpp b/test/test-avx512-util.cpp new file mode 100644 index 00000000..682f78d4 --- /dev/null +++ b/test/test-avx512-util.cpp @@ -0,0 +1,230 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include "gtest/gtest.h" +#include "test-util.hpp" +#include "util/avx512-util.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_HAS_AVX512DQ + +TEST(AVX512, ExtractValues) { + __m512i x = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + + AssertEqual(ExtractValues(x), std::vector{8, 7, 6, 5, 4, 3, 2, 1}); +} + +TEST(AVX512, ExtractIntValues) { + __m512i x = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + AssertEqual(ExtractIntValues(x), + std::vector{8, 7, 6, 5, 4, 3, 2, 1}); +} + +TEST(AVX512, ExtractDoubleValues) { + __m512d x = _mm512_set_pd(-4.4, -3.3, -2.2, -1.1, 0, 1.1, 2.2, 3.3); + AssertEqual(ExtractValues(x), + std::vector{3.3, 2.2, 1.1, 0, -1.1, -2.2, -3.3, -4.4}); +} +#endif + +#ifdef HEXL_HAS_AVX512IFMA +TEST(AVX512, _mm512_hexl_mulhi_epi52) { + __m512i w = _mm512_set_epi64(90774764920991, 90774764920991, 90774764920991, + 90774764920991, 90774764920991, 90774764920991, + 90774764920991, 90774764920991); + __m512i y = _mm512_set_epi64(424, 635, 757, 457, 280, 624, 353, 496); + + __m512i expected = _mm512_set_epi64(8, 12, 15, 9, 5, 12, 7, 9); + + __m512i z = _mm512_hexl_mulhi_epi<52>(w, y); + + 
ASSERT_TRUE(Equals(z, expected)); +} +#endif + +#ifdef HEXL_HAS_AVX512DQ +TEST(AVX512, _mm512_hexl_cmplt_epu64) { + // Small + { + uint64_t match_value = 10; + __m512i a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + __m512i b = _mm512_set_epi64(0, 1, 1, 0, 5, 6, 100, 100); + __m512i expected_out = _mm512_set_epi64( + 0, 0, 0, 0, match_value, match_value, match_value, match_value); + + __m512i c = _mm512_hexl_cmplt_epu64(a, b, match_value); + + CheckEqual(c, expected_out); + } + + // Large + { + uint64_t match_value = 13; + __m512i a = _mm512_set_epi64(1ULL << 32, // + 1ULL << 63, // + (1ULL << 63) + 1, // + (1ULL << 63) + 10, // + 0, // + 0, // + 0, // + 0); + __m512i b = _mm512_set_epi64(1ULL << 32, // + 1ULL << 63, // + 1ULL << 63, // + (1ULL << 63) + 17, // + 0, // + 0, // + 0, // + 0); + __m512i expected_out = _mm512_set_epi64(0, 0, 0, match_value, 0, 0, 0, 0); + + __m512i c = _mm512_hexl_cmplt_epu64(a, b, match_value); + + CheckEqual(c, expected_out); + } +} + +TEST(AVX512, _mm512_hexl_cmpge_epu64) { + // Small + { + uint64_t match_value = 10; + __m512i a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + __m512i b = _mm512_set_epi64(0, 1, 1, 0, 5, 6, 100, 100); + __m512i expected_out = _mm512_set_epi64( + match_value, match_value, match_value, match_value, 0, 0, 0, 0); + + __m512i c = _mm512_hexl_cmpge_epu64(a, b, match_value); + + CheckEqual(c, expected_out); + } + + // Large + { + uint64_t match_value = 13; + __m512i a = _mm512_set_epi64(1ULL << 32, // + 1ULL << 63, // + (1ULL << 63) + 1, // + (1ULL << 63) + 10, // + 0, // + 0, // + 0, // + 0); + __m512i b = _mm512_set_epi64(1ULL << 32, // + 1ULL << 63, // + 1ULL << 63, // + (1ULL << 63) + 17, // + 0, // + 0, // + 0, // + 0); + __m512i expected_out = + _mm512_set_epi64(match_value, match_value, match_value, 0, match_value, + match_value, match_value, match_value); + + __m512i c = _mm512_hexl_cmpge_epu64(a, b, match_value); + + CheckEqual(c, expected_out); + } +} + +TEST(AVX512, _mm512_hexl_small_mod_epu64) { + 
// Small + { + __m512i a = _mm512_set_epi64(0, 2, 4, 6, 8, 10, 11, 12); + __m512i mods = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + __m512i expected_out = _mm512_set_epi64(0, 0, 1, 2, 3, 4, 4, 4); + + __m512i c = _mm512_hexl_small_mod_epu64(a, mods); + + CheckEqual(c, expected_out); + } + + // Large + { + __m512i a = _mm512_set_epi64(1ULL << 32, // + 1ULL << 63, // + (1ULL << 63) + 1, // + (1ULL << 63) + 10, // + 0, // + 0, // + 0, // + 0); + __m512i mods = _mm512_set_epi64(1ULL << 32, // + 1ULL << 63, // + 1ULL << 63, // + (1ULL << 63) + 17, // + 0, // + 0, // + 0, // + 0); + __m512i expected_out = + _mm512_set_epi64(0, 0, 1, (1ULL << 63) + 10, 0, 0, 0, 0); + + __m512i c = _mm512_hexl_small_mod_epu64(a, mods); + + CheckEqual(c, expected_out); + } +} + +TEST(AVX512, _mm512_hexl_barrett_reduce64) { + // Small + { + __m512i a = _mm512_set_epi64(12, 11, 10, 8, 6, 4, 2, 0); + + std::vector mods{2, 2, 3, 4, 5, 6, 7, 8}; + std::vector barrs(mods.size()); + for (size_t i = 0; i < barrs.size(); ++i) { + barrs[i] = MultiplyFactor(1, 64, mods[i]).BarrettFactor(); + } + + __m512i vmods = _mm512_set_epi64(mods[7], mods[6], mods[5], mods[4], + mods[3], mods[2], mods[1], mods[0]); + __m512i vbarrs = _mm512_set_epi64(barrs[7], barrs[6], barrs[5], barrs[4], + barrs[3], barrs[2], barrs[1], barrs[0]); + + __m512i expected_out = _mm512_set_epi64(4, 4, 4, 3, 2, 1, 0, 0); + + __m512i c = _mm512_hexl_barrett_reduce64(a, vmods, vbarrs); + AssertEqual(c, expected_out); + } + + // Random + { + std::random_device rd; + std::mt19937 gen(rd()); + + uint64_t modulus = 75; + std::uniform_int_distribution distrib(50, modulus * modulus - 1); + __m512i vmod = _mm512_set1_epi64(modulus); + __m512i vbarr = + _mm512_set1_epi64(MultiplyFactor(1, 64, modulus).BarrettFactor()); + + for (size_t trial = 0; trial < 200; ++trial) { + std::vector arg1(8, 0); + std::vector exp(8, 0); + for (size_t i = 0; i < 8; ++i) { + arg1[i] = distrib(gen); + exp[i] = arg1[i] % modulus; + } + __m512i varg1 = 
_mm512_set_epi64(arg1[7], arg1[6], arg1[5], arg1[4], + arg1[3], arg1[2], arg1[1], arg1[0]); + + __m512i c = _mm512_hexl_barrett_reduce64(varg1, vmod, vbarr); + std::vector result = ExtractValues(c); + + ASSERT_EQ(result, exp); + } + } +} +#endif + +} // namespace hexl +} // namespace intel diff --git a/test/test-eltwise-add-mod.cpp b/test/test-eltwise-add-mod.cpp new file mode 100644 index 00000000..81dbdec2 --- /dev/null +++ b/test/test-eltwise-add-mod.cpp @@ -0,0 +1,144 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include "eltwise/eltwise-add-mod-avx512.hpp" +#include "eltwise/eltwise-add-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-add-mod.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "test-util.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_DEBUG +TEST(EltwiseAdd, bad_input) { + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8}; + std::vector op2{1, 3, 5, 7, 9, 2, 4, 6}; + std::vector big_input{11, 12, 13, 14, 15, 16, 17, 18}; + uint64_t modulus = 10; + + EXPECT_ANY_THROW( + EltwiseAddMod(nullptr, op1.data(), op2.data(), op1.size(), modulus)); + EXPECT_ANY_THROW( + EltwiseAddMod(op1.data(), nullptr, op2.data(), op1.size(), modulus)); + EXPECT_ANY_THROW( + EltwiseAddMod(op1.data(), op1.data(), nullptr, op1.size(), modulus)); + EXPECT_ANY_THROW( + EltwiseAddMod(op1.data(), op1.data(), op2.data(), 0, modulus)); + EXPECT_ANY_THROW( + EltwiseAddMod(op1.data(), op1.data(), op2.data(), op1.size(), 1)); + EXPECT_ANY_THROW(EltwiseAddMod(op1.data(), big_input.data(), op2.data(), + op1.size(), modulus)); + EXPECT_ANY_THROW(EltwiseAddMod(op1.data(), op1.data(), big_input.data(), + op1.size(), modulus)); +} +#endif + +TEST(EltwiseAdd, native_small) { + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8}; + std::vector op2{1, 3, 5, 7, 9, 4, 4, 6}; + std::vector exp_out{2, 5, 8, 1, 4, 0, 1, 4}; + uint64_t modulus = 10; + + 
EltwiseAddModNative(op1.data(), op1.data(), op2.data(), op1.size(), modulus); + + CheckEqual(op1, exp_out); +} + +TEST(EltwiseAdd, native_big) { + uint64_t modulus = GeneratePrimes(1, 60, 1024)[0]; + + std::vector op1{modulus - 1, modulus - 1, modulus - 2, modulus - 2, + modulus - 3, modulus - 3, modulus - 4, modulus - 4}; + std::vector op2{modulus - 1, modulus - 2, modulus - 3, modulus - 4, + modulus - 5, modulus - 6, modulus - 7, modulus - 8}; + std::vector exp_out{modulus - 2, modulus - 3, modulus - 5, + modulus - 6, modulus - 8, modulus - 9, + modulus - 11, modulus - 12}; + + EltwiseAddModNative(op1.data(), op1.data(), op2.data(), op1.size(), modulus); + + CheckEqual(op1, exp_out); +} + +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseAdd, avx512_small) { + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8}; + std::vector op2{1, 3, 5, 7, 9, 2, 4, 6}; + std::vector exp_out{2, 5, 8, 1, 4, 8, 1, 4}; + uint64_t modulus = 10; + EltwiseAddModAVX512(op1.data(), op1.data(), op2.data(), op1.size(), modulus); + + CheckEqual(op1, exp_out); +} + +TEST(EltwiseAdd, avx512_big) { + uint64_t modulus = GeneratePrimes(1, 60, 1024)[0]; + + std::vector op1{modulus - 1, modulus - 1, modulus - 2, modulus - 2, + modulus - 3, modulus - 3, modulus - 4, modulus - 4}; + std::vector op2{modulus - 1, modulus - 2, modulus - 3, modulus - 4, + modulus - 5, modulus - 6, modulus - 7, modulus - 8}; + std::vector exp_out{modulus - 2, modulus - 3, modulus - 5, + modulus - 6, modulus - 8, modulus - 9, + modulus - 11, modulus - 12}; + + EltwiseAddModAVX512(op1.data(), op1.data(), op2.data(), op1.size(), modulus); + + CheckEqual(op1, exp_out); +} +#endif + +// Checks AVX512 and native eltwise add implementations match +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseAdd, AVX512Big) { + std::random_device rd; + std::mt19937 gen(rd()); + + size_t length = 173; + + for (size_t bits = 1; bits <= 62; ++bits) { + uint64_t modulus = 1ULL << bits; + + std::uniform_int_distribution distrib(0, modulus - 1); + +#ifdef HEXL_DEBUG + size_t 
num_trials = 10; +#else + size_t num_trials = 100; +#endif + + for (size_t trial = 0; trial < num_trials; ++trial) { + std::vector op1(length, 0); + std::vector op2(length, 0); + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + op2[i] = distrib(gen); + } + op1[0] = modulus - 1; + op2[0] = modulus - 1; + + auto op1a = op1; + + EltwiseAddModNative(op1.data(), op1.data(), op2.data(), op1.size(), + modulus); + EltwiseAddModAVX512(op1a.data(), op1a.data(), op2.data(), op1.size(), + modulus); + + ASSERT_EQ(op1, op1a); + ASSERT_EQ(op1[0], modulus - 2); + ASSERT_EQ(op1a[0], modulus - 2); + } + } +} +#endif + +} // namespace hexl +} // namespace intel diff --git a/test/test-eltwise-cmp-add.cpp b/test/test-eltwise-cmp-add.cpp new file mode 100644 index 00000000..d1caa746 --- /dev/null +++ b/test/test-eltwise-cmp-add.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include "eltwise/eltwise-cmp-add-avx512.hpp" +#include "eltwise/eltwise-cmp-add-internal.hpp" +#include "intel-hexl/eltwise/eltwise-cmp-add.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "test-util.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_DEBUG +TEST(EltwiseCmpAdd, null) { + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8}; + + EXPECT_ANY_THROW( + EltwiseCmpAdd(nullptr, op1.data(), CMPINT::EQ, 1, 1, op1.size())); + EXPECT_ANY_THROW( + EltwiseCmpAdd(op1.data(), nullptr, CMPINT::EQ, 1, 1, op1.size())); + EXPECT_ANY_THROW( + EltwiseCmpAdd(op1.data(), op1.data(), CMPINT::EQ, 1, 0, op1.size())); + EXPECT_ANY_THROW(EltwiseCmpAdd(op1.data(), op1.data(), CMPINT::EQ, 1, 1, 0)); +} +#endif + +// Parameters = (input, cmp, bound, diff, expected_output) +class EltwiseCmpAddTest + : public ::testing::TestWithParam< + std::tuple, CMPINT, uint64_t, uint64_t, + std::vector>> { + protected: + void SetUp() {} + + void TearDown() {} + + public: +}; + +// Test 
Native implementation +TEST_P(EltwiseCmpAddTest, Native) { + std::vector input = std::get<0>(GetParam()); + CMPINT cmp = std::get<1>(GetParam()); + uint64_t bound = std::get<2>(GetParam()); + uint64_t diff = std::get<3>(GetParam()); + std::vector exp_output = std::get<4>(GetParam()); + + EltwiseCmpAddNative(input.data(), input.data(), cmp, bound, diff, + input.size()); + + CheckEqual(input, exp_output); +} + +INSTANTIATE_TEST_SUITE_P( + EltwiseCmpAddTest, EltwiseCmpAddTest, + ::testing::Values( + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::EQ, + 4, 5, std::vector{1, 2, 3, 9, 5, 6, 7}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::LT, + 4, 5, std::vector{6, 7, 8, 4, 5, 6, 7}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::LE, + 4, 5, std::vector{6, 7, 8, 9, 5, 6, 7}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, + CMPINT::FALSE, 4, 5, + std::vector{1, 2, 3, 4, 5, 6, 7}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::NE, + 4, 5, std::vector{6, 7, 8, 4, 10, 11, 12}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::NLT, + 4, 5, std::vector{1, 2, 3, 9, 10, 11, 12}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::NLE, + 4, 5, std::vector{1, 2, 3, 4, 10, 11, 12}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, + CMPINT::TRUE, 4, 5, + std::vector{6, 7, 8, 9, 10, 11, 12}))); + +// Checks AVX512 and native implementations match +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseCmpAdd, AVX512) { + uint64_t length = 1025; + std::random_device rd; + std::mt19937 gen(rd()); + + std::uniform_int_distribution distrib(0, 100); + + for (size_t cmp = 0; cmp < 8; ++cmp) { + for (size_t trial = 0; trial < 200; ++trial) { + std::vector op1(length, 0); + uint64_t bound = distrib(gen); + uint64_t diff = distrib(gen) + 1; + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + } + std::vector op1a = op1; + std::vector op1b = op1; + std::vector op1_out(op1.size(), 0); + std::vector 
op1a_out(op1.size(), 0); + std::vector op1b_out(op1.size(), 0); + + EltwiseCmpAdd(op1_out.data(), op1.data(), static_cast(cmp), bound, + diff, op1.size()); + EltwiseCmpAddNative(op1a_out.data(), op1a.data(), + static_cast(cmp), bound, diff, op1a.size()); + EltwiseCmpAddAVX512(op1b_out.data(), op1b.data(), + static_cast(cmp), bound, diff, op1b.size()); + + ASSERT_EQ(op1_out, op1a_out); + ASSERT_EQ(op1_out, op1b_out); + } + } +} +#endif + +} // namespace hexl +} // namespace intel diff --git a/test/test-eltwise-cmp-sub-mod.cpp b/test/test-eltwise-cmp-sub-mod.cpp new file mode 100644 index 00000000..59e74476 --- /dev/null +++ b/test/test-eltwise-cmp-sub-mod.cpp @@ -0,0 +1,131 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include "eltwise/eltwise-cmp-sub-mod-avx512.hpp" +#include "eltwise/eltwise-cmp-sub-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-cmp-sub-mod.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "test-util.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_DEBUG +TEST(EltwiseCmpSubMod, null) { + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8}; + + EXPECT_ANY_THROW( + EltwiseCmpSubMod(nullptr, op1.data(), CMPINT::EQ, 1, 1, 10, op1.size())); + EXPECT_ANY_THROW( + EltwiseCmpSubMod(op1.data(), nullptr, CMPINT::EQ, 1, 1, 10, op1.size())); + EXPECT_ANY_THROW(EltwiseCmpSubMod(op1.data(), op1.data(), CMPINT::EQ, 1, 0, + 10, op1.size())); + EXPECT_ANY_THROW(EltwiseCmpSubMod(op1.data(), op1.data(), CMPINT::EQ, 1, 0, 0, + op1.size())); + EXPECT_ANY_THROW( + EltwiseCmpSubMod(op1.data(), op1.data(), CMPINT::EQ, 1, 1, 10, 0)); +} +#endif + +// Parameters = (input, cmp, bound, diff, modulus, expected_output) +class EltwiseCmpSubModTest + : public ::testing::TestWithParam< + std::tuple, CMPINT, uint64_t, uint64_t, + uint64_t, std::vector>> { + protected: + void SetUp() {} + + void TearDown() {} + + public: +}; + +// Test Native 
implementation +TEST_P(EltwiseCmpSubModTest, Native) { + std::vector input = std::get<0>(GetParam()); + CMPINT cmp = std::get<1>(GetParam()); + uint64_t bound = std::get<2>(GetParam()); + uint64_t diff = std::get<3>(GetParam()); + uint64_t modulus = std::get<4>(GetParam()); + std::vector exp_output = std::get<5>(GetParam()); + + EltwiseCmpSubModNative(input.data(), input.data(), cmp, bound, diff, modulus, + input.size()); + + CheckEqual(input, exp_output); +} + +INSTANTIATE_TEST_SUITE_P( + EltwiseCmpSubModTest, EltwiseCmpSubModTest, + ::testing::Values( + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::EQ, + 4, 5, 10, std::vector{1, 2, 3, 9, 5, 6, 7}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::LT, + 4, 5, 10, std::vector{6, 7, 8, 4, 5, 6, 7}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::LE, + 4, 5, 10, std::vector{6, 7, 8, 9, 5, 6, 7}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, + CMPINT::FALSE, 4, 5, 10, + std::vector{1, 2, 3, 4, 5, 6, 7}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::NE, + 4, 5, 10, std::vector{6, 7, 8, 4, 0, 1, 2}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::NLT, + 4, 5, 10, std::vector{1, 2, 3, 9, 0, 1, 2}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, CMPINT::NLE, + 4, 5, 10, std::vector{1, 2, 3, 4, 0, 1, 2}), + std::make_tuple(std::vector{1, 2, 3, 4, 5, 6, 7}, + CMPINT::TRUE, 4, 5, 10, + std::vector{6, 7, 8, 9, 0, 1, 2}))); + +// Checks AVX512 and native implementations match +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseCmpSubMod, AVX512) { + uint64_t length = 172; + std::random_device rd; + std::mt19937 gen(rd()); + + for (size_t cmp = 0; cmp < 8; ++cmp) { + for (size_t bits = 48; bits <= 51; ++bits) { + uint64_t prime = GeneratePrimes(1, bits, 1024)[0]; + std::uniform_int_distribution distrib(0, prime - 1); + + for (size_t trial = 0; trial < 200; ++trial) { + std::vector op1(length, 0); + uint64_t bound = distrib(gen); + uint64_t diff = distrib(gen); + 
std::vector op3(length, 0); + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + op3[i] = distrib(gen); + } + std::vector op1a = op1; + std::vector op1b = op1; + std::vector op1_out(op1.size(), 0); + std::vector op1a_out(op1.size(), 0); + std::vector op1b_out(op1.size(), 0); + + EltwiseCmpSubMod(op1_out.data(), op1.data(), static_cast(cmp), + bound, diff, prime, op1.size()); + EltwiseCmpSubModNative(op1a_out.data(), op1a.data(), + static_cast(cmp), bound, diff, prime, + op1a.size()); + EltwiseCmpSubModAVX512(op1b_out.data(), op1b.data(), + static_cast(cmp), bound, diff, prime, + op1b.size()); + + ASSERT_EQ(op1_out, op1a_out); + ASSERT_EQ(op1_out, op1b_out); + } + } + } +} +#endif +} // namespace hexl +} // namespace intel diff --git a/test/test-eltwise-fma-mod.cpp b/test/test-eltwise-fma-mod.cpp new file mode 100644 index 00000000..77a3dc8b --- /dev/null +++ b/test/test-eltwise-fma-mod.cpp @@ -0,0 +1,313 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include "eltwise/eltwise-fma-mod-avx512.hpp" +#include "eltwise/eltwise-fma-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-fma-mod.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "test-util.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_DEBUG +TEST(EltwiseFMAMod, null) { + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8}; + + std::vector arg1{1, 2, 3, 4, 5, 6, 7, 8}; + uint64_t arg2 = 1; + std::vector arg3{9, 10, 11, 12, 13, 14, 15, 16}; + std::vector exp_out{10, 12, 14, 16, 18, 20, 22, 24}; + uint64_t modulus = 769; + std::vector big_input(op1.size(), modulus); + + EXPECT_ANY_THROW(EltwiseFMAMod(nullptr, arg1.data(), arg2, arg3.data(), + arg1.size(), modulus, 1)); + EXPECT_ANY_THROW(EltwiseFMAMod(arg1.data(), nullptr, arg2, arg3.data(), + arg1.size(), modulus, 1)); + EXPECT_ANY_THROW(EltwiseFMAMod(arg1.data(), arg1.data(), arg2, arg3.data(), 0, + modulus, 1)); + 
EXPECT_ANY_THROW(EltwiseFMAMod(arg1.data(), arg1.data(), arg2, arg3.data(), + arg1.size(), 1, 1)); + EXPECT_ANY_THROW(EltwiseFMAMod(arg1.data(), arg1.data(), arg2, arg3.data(), + arg1.size(), 1, 99)); + EXPECT_ANY_THROW(EltwiseFMAMod(arg1.data(), big_input.data(), arg2, + arg3.data(), arg1.size(), modulus, 1)); + EXPECT_ANY_THROW(EltwiseFMAMod(arg1.data(), arg1.data(), arg2, + big_input.data(), arg1.size(), modulus, 1)); +} +#endif + +TEST(EltwiseFMAMod, small) { + std::vector arg1{1, 2, 3, 4, 5, 6, 7, 8}; + uint64_t arg2 = 1; + std::vector arg3{9, 10, 11, 12, 13, 14, 15, 16}; + std::vector exp_out{10, 12, 14, 16, 18, 20, 22, 24}; + uint64_t modulus = 769; + + EltwiseFMAMod(arg1.data(), arg1.data(), arg2, arg3.data(), arg1.size(), + modulus, 1); + + CheckEqual(arg1, exp_out); +} + +TEST(EltwiseFMAMod, native_null) { + std::vector arg1{1, 2, 3, 4, 5, 6, 7, 8, 9}; + uint64_t arg2 = 1; + std::vector exp_out{1, 2, 3, 4, 5, 6, 7, 8, 9}; + uint64_t modulus = 769; + + EltwiseFMAMod(arg1.data(), arg1.data(), arg2, nullptr, arg1.size(), modulus, + 1); + + CheckEqual(arg1, exp_out); +} + +TEST(EltwiseFMAMod, mult_input_mod_factor) { + uint64_t modulus = 101; + + for (uint64_t input_mod_factor = 1; input_mod_factor <= 8; + input_mod_factor *= 2) { + uint64_t arg1_add = (input_mod_factor - 1) * modulus; + std::vector arg1{arg1_add + 1, arg1_add + 2, arg1_add + 3, + arg1_add + 4, arg1_add + 5, arg1_add + 6, + arg1_add + 7, arg1_add + 8, arg1_add + 9, + arg1_add + 10, arg1_add + 11, arg1_add + 12, + arg1_add + 13, arg1_add + 14, arg1_add + 15, + arg1_add + 16, arg1_add + 17}; + + uint64_t arg2 = 72; + std::vector arg3{17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 33}; + std::vector exp_out{89, 61, 33, 5, 78, 50, 22, 95, 67, + 39, 11, 84, 56, 28, 0, 73, 45}; + + EltwiseFMAMod(arg1.data(), arg1.data(), arg2, arg3.data(), arg1.size(), + modulus, input_mod_factor); + + CheckEqual(arg1, exp_out); + } +} + +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseFMAMod, 
avx512_small) { + std::vector arg1{1, 2, 3, 4, 5, 6, 7, 8}; + uint64_t arg2 = 2; + std::vector arg3{1, 1, 1, 1, 2, 3, 1, 0}; + std::vector exp_out{3, 5, 7, 9, 12, 15, 15, 16}; + + uint64_t modulus = 101; + EltwiseFMAModAVX512<64, 1>(arg1.data(), arg1.data(), arg2, arg3.data(), + arg1.size(), modulus); + + CheckEqual(arg1, exp_out); +} + +TEST(EltwiseFMAMod, avx512_small2) { + std::vector arg1{1, 2, 3, 4, 5, 6, 7, 8}; + uint64_t arg2 = 17; + std::vector arg3{9, 10, 11, 12, 13, 14, 15, 16}; + std::vector exp_out{26, 44, 62, 80, 98, 15, 33, 51}; + + uint64_t modulus = 101; + + EltwiseFMAModAVX512<64, 1>(arg1.data(), arg1.data(), arg2, arg3.data(), + arg1.size(), modulus); + + CheckEqual(arg1, exp_out); +} + +TEST(EltwiseFMAMod, avx512_mult1) { + std::vector arg1{1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + uint64_t arg2 = 17; + std::vector arg3{17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32}; + std::vector exp_out{34, 52, 70, 88, 5, 23, 41, 59, + 77, 95, 12, 30, 48, 66, 84, 1}; + + uint64_t modulus = 101; + + EltwiseFMAModAVX512<64, 1>(arg1.data(), arg1.data(), arg2, arg3.data(), + arg1.size(), modulus); + + CheckEqual(arg1, exp_out); +} + +TEST(EltwiseFMAMod, avx512_mult2) { + std::vector arg1{102, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + uint64_t arg2 = 17; + std::vector arg3{17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32}; + std::vector exp_out{34, 52, 70, 88, 5, 23, 41, 59, + 77, 95, 12, 30, 48, 66, 84, 1}; + + uint64_t modulus = 101; + + EltwiseFMAModAVX512<64, 2>(arg1.data(), arg1.data(), arg2, arg3.data(), + arg1.size(), modulus); + + CheckEqual(arg1, exp_out); +} + +TEST(EltwiseFMAMod, avx512_mult4) { + std::vector arg1{400, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + uint64_t arg2 = 17; + std::vector arg3{17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32}; + std::vector exp_out{50, 52, 70, 88, 5, 23, 41, 59, + 77, 95, 12, 30, 48, 66, 84, 1}; + + uint64_t modulus = 
101; + + EltwiseFMAModAVX512<64, 4>(arg1.data(), arg1.data(), arg2, arg3.data(), + arg1.size(), modulus); + + CheckEqual(arg1, exp_out); +} + +TEST(EltwiseFMAMod, avx512_mult8) { + std::vector arg1{800, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + uint64_t arg2 = 17; + std::vector arg3{17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32}; + std::vector exp_out{83, 52, 70, 88, 5, 23, 41, 59, + 77, 95, 12, 30, 48, 66, 84, 1}; + + uint64_t modulus = 101; + + EltwiseFMAModAVX512<64, 8>(arg1.data(), arg1.data(), arg2, arg3.data(), + arg1.size(), modulus); + + CheckEqual(arg1, exp_out); +} +#endif + +// Check AVX512DQ and native eltwise FMA implementations match +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseFMAMod, AVX512DQ) { + uint64_t length = 1031; + std::random_device rd; + std::mt19937 gen(rd()); + + for (size_t input_mod_factor = 1; input_mod_factor <= 8; + input_mod_factor *= 2) { + for (size_t bits = 1; bits <= 60; ++bits) { + uint64_t modulus = (1ULL << bits) + 7; + std::uniform_int_distribution distrib( + 0, input_mod_factor * modulus - 1); + +#ifdef HEXL_DEBUG + size_t num_trials = 100; +#else + size_t num_trials = 1000; +#endif + + for (size_t trial = 0; trial < num_trials; ++trial) { + std::vector arg1(length, 0); + uint64_t arg2 = distrib(gen); + std::vector arg3(length, 0); + for (size_t i = 0; i < length; ++i) { + arg1[i] = distrib(gen); + arg3[i] = distrib(gen); + } + std::vector out_default(length, 0); + std::vector out_native(length, 0); + std::vector out_avx(length, 0); + + uint64_t* arg3_data = (trial % 2 == 0) ? 
arg3.data() : nullptr; + + EltwiseFMAMod(out_default.data(), arg1.data(), arg2, arg3_data, + arg1.size(), modulus, input_mod_factor); + + switch (input_mod_factor) { + case 1: + EltwiseFMAModNative<1>(out_native.data(), arg1.data(), arg2, + arg3_data, arg1.size(), modulus); + EltwiseFMAModAVX512<64, 1>(out_avx.data(), arg1.data(), arg2, + arg3_data, arg1.size(), modulus); + break; + case 2: + EltwiseFMAModNative<2>(out_native.data(), arg1.data(), arg2, + arg3_data, arg1.size(), modulus); + EltwiseFMAModAVX512<64, 2>(out_avx.data(), arg1.data(), arg2, + arg3_data, arg1.size(), modulus); + break; + case 4: + EltwiseFMAModNative<4>(out_native.data(), arg1.data(), arg2, + arg3_data, arg1.size(), modulus); + EltwiseFMAModAVX512<64, 4>(out_avx.data(), arg1.data(), arg2, + arg3_data, arg1.size(), modulus); + break; + case 8: + EltwiseFMAModNative<8>(out_native.data(), arg1.data(), arg2, + arg3_data, arg1.size(), modulus); + EltwiseFMAModAVX512<64, 8>(out_avx.data(), arg1.data(), arg2, + arg3_data, arg1.size(), modulus); + break; + } + + ASSERT_EQ(out_default, out_native); + ASSERT_EQ(out_default, out_avx); + } + } + } +} + +#endif + +// Checks AVX512IFMA and native eltwise FMA implementations match +#ifdef HEXL_HAS_AVX512IFMA +TEST(EltwiseFMAMod, AVX512) { + uint64_t length = 1024; + std::random_device rd; + std::mt19937 gen(rd()); + + constexpr uint64_t input_mod_factor = 8; + + for (size_t bits = 48; bits <= 51; ++bits) { + uint64_t prime = GeneratePrimes(1, bits, length)[0]; + std::uniform_int_distribution distrib( + 0, input_mod_factor * prime - 1); + + for (size_t trial = 0; trial < 1000; ++trial) { + std::vector arg1(length, 0); + uint64_t arg2 = distrib(gen) % prime; + std::vector arg3(length, 0); + for (size_t i = 0; i < length; ++i) { + arg1[i] = distrib(gen); + arg3[i] = distrib(gen); + } + std::vector arg1a = arg1; + std::vector arg1b = arg1; + + uint64_t* arg3_data = (trial % 2 == 0) ? 
arg3.data() : nullptr; + + EltwiseFMAMod(arg1.data(), arg1.data(), arg2, arg3_data, arg1.size(), + prime, input_mod_factor); + + EltwiseFMAModAVX512<52, input_mod_factor>( + arg1a.data(), arg1a.data(), arg2, arg3_data, arg1.size(), prime); + + EltwiseFMAModAVX512<64, input_mod_factor>( + arg1b.data(), arg1b.data(), arg2, arg3_data, arg1.size(), prime); + + ASSERT_EQ(arg1, arg1a); + ASSERT_EQ(arg1, arg1b); + } + } +} +#endif + +} // namespace hexl +} // namespace intel diff --git a/test/test-eltwise-mult-mod.cpp b/test/test-eltwise-mult-mod.cpp new file mode 100644 index 00000000..8093d68e --- /dev/null +++ b/test/test-eltwise-mult-mod.cpp @@ -0,0 +1,427 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include "eltwise/eltwise-mult-mod-avx512.hpp" +#include "eltwise/eltwise-mult-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-mult-mod.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "test-util.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_DEBUG +TEST(EltwiseMultMod, null) { + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8}; + std::vector op2{1, 2, 3, 4, 5, 6, 7, 8}; + uint64_t modulus = 769; + std::vector big_input(op1.size(), modulus); + + EXPECT_ANY_THROW( + EltwiseMultMod(nullptr, op1.data(), op2.data(), op1.size(), modulus, 1)); + EXPECT_ANY_THROW( + EltwiseMultMod(op1.data(), nullptr, op2.data(), op1.size(), modulus, 1)); + EXPECT_ANY_THROW( + EltwiseMultMod(op1.data(), op1.data(), nullptr, op1.size(), modulus, 1)); + EXPECT_ANY_THROW( + EltwiseMultMod(op1.data(), op1.data(), op2.data(), 0, modulus, 1)); + EXPECT_ANY_THROW( + EltwiseMultMod(op1.data(), op1.data(), op2.data(), op1.size(), 1, 1)); + EXPECT_ANY_THROW(EltwiseMultMod(op1.data(), op1.data(), op2.data(), + op1.size(), modulus, 0)); + EXPECT_ANY_THROW(EltwiseMultMod(op1.data(), big_input.data(), op2.data(), + op1.size(), modulus, 1)); + 
EXPECT_ANY_THROW(EltwiseMultMod(op1.data(), op1.data(), big_input.data(), + op1.size(), modulus, 1)); +} +#endif + +TEST(EltwiseMultModInPlace, 4) { + std::vector op1{2, 4, 3, 2}; + std::vector op2{2, 1, 2, 0}; + std::vector exp_out{4, 4, 6, 0}; + + uint64_t modulus = 769; + + EltwiseMultMod(op1.data(), op1.data(), op2.data(), op1.size(), modulus, 1); + CheckEqual(op1, exp_out); +} + +TEST(EltwiseMultModInPlace, 6) { + std::vector op1{0, 1, 2, 3, 4, 5}; + std::vector op2{2, 4, 6, 8, 10, 12}; + std::vector exp_out{0, 4, 12, 24, 40, 60}; + + uint64_t modulus = 769; + + EltwiseMultMod(op1.data(), op1.data(), op2.data(), op1.size(), modulus, 1); + CheckEqual(op1, exp_out); +} + +#ifdef HEXL_DEBUG +TEST(EltwiseMultModInPlace, 8_bounds) { + std::vector op1{0, 1, 2, 3, 4, 5, 6, 7}; + std::vector op2{0, 1, 2, 3, 4, 5, 6, 770}; + + uint64_t modulus = 769; + + EXPECT_ANY_THROW(EltwiseMultMod(op1.data(), op1.data(), op2.data(), + op1.size(), modulus, 1)); +} +#endif + +TEST(EltwiseMultModInPlace, 9) { + uint64_t modulus = GeneratePrimes(1, 51, 1024)[0]; + + std::vector op1{modulus - 3, 1, 2, 3, 4, 5, 6, 7, 8}; + std::vector op2{modulus - 4, 8, 7, 6, 5, 4, 3, 2, 1}; + std::vector exp_out{12, 8, 14, 18, 20, 20, 18, 14, 8}; + + EltwiseMultMod(op1.data(), op1.data(), op2.data(), op1.size(), modulus, 1); + + CheckEqual(op1, exp_out); +} + +TEST(EltwiseMultMod, native_mult2) { + std::vector op1{1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + std::vector op2{17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 0, 0, 0}; + std::vector exp_out{17, 36, 57, 80, 4, 31, 60, 91, + 23, 58, 95, 33, 74, 16, 61, 7}; + uint64_t modulus = 101; + + EltwiseMultModNative<1>(result.data(), op1.data(), op2.data(), op1.size(), + modulus); + + CheckEqual(result, exp_out); +} + +TEST(EltwiseMultMod, native2_big) { + uint64_t modulus = GeneratePrimes(1, 60, 1024)[0]; + + std::vector op1{modulus - 3, 1, 1, 1, 1, 1, 1, 
1}; + std::vector op2{modulus - 4, 1, 1, 1, 1, 1, 1, 1}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0}; + std::vector exp_out{12, 1, 1, 1, 1, 1, 1, 1}; + + EltwiseMultModNative<1>(result.data(), op1.data(), op2.data(), op1.size(), + modulus); + + CheckEqual(result, exp_out); +} + +TEST(EltwiseMultMod, 8big) { + uint64_t modulus = GeneratePrimes(1, 48, 1024)[0]; + + std::vector op1{modulus - 1, 1, 1, 1, 1, 1, 1, 1}; + std::vector op2{modulus - 1, 1, 1, 1, 1, 1, 1, 1}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0}; + std::vector exp_out{1, 1, 1, 1, 1, 1, 1, 1}; + + EltwiseMultModNative<1>(result.data(), op1.data(), op2.data(), op1.size(), + modulus); + + CheckEqual(result, exp_out); +} + +TEST(EltwiseMultMod, 8big2) { + uint64_t p = 281474976749569; + + std::vector op1{(p - 1) / 2, 1, 1, 1, 1, 1, 1, 1}; + std::vector op2{(p + 1) / 2, 1, 1, 1, 1, 1, 1, 1}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0}; + std::vector exp_out{70368744187392, 1, 1, 1, 1, 1, 1, 1}; + + EltwiseMultModNative<1>(result.data(), op1.data(), op2.data(), op1.size(), p); + + CheckEqual(result, exp_out); +} + +TEST(EltwiseMultMod, 8big3) { + uint64_t p = 1125891450734593; + + std::vector op1{1078888294739028, 1, 1, 1, 1, 1, 1, 1}; + std::vector op2{1114802337613200, 1, 1, 1, 1, 1, 1, 1}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0}; + std::vector exp_out{13344071208410, 1, 1, 1, 1, 1, 1, 1}; + + EltwiseMultModNative<1>(result.data(), op1.data(), op2.data(), op1.size(), p); + + CheckEqual(result, exp_out); +} +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseMultMod, avx512_small) { + std::vector op1{1, 2, 3, 1, 1, 1, 0, 1, 0}; + std::vector op2{1, 1, 1, 1, 2, 3, 1, 0, 0}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::vector exp_out{1, 2, 3, 1, 2, 3, 0, 0, 0}; + + uint64_t modulus = 769; + EltwiseMultModAVX512Float<1>(result.data(), op1.data(), op2.data(), + op1.size(), modulus); + + CheckEqual(result, exp_out); +} + +TEST(EltwiseMultMod, avx512_int2) { + uint64_t modulus = GeneratePrimes(1, 60, 
1024)[0]; + + std::vector op1{modulus - 3, 1, 1, 1, 1, 1, 1, 1}; + std::vector op2{modulus - 4, 1, 1, 1, 1, 1, 1, 1}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0}; + std::vector exp_out{12, 1, 1, 1, 1, 1, 1, 1}; + + EltwiseMultModAVX512Int<2>(result.data(), op1.data(), op2.data(), op1.size(), + modulus); + + CheckEqual(result, exp_out); +} + +#endif + +TEST(EltwiseMultMod, 4) { + std::vector op1{2, 4, 3, 2}; + std::vector op2{2, 1, 2, 0}; + std::vector result{0, 0, 0, 0}; + std::vector exp_out{4, 4, 6, 0}; + + uint64_t modulus = 769; + + EltwiseMultMod(result.data(), op1.data(), op2.data(), op1.size(), modulus, 1); + CheckEqual(result, exp_out); +} + +TEST(EltwiseMultMod, 6) { + std::vector op1{0, 1, 2, 3, 4, 5}; + std::vector op2{2, 4, 6, 8, 10, 12}; + std::vector result{0, 0, 0, 0, 0, 0}; + std::vector exp_out{0, 4, 12, 24, 40, 60}; + + uint64_t modulus = 769; + + EltwiseMultMod(result.data(), op1.data(), op2.data(), op1.size(), modulus, 1); + CheckEqual(result, exp_out); +} + +#ifdef HEXL_DEBUG +TEST(EltwiseMultMod, 8_bounds) { + std::vector op1{0, 1, 2, 3, 4, 5, 6, 7}; + std::vector op2{0, 1, 2, 3, 4, 5, 6, 770}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0}; + + uint64_t modulus = 769; + + EXPECT_ANY_THROW(EltwiseMultMod(result.data(), op1.data(), op2.data(), + op1.size(), modulus, 1)); +} +#endif + +TEST(EltwiseMultMod, 9) { + uint64_t modulus = GeneratePrimes(1, 51, 1024)[0]; + + std::vector op1{modulus - 3, 1, 2, 3, 4, 5, 6, 7, 8}; + std::vector op2{modulus - 4, 8, 7, 6, 5, 4, 3, 2, 1}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::vector exp_out{12, 8, 14, 18, 20, 20, 18, 14, 8}; + + EltwiseMultMod(result.data(), op1.data(), op2.data(), op1.size(), modulus, 1); + + CheckEqual(result, exp_out); +} + +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseMultMod, Big) { + uint64_t modulus = 1125891450734593; + + std::vector op1{706712574074152, 943467560561867, 1115920708919443, + 515713505356094, 525633777116309, 910766532971356, + 757086506562426, 
799841520990167, 1}; + std::vector op2{515910833966633, 96924929169117, 537587376997453, + 41829060600750, 205864998008014, 463185427411646, + 965818279134294, 1075778049568657, 1}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::vector exp_out{ + 231838787758587, 618753612121218, 1116345967490421, + 409735411065439, 25680427818594, 950138933882289, + 554128714280822, 1465109636753, 1}; + + EltwiseMultModAVX512Int<4>(result.data(), op1.data(), op2.data(), op1.size(), + modulus); + + CheckEqual(result, exp_out); +} + +TEST(EltwiseMultMod, 8192) { + std::random_device rd; + std::mt19937 gen(rd()); + + size_t length = 8192; + + uint64_t input_mod_factor = 1; + uint64_t modulus = (1ULL << 53) + 7; + std::uniform_int_distribution distrib( + 0, input_mod_factor * modulus - 1); + + std::vector op1(length, 0); + std::vector op2(length, 0); + std::vector out_avx(length, 0); + std::vector out_native(length, 0); + + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + op2[i] = distrib(gen); + } + + std::vector result{0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::vector exp_out{ + 231838787758587, 618753612121218, 1116345967490421, + 409735411065439, 25680427818594, 950138933882289, + 554128714280822, 1465109636753, 1}; + + EltwiseMultModAVX512Int<1>(out_avx.data(), op1.data(), op2.data(), op1.size(), + modulus); + + EltwiseMultModNative<1>(out_native.data(), op1.data(), op2.data(), op1.size(), + modulus); + + CheckEqual(out_avx, out_native); +} + +TEST(EltwiseMultMod, 16384) { + std::random_device rd; + std::mt19937 gen(rd()); + + size_t length = 16384; + + uint64_t input_mod_factor = 1; + uint64_t modulus = (1ULL << 53) + 7; + std::uniform_int_distribution distrib( + 0, input_mod_factor * modulus - 1); + + std::vector op1(length, 0); + std::vector op2(length, 0); + std::vector out_avx(length, 0); + std::vector out_native(length, 0); + + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + op2[i] = distrib(gen); + } + + std::vector result{0, 0, 0, 0, 0, 
0, 0, 0, 0}; + std::vector exp_out{ + 231838787758587, 618753612121218, 1116345967490421, + 409735411065439, 25680427818594, 950138933882289, + 554128714280822, 1465109636753, 1}; + + EltwiseMultModAVX512Int<1>(out_avx.data(), op1.data(), op2.data(), op1.size(), + modulus); + + EltwiseMultModNative<1>(out_native.data(), op1.data(), op2.data(), op1.size(), + modulus); + + CheckEqual(out_avx, out_native); +} + +#endif + +// Checks AVX512 and native eltwise mult Out-of-Place implementations match +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseMultMod, AVX512Big) { + std::random_device rd; + std::mt19937 gen(rd()); + + size_t length = 173; + + for (size_t input_mod_factor = 1; input_mod_factor <= 4; + input_mod_factor *= 2) { + for (size_t bits = 1; bits <= 60; ++bits) { + uint64_t modulus = (1ULL << bits) + 7; + std::uniform_int_distribution distrib( + 0, input_mod_factor * modulus - 1); + + bool use_avx512_float = (input_mod_factor * modulus < MaximumValue(50)); + +#ifdef HEXL_DEBUG + size_t num_trials = 10; +#else + size_t num_trials = 100; +#endif + for (size_t trial = 0; trial < num_trials; ++trial) { + std::vector op1(length, 0); + std::vector op2(length, 0); + std::vector rs1(length, 0); + std::vector rs2(length, 0); + std::vector rs3(length, 0); + std::vector rs4(length, 0); + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + op2[i] = distrib(gen); + } + op1[0] = input_mod_factor * modulus - 1; + op2[0] = input_mod_factor * modulus - 1; + + switch (input_mod_factor) { + case 1: + EltwiseMultModNative<1>(rs1.data(), op1.data(), op2.data(), + op1.size(), modulus); + if (use_avx512_float) { + EltwiseMultModAVX512Float<1>(rs2.data(), op1.data(), op2.data(), + op1.size(), modulus); + } else { + EltwiseMultModAVX512Int<1>(rs3.data(), op1.data(), op2.data(), + op1.size(), modulus); + } + break; + case 2: + EltwiseMultModNative<2>(rs1.data(), op1.data(), op2.data(), + op1.size(), modulus); + if (use_avx512_float) { + EltwiseMultModAVX512Float<2>(rs2.data(), 
op1.data(), op2.data(), + op1.size(), modulus); + } else { + EltwiseMultModAVX512Int<2>(rs3.data(), op1.data(), op2.data(), + op1.size(), modulus); + } + break; + case 4: + EltwiseMultModNative<4>(rs1.data(), op1.data(), op2.data(), + op1.size(), modulus); + if (use_avx512_float) { + EltwiseMultModAVX512Float<4>(rs2.data(), op1.data(), op2.data(), + op1.size(), modulus); + } else { + EltwiseMultModAVX512Int<4>(rs3.data(), op1.data(), op2.data(), + op1.size(), modulus); + } + break; + } + EltwiseMultMod(rs4.data(), op1.data(), op2.data(), op1.size(), modulus, + input_mod_factor); + + ASSERT_EQ(rs4, rs1); + + ASSERT_EQ(rs1[0], 1); + if (use_avx512_float) { + ASSERT_EQ(rs1, rs2); + ASSERT_EQ(rs2[0], 1); + } else { + ASSERT_EQ(rs1, rs3); + ASSERT_EQ(rs3[0], 1); + } + } + } + } +} +#endif +} // namespace hexl +} // namespace intel diff --git a/test/test-eltwise-reduce-mod.cpp b/test/test-eltwise-reduce-mod.cpp new file mode 100644 index 00000000..6e210888 --- /dev/null +++ b/test/test-eltwise-reduce-mod.cpp @@ -0,0 +1,304 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include "eltwise/eltwise-reduce-mod-avx512.hpp" +#include "eltwise/eltwise-reduce-mod-internal.hpp" +#include "intel-hexl/eltwise/eltwise-reduce-mod.hpp" +#include "logging/logging.hpp" +#include "number-theory/number-theory.hpp" +#include "test-util.hpp" + +namespace intel { +namespace hexl { + +TEST(EltwiseReduceMod, 2_2) { + std::vector op{0, 450, 735, 900, 1350, 1459}; + std::vector exp_out{0, 450, 735, 900, 1350, 1459}; + std::vector result{0, 0, 0, 0, 0, 0}; + + const uint64_t modulus = 750; + const uint64_t input_mod_factor = 2; + const uint64_t output_mod_factor = 2; + EltwiseReduceMod(result.data(), op.data(), modulus, op.size(), + input_mod_factor, output_mod_factor); + CheckEqual(result, exp_out); +} + +TEST(EltwiseReduceMod, 4_1) { + std::vector op{2, 4, 1600, 2500}; + std::vector exp_out{2, 4, 100, 250}; 
+ std::vector result{0, 0, 0, 0}; + + const uint64_t modulus = 750; + const uint64_t input_mod_factor = 4; + const uint64_t output_mod_factor = 1; + EltwiseReduceMod(result.data(), op.data(), modulus, op.size(), + input_mod_factor, output_mod_factor); + CheckEqual(result, exp_out); +} + +TEST(EltwiseReduceMod, 0_1) { + std::vector op{2, 4, 1600, 2500}; + std::vector exp_out{2, 4, 100, 250}; + std::vector result{0, 0, 0, 0}; + + const uint64_t modulus = 750; + const uint64_t input_mod_factor = 0; + const uint64_t output_mod_factor = 1; + EltwiseReduceMod(result.data(), op.data(), modulus, op.size(), + input_mod_factor, output_mod_factor); + CheckEqual(result, exp_out); +} + +TEST(EltwiseReduceMod, 2_1) { + std::vector op{0, 450, 735, 900, 1350, 1459}; + std::vector exp_out{0, 450, 5, 170, 620, 729}; + std::vector result{0, 0, 0, 0, 0, 0}; + + const uint64_t modulus = 730; + const uint64_t input_mod_factor = 2; + const uint64_t output_mod_factor = 1; + EltwiseReduceMod(result.data(), op.data(), modulus, op.size(), + input_mod_factor, output_mod_factor); + CheckEqual(result, exp_out); +} + +TEST(EltwiseReduceMod, 4_2) { + std::vector op{1, 730, 1000, 1460, 2100, 2919}; + std::vector exp_out{1, 730, 1000, 0, 640, 1459}; + std::vector result{0, 0, 0, 0, 0, 0}; + + const uint64_t modulus = 730; + const uint64_t input_mod_factor = 4; + const uint64_t output_mod_factor = 2; + EltwiseReduceMod(result.data(), op.data(), modulus, op.size(), + input_mod_factor, output_mod_factor); + CheckEqual(result, exp_out); +} + +#ifdef HEXL_HAS_AVX512DQ +TEST(EltwiseReduceMod, avx512_0_1) { + std::vector op{0, 111, 250, 340, 769, 900, 1200, 1530}; + std::vector exp_out{0, 111, 250, 340, 0, 131, 431, 761}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0}; + + uint64_t modulus = 769; + const uint64_t input_mod_factor = 0; + const uint64_t output_mod_factor = 1; + EltwiseReduceModAVX512(result.data(), op.data(), modulus, op.size(), + input_mod_factor, output_mod_factor); + CheckEqual(result, 
exp_out); +} + +TEST(EltwiseReduceMod, avx512_2_1) { + std::vector op{0, 54, 100, 135, 201, 18, 148, 168, 201}; + std::vector exp_out{0, 54, 100, 34, 100, 18, 47, 67, 100}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0, 0}; + + uint64_t modulus = 101; + const uint64_t input_mod_factor = 2; + const uint64_t output_mod_factor = 1; + EltwiseReduceModAVX512(result.data(), op.data(), modulus, op.size(), + input_mod_factor, output_mod_factor); + CheckEqual(result, exp_out); +} + +TEST(EltwiseReduceMod, avx512_4_1) { + std::vector op{0, 54, 100, 135, 201, 220, 350, 370, 403}; + std::vector exp_out{0, 54, 100, 34, 100, 18, 47, 67, 100}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0, 0}; + + uint64_t modulus = 101; + const uint64_t input_mod_factor = 4; + const uint64_t output_mod_factor = 1; + EltwiseReduceModAVX512(result.data(), op.data(), modulus, op.size(), + input_mod_factor, output_mod_factor); + CheckEqual(result, exp_out); +} + +TEST(EltwiseReduceMod, avx512_4_2) { + std::vector op{0, 54, 100, 135, 201, 220, 350, 370, 403}; + std::vector exp_out{0, 54, 100, 135, 201, 18, 148, 168, 201}; + std::vector result{0, 0, 0, 0, 0, 0, 0, 0, 0}; + + uint64_t modulus = 101; + const uint64_t input_mod_factor = 4; + const uint64_t output_mod_factor = 2; + EltwiseReduceModAVX512(result.data(), op.data(), modulus, op.size(), + input_mod_factor, output_mod_factor); + CheckEqual(result, exp_out); +} + +// Checks AVX512 and native EltwiseReduceMod implementations match with randomly +// generated inputs +TEST(EltwiseReduceMod, AVX512Big_0_1) { + std::random_device rd; + std::mt19937 gen(rd()); + + for (size_t log2N = 13; log2N <= 15; ++log2N) { + size_t length = 1 << log2N; + + for (size_t bits = 50; bits <= 62; ++bits) { + uint64_t prime = GeneratePrimes(1, bits, 1024)[0]; + std::uniform_int_distribution distrib(0, prime - 1); + +#ifdef HEXL_DEBUG + size_t num_trials = 10; +#else + size_t num_trials = 100; +#endif + for (size_t trial = 0; trial < num_trials; ++trial) { + std::vector 
op1(length, 0); + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + } + std::vector result1(length, 0); + std::vector result2(length, 0); + std::vector result3(length, 0); + auto op2 = op1; + auto op3 = op1; + + EltwiseReduceModNative(result1.data(), op1.data(), prime, op1.size(), 0, + 1); + EltwiseReduceModNative(result2.data(), op2.data(), prime, op1.size(), 0, + 1); + EltwiseReduceModAVX512(result3.data(), op3.data(), prime, op2.size(), 0, + 1); + + ASSERT_EQ(result1, result2); + ASSERT_EQ(result1, result3); + } + } + } +} + +TEST(EltwiseReduceMod, AVX512Big_4_1) { + std::random_device rd; + std::mt19937 gen(rd()); + + for (size_t log2N = 13; log2N <= 15; ++log2N) { + size_t length = 1 << log2N; + + for (size_t bits = 50; bits <= 62; ++bits) { + uint64_t prime = GeneratePrimes(1, bits, 1024)[0]; + std::uniform_int_distribution distrib(0, (4 * prime) - 1); + +#ifdef HEXL_DEBUG + size_t num_trials = 10; +#else + size_t num_trials = 100; +#endif + for (size_t trial = 0; trial < num_trials; ++trial) { + std::vector op1(length, 0); + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + } + std::vector result1(length, 0); + std::vector result2(length, 0); + std::vector result3(length, 0); + auto op2 = op1; + auto op3 = op1; + + EltwiseReduceModNative(result1.data(), op1.data(), prime, op1.size(), 4, + 1); + EltwiseReduceModNative(result2.data(), op2.data(), prime, op1.size(), 4, + 1); + EltwiseReduceModAVX512(result3.data(), op3.data(), prime, op2.size(), 4, + 1); + + ASSERT_EQ(result1, result2); + ASSERT_EQ(result1, result3); + } + } + } +} + +TEST(EltwiseReduceMod, AVX512Big_4_2) { + std::random_device rd; + std::mt19937 gen(rd()); + + for (size_t log2N = 13; log2N <= 15; ++log2N) { + size_t length = 1 << log2N; + + for (size_t bits = 50; bits <= 62; ++bits) { + uint64_t prime = GeneratePrimes(1, bits, 1024)[0]; + std::uniform_int_distribution distrib(0, (4 * prime) - 1); + +#ifdef HEXL_DEBUG + size_t num_trials = 10; +#else + size_t 
num_trials = 100; +#endif + for (size_t trial = 0; trial < num_trials; ++trial) { + std::vector op1(length, 0); + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + } + std::vector result1(length, 0); + std::vector result2(length, 0); + std::vector result3(length, 0); + auto op2 = op1; + auto op3 = op1; + EltwiseReduceModNative(result1.data(), op1.data(), prime, op1.size(), 4, + 2); + EltwiseReduceModNative(result2.data(), op2.data(), prime, op1.size(), 4, + 2); + EltwiseReduceModAVX512(result3.data(), op3.data(), prime, op2.size(), 4, + 2); + + ASSERT_EQ(result1, result2); + ASSERT_EQ(result1, result3); + } + } + } +} + +TEST(EltwiseReduceMod, AVX512Big_2_1) { + std::random_device rd; + std::mt19937 gen(rd()); + + for (size_t log2N = 13; log2N <= 15; ++log2N) { + size_t length = 1 << log2N; + + for (size_t bits = 50; bits <= 62; ++bits) { + uint64_t prime = GeneratePrimes(1, bits, 1024)[0]; + std::uniform_int_distribution distrib(0, (2 * prime) - 1); + +#ifdef HEXL_DEBUG + size_t num_trials = 10; +#else + size_t num_trials = 100; +#endif + for (size_t trial = 0; trial < num_trials; ++trial) { + std::vector op1(length, 0); + for (size_t i = 0; i < length; ++i) { + op1[i] = distrib(gen); + } + std::vector result1(length, 0); + std::vector result2(length, 0); + std::vector result3(length, 0); + auto op2 = op1; + auto op3 = op1; + EltwiseReduceModNative(result1.data(), op1.data(), prime, op1.size(), 2, + 1); + EltwiseReduceModNative(result2.data(), op2.data(), prime, op1.size(), 2, + 1); + EltwiseReduceModAVX512(result3.data(), op3.data(), prime, op2.size(), 2, + 1); + + ASSERT_EQ(result1, result2); + ASSERT_EQ(result1, result3); + } + } + } +} +#endif + +} // namespace hexl +} // namespace intel diff --git a/test/test-ntt.cpp b/test/test-ntt.cpp new file mode 100644 index 00000000..18442e51 --- /dev/null +++ b/test/test-ntt.cpp @@ -0,0 +1,598 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include 
+#include +#include +#include + +#include "intel-hexl/ntt/ntt.hpp" +#include "logging/logging.hpp" +#include "ntt/fwd-ntt-avx512.hpp" +#include "ntt/inv-ntt-avx512.hpp" +#include "ntt/ntt-internal.hpp" +#include "number-theory/number-theory.hpp" +#include "test-util.hpp" + +namespace intel { +namespace hexl { + +#ifdef HEXL_DEBUG +TEST(NTT, bad_input) { + uint64_t p = 769; + uint64_t N = 8; + std::vector input; + std::vector p_input; + std::vector p_times_2_input; + std::vector p_times_4_input; + + NTT ntt(N, p); + + auto init_inputs = [&]() { + input = {1, 2, 3, 4, 5, 6, 7, 8}; + p_input = std::vector(N, p); + p_times_2_input = std::vector(N, 2 * p); + p_times_4_input = std::vector(N, 4 * p); + }; + + // Forward transform + // Bad input + init_inputs(); + EXPECT_ANY_THROW(ntt.ComputeForward(input.data(), nullptr, 1, 1)); + init_inputs(); + EXPECT_ANY_THROW(ntt.ComputeForward(nullptr, input.data(), 1, 1)); + init_inputs(); + EXPECT_NO_THROW(ntt.ComputeForward(input.data(), input.data(), 1, 1)); + init_inputs(); + EXPECT_NO_THROW(ntt.ComputeForward(p_input.data(), p_input.data(), 4, 4)); + init_inputs(); + EXPECT_ANY_THROW( + ntt.ComputeForward(p_times_2_input.data(), p_times_2_input.data(), 2, 1)); + init_inputs(); + EXPECT_NO_THROW( + ntt.ComputeForward(p_times_2_input.data(), p_times_2_input.data(), 4, 4)); + init_inputs(); + EXPECT_ANY_THROW( + ntt.ComputeForward(p_times_4_input.data(), p_times_4_input.data(), 4, 4)); + init_inputs(); + + // Bad mod factors + EXPECT_NO_THROW(ntt.ComputeForward(input.data(), input.data(), 2, 1)); + init_inputs(); + EXPECT_ANY_THROW(ntt.ComputeForward(input.data(), input.data(), 123, 1)); + init_inputs(); + EXPECT_ANY_THROW(ntt.ComputeForward(input.data(), input.data(), 2, 123)); + init_inputs(); + + // Inverse tranform + + // Bad input + EXPECT_ANY_THROW(ntt.ComputeInverse(input.data(), nullptr, 1, 1)); + init_inputs(); + EXPECT_ANY_THROW(ntt.ComputeInverse(nullptr, input.data(), 1, 1)); + init_inputs(); + + 
EXPECT_NO_THROW(ntt.ComputeInverse(input.data(), input.data(), 1, 1)); + init_inputs(); + EXPECT_ANY_THROW(ntt.ComputeInverse(p_input.data(), p_input.data(), 1, 1)); + init_inputs(); + EXPECT_NO_THROW(ntt.ComputeInverse(p_input.data(), p_input.data(), 2, 2)); + init_inputs(); + EXPECT_ANY_THROW( + ntt.ComputeInverse(p_times_2_input.data(), p_times_2_input.data(), 2, 2)); + init_inputs(); + + // Bad mod factors + EXPECT_NO_THROW(ntt.ComputeInverse(input.data(), input.data(), 1, 1)); + init_inputs(); + EXPECT_ANY_THROW(ntt.ComputeInverse(input.data(), input.data(), 123, 1)); + init_inputs(); + EXPECT_ANY_THROW(ntt.ComputeInverse(input.data(), input.data(), 1, 123)); + init_inputs(); +} +#endif + +TEST(NTT, Powers) { + uint64_t modulus = 0xffffffffffc0001ULL; + { + uint64_t N = 2; + NTT::NTTImpl ntt_impl(N, modulus); + + ASSERT_EQ(1ULL, ntt_impl.GetRootOfUnityPower(0)); + ASSERT_EQ(288794978602139552ULL, ntt_impl.GetRootOfUnityPower(1)); + } + + { + uint64_t N = 4; + NTT::NTTImpl ntt_impl(N, modulus); + + ASSERT_EQ(1ULL, ntt_impl.GetRootOfUnityPower(0)); + ASSERT_EQ(288794978602139552ULL, ntt_impl.GetRootOfUnityPower(1)); + ASSERT_EQ(178930308976060547ULL, ntt_impl.GetRootOfUnityPower(2)); + ASSERT_EQ(748001537669050592ULL, ntt_impl.GetRootOfUnityPower(3)); + } +} + +TEST(NTT, root_of_unity) { + uint64_t p = 769; + uint64_t N = 8; + std::vector input{1, 2, 3, 4, 5, 6, 7, 8}; + std::vector input2 = input; + + uint64_t root_of_unity = MinimalPrimitiveRoot(2 * N, p); + + NTT ntt1(N, p); + NTT ntt2(N, p, root_of_unity); + + ntt1.ComputeForward(input.data(), input.data(), 1, 1); + ntt2.ComputeForward(input2.data(), input2.data(), 1, 1); + + AssertEqual(input, input2); +} + +TEST(NTTImpl, root_of_unity) { + uint64_t p = 769; + uint64_t N = 8; + + NTT::NTTImpl ntt_impl(N, p); + + EXPECT_EQ(ntt_impl.GetMinimalRootOfUnity(), MinimalPrimitiveRoot(2 * N, p)); + EXPECT_EQ(ntt_impl.GetDegree(), N); + EXPECT_EQ(ntt_impl.GetInvRootOfUnityPower(0), + 
ntt_impl.GetInvRootOfUnityPowers()[0]); +} + +// Parameters = (degree, prime, input, expected_output) +class NTTAPITest + : public ::testing::TestWithParam, std::vector>> { + protected: + void SetUp() {} + + void TearDown() {} + + public: +}; + +// Test different parts of the API +TEST_P(NTTAPITest, Fwd) { + uint64_t N = std::get<0>(GetParam()); + uint64_t prime = std::get<1>(GetParam()); + + const std::vector input_copy = std::get<2>(GetParam()); + std::vector exp_output = std::get<3>(GetParam()); + std::vector input = input_copy; + std::vector out_buffer(input.size(), 99); + + // In-place Fwd NTT + NTT::NTTImpl ntt_impl(N, prime); + NTT ntt(N, prime); + ntt.ComputeForward(input.data(), input.data(), 1, 1); + AssertEqual(input, exp_output); + + // In-place lazy NTT + input = input_copy; + ntt.ComputeForward(input.data(), input.data(), 2, 4); + for (auto& elem : input) { + elem = elem % prime; + } + AssertEqual(input, exp_output); + + // Compute reference + input = input_copy; + ReferenceForwardTransformToBitReverse(input.data(), N, prime, + ntt_impl.GetRootOfUnityPowers().data()); + AssertEqual(input, exp_output); + + // Test round-trip + input = input_copy; + ntt.ComputeForward(out_buffer.data(), input.data(), 1, 1); + ntt.ComputeInverse(input.data(), out_buffer.data(), 1, 1); + AssertEqual(input, input_copy); + + // Test out-of-place forward + input = input_copy; + ntt.ComputeForward(out_buffer.data(), input.data(), 2, 1); + AssertEqual(out_buffer, exp_output); + + // Test out-of-place inverse + input = input_copy; + ntt.ComputeForward(out_buffer.data(), input.data(), 2, 1); + ntt.ComputeInverse(input.data(), out_buffer.data(), 1, 1); + AssertEqual(input, input_copy); + + // Test out-of-place inverse lazy + input = input_copy; + ntt.ComputeForward(out_buffer.data(), input.data(), 2, 1); + ntt.ComputeInverse(input.data(), out_buffer.data(), 1, 2); + for (auto& elem : input) { + elem = elem % prime; + } + AssertEqual(input, input_copy); +} + 
+INSTANTIATE_TEST_SUITE_P( + NTTAPITest, NTTAPITest, + ::testing::Values( + std::make_tuple(2, 281474976710897, std::vector{0, 0}, + std::vector{0, 0}), + std::make_tuple(2, 0xffffffffffc0001ULL, std::vector{0, 0}, + std::vector{0, 0}), + std::make_tuple(2, 281474976710897, std::vector{1, 0}, + std::vector{1, 1}), + std::make_tuple(2, 281474976710897, std::vector{1, 1}, + std::vector{19842761023586, 261632215687313}), + std::make_tuple(2, 0xffffffffffc0001ULL, std::vector{1, 1}, + std::vector{288794978602139553, + 864126526004445282}), + std::make_tuple(4, 113, std::vector{94, 109, 11, 18}, + std::vector{82, 2, 81, 98}), + std::make_tuple(4, 281474976710897, + std::vector{281474976710765, 49, + 281474976710643, 275}, + std::vector{12006376116355, 216492038983166, + 272441922811203, 62009615510542}), + std::make_tuple(4, 113, std::vector{59, 50, 98, 50}, + std::vector{1, 2, 3, 4}), + std::make_tuple(4, 73, std::vector{2, 1, 1, 1}, + std::vector{17, 41, 36, 60}), + std::make_tuple(4, 16417, std::vector{31, 21, 15, 34}, + std::vector{1611, 14407, 14082, 2858}), + std::make_tuple(4, 4194353, + std::vector{4127, 9647, 1987, 5410}, + std::vector{1478161, 3359347, 222964, + 3344742}), + std::make_tuple(8, 4194353, + std::vector{1, 0, 0, 0, 0, 0, 0, 0}, + std::vector{1, 1, 1, 1, 1, 1, 1, 1}), + std::make_tuple(8, 4194353, + std::vector{1, 1, 0, 0, 0, 0, 0, 0}, + std::vector{132171, 4062184, 2675172, 1519183, + 462763, 3731592, 1824324, + 2370031}), + std::make_tuple( + 32, 769, + std::vector{401, 203, 221, 352, 487, 151, 405, 356, + 343, 424, 635, 757, 457, 280, 624, 353, + 496, 353, 624, 280, 457, 757, 635, 424, + 343, 356, 405, 151, 487, 352, 221, 203}, + std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}))); + +class FwdNTTZerosTest + : public ::testing::TestWithParam> { + protected: + void SetUp() {} + void TearDown() {} + + public: +}; + +// Parameters = (degree, prime_bits) 
+TEST_P(FwdNTTZerosTest, Zeros) { + uint64_t N = std::get<0>(GetParam()); + uint64_t prime_bits = std::get<1>(GetParam()); + uint64_t prime = GeneratePrimes(1, prime_bits, N)[0]; + + std::vector input(N, 0); + std::vector exp_output(N, 0); + + NTT ntt(N, prime); + ntt.ComputeForward(input.data(), input.data(), 1, 1); + + AssertEqual(input, exp_output); +} + +INSTANTIATE_TEST_SUITE_P( + FwdNTTZerosTest, FwdNTTZerosTest, + ::testing::Values( + std::make_tuple(1 << 1, 30), std::make_tuple(1 << 2, 30), + std::make_tuple(1 << 3, 30), std::make_tuple(1 << 4, 35), + std::make_tuple(1 << 5, 35), std::make_tuple(1 << 6, 35), + std::make_tuple(1 << 7, 40), std::make_tuple(1 << 8, 40), + std::make_tuple(1 << 9, 40), std::make_tuple(1 << 10, 45), + std::make_tuple(1 << 11, 45), std::make_tuple(1 << 12, 45), + std::make_tuple(1 << 13, 50), std::make_tuple(1 << 14, 50), + std::make_tuple(1 << 15, 50), std::make_tuple(1 << 16, 55), + std::make_tuple(1 << 17, 55))); + +class InvNTTZerosTest + : public ::testing::TestWithParam> { + protected: + void SetUp() {} + void TearDown() {} + + public: +}; + +// Parameters = (degree, prime_bits) +TEST_P(InvNTTZerosTest, Zeros) { + uint64_t N = std::get<0>(GetParam()); + uint64_t prime_bits = std::get<1>(GetParam()); + uint64_t prime = GeneratePrimes(1, prime_bits, N)[0]; + + std::vector input(N, 0); + std::vector exp_output(N, 0); + + NTT ntt(N, prime); + ntt.ComputeInverse(input.data(), input.data(), 1, 1); + + AssertEqual(input, exp_output); +} + +INSTANTIATE_TEST_SUITE_P( + InvNTTZerosTest, InvNTTZerosTest, + ::testing::Values( + std::make_tuple(1 << 1, 30), std::make_tuple(1 << 2, 30), + std::make_tuple(1 << 3, 30), std::make_tuple(1 << 4, 35), + std::make_tuple(1 << 5, 35), std::make_tuple(1 << 6, 35), + std::make_tuple(1 << 7, 40), std::make_tuple(1 << 8, 40), + std::make_tuple(1 << 9, 40), std::make_tuple(1 << 10, 45), + std::make_tuple(1 << 11, 45), std::make_tuple(1 << 12, 45), + std::make_tuple(1 << 13, 50), std::make_tuple(1 << 
14, 50), + std::make_tuple(1 << 15, 50), std::make_tuple(1 << 16, 55), + std::make_tuple(1 << 17, 55))); + +#ifdef HEXL_HAS_AVX512IFMA +class NTTPrimesTest + : public ::testing::TestWithParam> { + protected: + void SetUp() {} + + void TearDown() {} + + public: +}; + +// Test primes around 50 bits to check IFMA behavior +// Parameters = (degree, prime_bits) +TEST_P(NTTPrimesTest, IFMAPrimes) { + uint64_t N = std::get<0>(GetParam()); + uint64_t prime_bits = std::get<1>(GetParam()); + uint64_t prime = GeneratePrimes(1, prime_bits, N)[0]; + + std::vector input64(N, 0); + for (size_t i = 0; i < N; ++i) { + input64[i] = i % prime; + } + std::vector input_ifma = input64; + std::vector input_ifma_lazy = input64; + + std::vector exp_output(N, 0); + + // Compute reference + NTT::NTTImpl ntt64(N, prime); + ReferenceForwardTransformToBitReverse(input64.data(), N, prime, + ntt64.GetRootOfUnityPowers().data()); + + // Compute with 52-bit bit shift + NTT::NTTImpl ntt_ifma(N, prime); + ForwardTransformToBitReverseAVX512<52>( + input_ifma.data(), N, ntt_ifma.GetModulus(), + ntt_ifma.GetRootOfUnityPowers().data(), + ntt_ifma.GetPrecon52RootOfUnityPowers().data(), 2, 1); + + // Compute lazy + ForwardTransformToBitReverseAVX512<52>( + input_ifma_lazy.data(), N, ntt_ifma.GetModulus(), + ntt_ifma.GetRootOfUnityPowers().data(), + ntt_ifma.GetPrecon52RootOfUnityPowers().data(), 2, 4); + for (auto& elem : input_ifma_lazy) { + elem = elem % prime; + } + + AssertEqual(input64, input_ifma); + AssertEqual(input64, input_ifma_lazy); +} + +INSTANTIATE_TEST_SUITE_P(NTTPrimesTest, NTTPrimesTest, + ::testing::Values(std::make_tuple(1 << 4, 48), + std::make_tuple(1 << 5, 49), + std::make_tuple(1 << 6, 49), + std::make_tuple(1 << 7, 49), + std::make_tuple(1 << 8, 49))); +#endif + +#ifdef HEXL_HAS_AVX512DQ +TEST(NTT, LoadFwdInterleavedT1) { + std::vector arg{0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; + __m512i out1; + __m512i out2; + + LoadFwdInterleavedT1(arg.data(), &out1, &out2); + + 
__m512i exp1 = _mm512_set_epi64(14, 6, 12, 4, 10, 2, 8, 0); + __m512i exp2 = _mm512_set_epi64(15, 7, 13, 5, 11, 3, 9, 1); + AssertEqual(ExtractValues(out1), ExtractValues(exp1)); + AssertEqual(ExtractValues(out2), ExtractValues(exp2)); +} + +TEST(NTT, LoadInvInterleavedT1) { + std::vector arg{0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; + __m512i out1; + __m512i out2; + + LoadInvInterleavedT1(arg.data(), &out1, &out2); + + __m512i exp1 = _mm512_set_epi64(14, 12, 10, 8, 6, 4, 2, 0); + __m512i exp2 = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1); + AssertEqual(ExtractValues(out1), ExtractValues(exp1)); + AssertEqual(ExtractValues(out2), ExtractValues(exp2)); +} + +TEST(NTT, LoadFwdInterleavedT2) { + std::vector arg{0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; + __m512i out1; + __m512i out2; + + LoadFwdInterleavedT2(arg.data(), &out1, &out2); + + __m512i exp1 = _mm512_set_epi64(13, 12, 5, 4, 9, 8, 1, 0); + __m512i exp2 = _mm512_set_epi64(15, 14, 7, 6, 11, 10, 3, 2); + AssertEqual(ExtractValues(out1), ExtractValues(exp1)); + AssertEqual(ExtractValues(out2), ExtractValues(exp2)); +} + +TEST(NTT, LoadInvInterleavedT2) { + std::vector arg{0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; + __m512i out1; + __m512i out2; + + LoadInvInterleavedT2(arg.data(), &out1, &out2); + + __m512i exp1 = _mm512_set_epi64(14, 6, 12, 4, 10, 2, 8, 0); + __m512i exp2 = _mm512_set_epi64(15, 7, 13, 5, 11, 3, 9, 1); + AssertEqual(ExtractValues(out1), ExtractValues(exp1)); + AssertEqual(ExtractValues(out2), ExtractValues(exp2)); +} + +TEST(NTT, LoadFwdInterleavedT4) { + std::vector arg{0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; + __m512i out1; + __m512i out2; + + LoadFwdInterleavedT4(arg.data(), &out1, &out2); + + __m512i exp1 = _mm512_set_epi64(11, 10, 9, 8, 3, 2, 1, 0); + __m512i exp2 = _mm512_set_epi64(15, 14, 13, 12, 7, 6, 5, 4); + AssertEqual(ExtractValues(out1), ExtractValues(exp1)); + AssertEqual(ExtractValues(out2), ExtractValues(exp2)); +} + 
+TEST(NTT, LoadInvInterleavedT4) { + std::vector arg{0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; + __m512i out1; + __m512i out2; + + LoadInvInterleavedT4(arg.data(), &out1, &out2); + + __m512i exp1 = _mm512_set_epi64(13, 12, 5, 4, 9, 8, 1, 0); + __m512i exp2 = _mm512_set_epi64(15, 14, 7, 6, 11, 10, 3, 2); + AssertEqual(ExtractValues(out1), ExtractValues(exp1)); + AssertEqual(ExtractValues(out2), ExtractValues(exp2)); +} + +TEST(NTT, WriteFwdInterleavedT1) { + std::vector arg{0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; + __m512i arg1 = _mm512_set_epi64(15, 14, 13, 12, 11, 10, 9, 8); + __m512i arg2 = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0); + + std::vector out(16, 0); + std::vector exp{8, 0, 9, 1, 10, 2, 11, 3, + 12, 4, 13, 5, 14, 6, 15, 7}; + + WriteFwdInterleavedT1(arg1, arg2, reinterpret_cast<__m512i*>(&out[0])); + + AssertEqual(exp, out); +} + +TEST(NTT, WriteInvInterleavedT4) { + std::vector arg{0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}; + __m512i arg1 = _mm512_set_epi64(15, 14, 13, 12, 11, 10, 9, 8); + __m512i arg2 = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0); + + std::vector out(16, 0); + std::vector exp{8, 9, 10, 11, 0, 1, 2, 3, + 12, 13, 14, 15, 4, 5, 6, 7}; + + WriteInvInterleavedT4(arg1, arg2, reinterpret_cast<__m512i*>(&out[0])); + + AssertEqual(exp, out); +} + +// Checks AVX512 and native forward NTT implementations match +TEST(NTT, FwdNTT_AVX512) { + uint64_t N = 512; + uint64_t prime = GeneratePrimes(1, 55, N)[0]; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution distrib(0, prime - 1); + + for (size_t trial = 0; trial < 200; ++trial) { + std::vector input(N, 0); + for (size_t i = 0; i < N; ++i) { + input[i] = distrib(gen); + } + std::vector input_avx = input; + std::vector input_avx_lazy = input; + + NTT::NTTImpl ntt_impl(N, prime); + ForwardTransformToBitReverse64( + input.data(), N, prime, ntt_impl.GetRootOfUnityPowers().data(), + ntt_impl.GetPrecon64RootOfUnityPowers().data(), 
2, 1); + + ForwardTransformToBitReverseAVX512<64>( + input_avx.data(), N, ntt_impl.GetModulus(), + ntt_impl.GetRootOfUnityPowers().data(), + ntt_impl.GetPrecon64RootOfUnityPowers().data(), 2, 1); + + // Compute lazy + ForwardTransformToBitReverseAVX512<64>( + input_avx_lazy.data(), N, ntt_impl.GetModulus(), + ntt_impl.GetRootOfUnityPowers().data(), + ntt_impl.GetPrecon64RootOfUnityPowers().data(), 2, 4); + for (auto& elem : input_avx_lazy) { + elem = elem % prime; + } + + ASSERT_EQ(input, input_avx); + ASSERT_EQ(input, input_avx_lazy); + } +} + +// Checks AVX512 and native InvNTT implementations match +TEST(NTT, InvNTT_AVX512) { + uint64_t N = 512; + uint64_t prime = GeneratePrimes(1, 55, N)[0]; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution distrib(0, prime - 1); + + for (size_t trial = 0; trial < 200; ++trial) { + std::vector input(N, 0); + for (size_t i = 0; i < N; ++i) { + input[i] = distrib(gen); + } + std::vector input_avx = input; + std::vector input_avx_lazy = input; + + NTT::NTTImpl ntt_impl(N, prime); + InverseTransformFromBitReverse64( + input.data(), N, prime, ntt_impl.GetInvRootOfUnityPowers().data(), + ntt_impl.GetPrecon64InvRootOfUnityPowers().data(), 1, 1); + + InverseTransformFromBitReverseAVX512<64>( + input_avx.data(), N, ntt_impl.GetModulus(), + ntt_impl.GetInvRootOfUnityPowers().data(), + ntt_impl.GetPrecon64InvRootOfUnityPowers().data(), 1, 1); + + // Compute lazy + InverseTransformFromBitReverseAVX512<64>( + input_avx_lazy.data(), N, ntt_impl.GetModulus(), + ntt_impl.GetInvRootOfUnityPowers().data(), + ntt_impl.GetPrecon64InvRootOfUnityPowers().data(), 1, 2); + for (auto& elem : input_avx_lazy) { + elem = elem % prime; + } + + ASSERT_EQ(input, input_avx); + ASSERT_EQ(input, input_avx_lazy); + } +} +#endif + +} // namespace hexl +} // namespace intel diff --git a/test/test-number-theory.cpp b/test/test-number-theory.cpp new file mode 100644 index 00000000..baceabf3 --- /dev/null +++ 
b/test/test-number-theory.cpp @@ -0,0 +1,427 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "gtest/gtest.h" +#include "number-theory/number-theory.hpp" + +namespace intel { +namespace hexl { + +TEST(NumberTheory, Log2) { + ASSERT_EQ(0, Log2(1)); + ASSERT_EQ(1, Log2(2)); + ASSERT_EQ(2, Log2(4)); + ASSERT_EQ(3, Log2(8)); + ASSERT_EQ(4, Log2(16)); + ASSERT_EQ(5, Log2(32)); + ASSERT_EQ(6, Log2(64)); + ASSERT_EQ(7, Log2(128)); + ASSERT_EQ(8, Log2(256)); + ASSERT_EQ(9, Log2(512)); + ASSERT_EQ(10, Log2(1024)); + ASSERT_EQ(11, Log2(2048)); + ASSERT_EQ(12, Log2(4096)); + ASSERT_EQ(13, Log2(8192)); +} + +TEST(NumberTheory, MultiplyUIntMod) { + uint64_t mod(2); + ASSERT_EQ(0ULL, MultiplyUIntMod(0, 0, mod)); + ASSERT_EQ(0ULL, MultiplyUIntMod(0, 1, mod)); + ASSERT_EQ(0ULL, MultiplyUIntMod(1, 0, mod)); + ASSERT_EQ(1ULL, MultiplyUIntMod(1, 1, mod)); + + mod = 10; + ASSERT_EQ(0ULL, MultiplyUIntMod(0, 0, mod)); + ASSERT_EQ(0ULL, MultiplyUIntMod(0, 1, mod)); + ASSERT_EQ(0ULL, MultiplyUIntMod(1, 0, mod)); + ASSERT_EQ(1ULL, MultiplyUIntMod(1, 1, mod)); + ASSERT_EQ(9ULL, MultiplyUIntMod(7, 7, mod)); + ASSERT_EQ(2ULL, MultiplyUIntMod(6, 7, mod)); + ASSERT_EQ(2ULL, MultiplyUIntMod(7, 6, mod)); + + mod = 2305843009211596801ULL; + ASSERT_EQ(0ULL, MultiplyUIntMod(0, 0, mod)); + ASSERT_EQ(0ULL, MultiplyUIntMod(0, 1, mod)); + ASSERT_EQ(0ULL, MultiplyUIntMod(1, 0, mod)); + ASSERT_EQ(1ULL, MultiplyUIntMod(1, 1, mod)); + ASSERT_EQ( + 576460752302899200ULL, + MultiplyUIntMod(1152921504605798400ULL, 1152921504605798401ULL, mod)); + ASSERT_EQ( + 576460752302899200ULL, + MultiplyUIntMod(1152921504605798401ULL, 1152921504605798400ULL, mod)); + ASSERT_EQ( + 1729382256908697601ULL, + MultiplyUIntMod(1152921504605798401ULL, 1152921504605798401ULL, mod)); + ASSERT_EQ(1ULL, MultiplyUIntMod(2305843009211596800ULL, + 2305843009211596800ULL, mod)); +} + +TEST(NumberTheory, MultiplyModPreCon) { + uint64_t mod(2); + MultiplyFactor mf0(0, 64, 
mod); + MultiplyFactor mf1(1, 64, mod); + ASSERT_EQ(0ULL, MultiplyMod(0, 0, mf0.BarrettFactor(), mod)); + ASSERT_EQ(0ULL, MultiplyMod(0, 1, mf1.BarrettFactor(), mod)); + ASSERT_EQ(0ULL, MultiplyMod(1, 0, mf0.BarrettFactor(), mod)); + ASSERT_EQ(1ULL, MultiplyMod(1, 1, mf1.BarrettFactor(), mod)); + + mod = 10; + MultiplyFactor mf6(6, 64, mod); + MultiplyFactor mf7(7, 64, mod); + ASSERT_EQ(0ULL, MultiplyMod(0, 0, mf0.BarrettFactor(), mod)); + ASSERT_EQ(0ULL, MultiplyMod(0, 1, mf1.BarrettFactor(), mod)); + ASSERT_EQ(0ULL, MultiplyMod(1, 0, mf0.BarrettFactor(), mod)); + ASSERT_EQ(1ULL, MultiplyMod(1, 1, mf1.BarrettFactor(), mod)); + ASSERT_EQ(9ULL, MultiplyMod(7, 7, mf7.BarrettFactor(), mod)); + ASSERT_EQ(2ULL, MultiplyMod(6, 7, mf7.BarrettFactor(), mod)); + ASSERT_EQ(2ULL, MultiplyMod(7, 6, mf6.BarrettFactor(), mod)); + + mod = 2305843009211596801ULL; + ASSERT_EQ(0ULL, MultiplyMod(0, 0, mf0.BarrettFactor(), mod)); + ASSERT_EQ(0ULL, MultiplyMod(0, 1, mf1.BarrettFactor(), mod)); + ASSERT_EQ(0ULL, MultiplyMod(1, 0, mf0.BarrettFactor(), mod)); + ASSERT_EQ(1ULL, MultiplyMod(1, 1, mf1.BarrettFactor(), mod)); + + MultiplyFactor mf1152921504605798401(1152921504605798401ULL, 64, mod); + MultiplyFactor mf1152921504605798400(1152921504605798400ULL, 64, mod); + MultiplyFactor mf2305843009211596800(2305843009211596800ULL, 64, mod); + ASSERT_EQ(576460752302899200ULL, + MultiplyMod(1152921504605798400ULL, 1152921504605798401ULL, + mf1152921504605798401.BarrettFactor(), mod)); + ASSERT_EQ(576460752302899200ULL, + MultiplyMod(1152921504605798401ULL, 1152921504605798400ULL, + mf1152921504605798400.BarrettFactor(), mod)); + ASSERT_EQ(1729382256908697601ULL, + MultiplyMod(1152921504605798401ULL, 1152921504605798401ULL, + mf1152921504605798401.BarrettFactor(), mod)); + ASSERT_EQ(1ULL, MultiplyMod(2305843009211596800ULL, 2305843009211596800ULL, + mf2305843009211596800.BarrettFactor(), mod)); +} + +TEST(NumberTheory, PowMod) { + uint64_t mod = 5; + ASSERT_EQ(1ULL, PowMod(1, 0, mod)); + 
ASSERT_EQ(1ULL, PowMod(1, 0xFFFFFFFFFFFFFFFFULL, mod)); + ASSERT_EQ(3ULL, PowMod(2, 0xFFFFFFFFFFFFFFFFULL, mod)); + + mod = 0x1000000000000000ULL; + ASSERT_EQ(0ULL, PowMod(2, 60, mod)); + ASSERT_EQ(0x800000000000000ULL, PowMod(2, 59, mod)); + + mod = 131313131313; + ASSERT_EQ(39418477653ULL, PowMod(2424242424, 16, mod)); +} + +TEST(NumberTheory, IsPowerOfTwo) { + std::vector powers_of_two{1, 2, 4, 8, 16, 32, + 512, 1024, 2048, 4096, 16384, 32768}; + std::vector not_powers_of_two{0, 3, 5, 7, 9, 31, 33, 1025, 4095}; + + for (auto power_of_two : powers_of_two) { + EXPECT_TRUE(IsPowerOfTwo(power_of_two)); + } + + for (auto not_power_of_two : not_powers_of_two) { + EXPECT_FALSE(IsPowerOfTwo(not_power_of_two)); + } +} + +TEST(NumberTheory, IsPrimitiveRoot) { + uint64_t mod = 11; + ASSERT_TRUE(IsPrimitiveRoot(10, 2, mod)); + ASSERT_FALSE(IsPrimitiveRoot(9, 2, mod)); + ASSERT_FALSE(IsPrimitiveRoot(10, 4, mod)); + + mod = 29; + ASSERT_TRUE(IsPrimitiveRoot(28, 2, mod)); + ASSERT_TRUE(IsPrimitiveRoot(12, 4, mod)); + ASSERT_FALSE(IsPrimitiveRoot(12, 2, mod)); + ASSERT_FALSE(IsPrimitiveRoot(12, 8, mod)); + + mod = 1234565441ULL; + ASSERT_TRUE(IsPrimitiveRoot(1234565440ULL, 2, mod)); + ASSERT_TRUE(IsPrimitiveRoot(960907033ULL, 8, mod)); + ASSERT_TRUE(IsPrimitiveRoot(1180581915ULL, 16, mod)); + ASSERT_FALSE(IsPrimitiveRoot(1180581915ULL, 32, mod)); + ASSERT_FALSE(IsPrimitiveRoot(1180581915ULL, 8, mod)); + ASSERT_FALSE(IsPrimitiveRoot(1180581915ULL, 2, mod)); +} + +TEST(NumberTheory, MinimalPrimitiveRoot) { + uint64_t mod = 11; + + ASSERT_EQ(10ULL, MinimalPrimitiveRoot(2, mod)); + + mod = 29; + ASSERT_EQ(28ULL, MinimalPrimitiveRoot(2, mod)); + ASSERT_EQ(12ULL, MinimalPrimitiveRoot(4, mod)); + + mod = 1234565441; + ASSERT_EQ(1234565440ULL, MinimalPrimitiveRoot(2, mod)); + ASSERT_EQ(249725733ULL, MinimalPrimitiveRoot(8, mod)); +} + +TEST(NumberTheory, InverseUIntMod) { + uint64_t input; + uint64_t modulus; + + input = 1, modulus = 2; + ASSERT_EQ(1ULL, InverseUIntMod(input, 
modulus)); + +#ifdef HEXL_DEBUG + input = 2, modulus = 2; + EXPECT_ANY_THROW(InverseUIntMod(input, modulus)); + + input = 0xFFFFFE, modulus = 2; + EXPECT_ANY_THROW(InverseUIntMod(input, modulus)); + + input = 12345, modulus = 3; + EXPECT_ANY_THROW(InverseUIntMod(input, modulus)); +#endif + + input = 3, modulus = 2; + ASSERT_EQ(1ULL, InverseUIntMod(input, modulus)); + + input = 0xFFFFFF, modulus = 2; + ASSERT_EQ(1ULL, InverseUIntMod(input, modulus)); + + input = 5, modulus = 19; + ASSERT_EQ(4ULL, InverseUIntMod(input, modulus)); + + input = 4, modulus = 19; + ASSERT_EQ(5ULL, InverseUIntMod(input, modulus)); +} + +TEST(NumberTheory, ReverseBitsUInt64) { + ASSERT_EQ(0ULL, ReverseBitsUInt(0ULL, 0)); + ASSERT_EQ(0ULL, ReverseBitsUInt(0ULL, 1)); + ASSERT_EQ(0ULL, ReverseBitsUInt(0ULL, 32)); + ASSERT_EQ(0ULL, ReverseBitsUInt(0ULL, 64)); + + ASSERT_EQ(0ULL, ReverseBitsUInt(1ULL, 0)); + ASSERT_EQ(1ULL, ReverseBitsUInt(1ULL, 1)); + ASSERT_EQ(1ULL << 31, ReverseBitsUInt(1ULL, 32)); + ASSERT_EQ(1ULL << 63, ReverseBitsUInt(1ULL, 64)); + + ASSERT_EQ(0ULL, ReverseBitsUInt(1ULL << 31, 0)); + ASSERT_EQ(0ULL, ReverseBitsUInt(1ULL << 31, 1)); + ASSERT_EQ(1ULL, ReverseBitsUInt(1ULL << 31, 32)); + ASSERT_EQ(1ULL << 32, ReverseBitsUInt(1ULL << 31, 64)); + + ASSERT_EQ(0ULL, ReverseBitsUInt(0xFFFFULL << 16, 0)); + ASSERT_EQ(0ULL, ReverseBitsUInt(0xFFFFULL << 16, 1)); + ASSERT_EQ(0xFFFFULL, ReverseBitsUInt(0xFFFFULL << 16, 32)); + ASSERT_EQ(0xFFFFULL << 32, ReverseBitsUInt(0xFFFFULL << 16, 64)); + + ASSERT_EQ(0ULL, ReverseBitsUInt(0x0000FFFFFFFF0000ULL, 0)); + ASSERT_EQ(0ULL, ReverseBitsUInt(0x0000FFFFFFFF0000ULL, 1)); + ASSERT_EQ(0xFFFFULL, ReverseBitsUInt(0x0000FFFFFFFF0000ULL, 32)); + ASSERT_EQ(0x0000FFFFFFFF0000ULL, ReverseBitsUInt(0x0000FFFFFFFF0000ULL, 64)); + + ASSERT_EQ(0ULL, ReverseBitsUInt(0xFFFF0000FFFF0000ULL, 0)); + ASSERT_EQ(0ULL, ReverseBitsUInt(0xFFFF0000FFFF0000ULL, 1)); + ASSERT_EQ(0xFFFFULL, ReverseBitsUInt(0xFFFF0000FFFF0000ULL, 32)); + ASSERT_EQ(0x0000FFFF0000FFFFULL, 
ReverseBitsUInt(0xFFFF0000FFFF0000ULL, 64)); +} + +TEST(NumberTheory, MultiplyUIntModLazy64) { + uint64_t mod = 2; + uint64_t y = 0; + ASSERT_EQ(0ULL, MultiplyUIntModLazy<64>(0, y, mod)); + ASSERT_EQ(0ULL, MultiplyUIntModLazy<64>(1, y, mod)); + y = 1; + ASSERT_EQ(0ULL, MultiplyUIntModLazy<64>(0, y, mod)); + ASSERT_EQ(1ULL, MultiplyUIntModLazy<64>(1, y, mod)); + + mod = 10; + y = 0; + ASSERT_EQ(0ULL, MultiplyUIntModLazy<64>(0, y, mod)); + ASSERT_EQ(0ULL, MultiplyUIntModLazy<64>(1, y, mod)); + y = 1; + ASSERT_EQ(0ULL, MultiplyUIntModLazy<64>(0, y, mod)); + ASSERT_EQ(1ULL, MultiplyUIntModLazy<64>(1, y, mod)); + y = 6; + ASSERT_EQ(2ULL, MultiplyUIntModLazy<64>(7, y, mod)); + y = 7; + ASSERT_EQ(9ULL, MultiplyUIntModLazy<64>(7, y, mod)); + ASSERT_EQ(2ULL, MultiplyUIntModLazy<64>(6, y, mod)); + + mod = 2305843009211596801ULL; + y = 0; + ASSERT_EQ(0ULL, MultiplyUIntModLazy<64>(0, y, mod)); + ASSERT_EQ(0ULL, MultiplyUIntModLazy<64>(1, y, mod)); + y = 1; + ASSERT_EQ(0ULL, MultiplyUIntModLazy<64>(0, y, mod)); + ASSERT_EQ(1ULL, MultiplyUIntModLazy<64>(1, y, mod)); + y = 1152921504605798400ULL; + ASSERT_EQ(576460752302899200ULL, + MultiplyUIntModLazy<64>(1152921504605798401ULL, y, mod)); + y = 1152921504605798401ULL; + ASSERT_EQ(576460752302899200ULL, + MultiplyUIntModLazy<64>(1152921504605798400ULL, y, mod)); + ASSERT_EQ(1729382256908697601ULL, + MultiplyUIntModLazy<64>(1152921504605798401ULL, y, mod)); + y = 2305843009211596800ULL; + ASSERT_EQ(2305843009211596802ULL, + MultiplyUIntModLazy<64>(2305843009211596800ULL, y, mod)); +} + +TEST(NumberTheory, MultiplyUIntModLazy52) { + uint64_t mod = 2; + uint64_t y = 0; + ASSERT_EQ(0ULL, MultiplyUIntModLazy<52>(0, y, mod)); + ASSERT_EQ(0ULL, MultiplyUIntModLazy<52>(1, y, mod)); + y = 1; + ASSERT_EQ(0ULL, MultiplyUIntModLazy<52>(0, y, mod)); + ASSERT_EQ(1ULL, MultiplyUIntModLazy<52>(1, y, mod)); + + mod = 10; + y = 0; + ASSERT_EQ(0ULL, MultiplyUIntModLazy<52>(0, y, mod)); + ASSERT_EQ(0ULL, MultiplyUIntModLazy<52>(1, y, mod)); + y = 1; 
+ ASSERT_EQ(0ULL, MultiplyUIntModLazy<52>(0, y, mod)); + ASSERT_EQ(1ULL, MultiplyUIntModLazy<52>(1, y, mod)); + y = 6; + ASSERT_EQ(2ULL, MultiplyUIntModLazy<52>(7, y, mod)); + y = 7; + ASSERT_EQ(9ULL, MultiplyUIntModLazy<52>(7, y, mod)); + ASSERT_EQ(2ULL, MultiplyUIntModLazy<52>(6, y, mod)); + +#ifdef HEXL_DEBUG + y = 1152921504605798400ULL; + EXPECT_ANY_THROW(MultiplyUIntModLazy<52>(1152921504605798401ULL, y, mod)); + y = 1152921504605798401ULL; + EXPECT_ANY_THROW(MultiplyUIntModLazy<52>(1152921504605798400ULL, y, mod)); + EXPECT_ANY_THROW(MultiplyUIntModLazy<52>(1152921504605798401ULL, y, mod)); + y = 2305843009211596800ULL; + EXPECT_ANY_THROW(MultiplyUIntModLazy<52>(2305843009211596800ULL, y, mod)); +#endif +} + +TEST(NumberTheory, MaximumValue) { + ASSERT_EQ(MaximumValue(64), 0xffffffffffffffff); + ASSERT_EQ(MaximumValue(52), 0xfffffffffffff); +} + +TEST(NumberTheory, IsPrime) { + ASSERT_TRUE(IsPrime(2)); + ASSERT_TRUE(IsPrime(3)); + ASSERT_TRUE(IsPrime(5)); + ASSERT_TRUE(IsPrime(2305843009211596801ULL)); + ASSERT_TRUE(IsPrime(2305843009211596801ULL)); + ASSERT_TRUE(IsPrime(36893488147419103ULL)); + ASSERT_TRUE(IsPrime(0xffffffffffc0001ULL)); + ASSERT_TRUE(IsPrime(0xffffee001)); + + ASSERT_FALSE(IsPrime(72307ULL * 59399ULL)); + ASSERT_FALSE(IsPrime(2305843009211596802ULL)); + ASSERT_FALSE(IsPrime(36893488147419107ULL)); +} + +TEST(NumberTheory, GeneratePrimes) { + for (int bit_size = 40; bit_size < 62; ++bit_size) { + std::vector primes = GeneratePrimes(10, bit_size, 4096); + ASSERT_EQ(primes.size(), 10); + for (const auto& prime : primes) { + ASSERT_EQ(prime % 8192, 1); + ASSERT_TRUE(IsPrime(prime)); + ASSERT_TRUE(prime <= (1ULL << (bit_size + 1))); + ASSERT_TRUE(prime >= (1ULL << bit_size)); + } + } +} + +TEST(NumberTheory, AddUInt64) { + uint64_t result; + EXPECT_EQ(0, AddUInt64(1, 0, &result)); + EXPECT_EQ(1, result); + + EXPECT_EQ(0, AddUInt64(1, 1, &result)); + EXPECT_EQ(2, result); + + EXPECT_EQ(0, AddUInt64(10, 7, &result)); + EXPECT_EQ(17, result); + + 
EXPECT_EQ(0, AddUInt64(1ULL << 32, 1ULL << 16, &result)); + EXPECT_EQ(4295032832, result); + + // Test overflow + EXPECT_EQ(1, AddUInt64(1ULL << 63, 1ULL << 63, &result)); + EXPECT_EQ(0, result); + + EXPECT_EQ(1, AddUInt64((1ULL << 63) + 1, 1ULL << 63, &result)); + EXPECT_EQ(1, result); + + EXPECT_EQ(1, AddUInt64((1ULL << 63) + 13, (1ULL << 63) + 17, &result)); + EXPECT_EQ(30, result); +} + +TEST(NumberTheory, AddUIntMod) { + { + uint64_t modulus = 2; + EXPECT_EQ(1, AddUIntMod(1, 0, modulus)); + EXPECT_EQ(1, AddUIntMod(0, 1, modulus)); + EXPECT_EQ(0, AddUIntMod(1, 1, modulus)); + } + + { + uint64_t modulus = 10; + EXPECT_EQ(0, AddUIntMod(3, 7, modulus)); + EXPECT_EQ(0, AddUIntMod(4, 6, modulus)); + EXPECT_EQ(1, AddUIntMod(5, 6, modulus)); + EXPECT_EQ(2, AddUIntMod(6, 6, modulus)); + } + + { + uint64_t modulus = 1ULL << 63; + EXPECT_EQ(10, AddUIntMod(3, 7, modulus)); + EXPECT_EQ(0, AddUIntMod(modulus - 1, 1, modulus)); + EXPECT_EQ(1, AddUIntMod(modulus - 1, 2, modulus)); + EXPECT_EQ(modulus - 4, AddUIntMod(modulus - 1, modulus - 3, modulus)); + } +} + +TEST(NumberTheory, SubUIntMod) { + { + uint64_t modulus = 2; + EXPECT_EQ(1, SubUIntMod(1, 0, modulus)); + EXPECT_EQ(1, SubUIntMod(0, 1, modulus)); + EXPECT_EQ(0, SubUIntMod(1, 1, modulus)); + } + + { + uint64_t modulus = 10; + EXPECT_EQ(6, SubUIntMod(3, 7, modulus)); + EXPECT_EQ(8, SubUIntMod(4, 6, modulus)); + EXPECT_EQ(2, SubUIntMod(6, 4, modulus)); + EXPECT_EQ(0, SubUIntMod(6, 6, modulus)); + } + + { + uint64_t modulus = 1ULL << 63; + EXPECT_EQ(modulus - 4, SubUIntMod(3, 7, modulus)); + EXPECT_EQ(modulus - 2, SubUIntMod(modulus - 1, 1, modulus)); + EXPECT_EQ(3, SubUIntMod(2, modulus - 1, modulus)); + EXPECT_EQ(2, SubUIntMod(modulus - 1, modulus - 3, modulus)); + } +} + +TEST(NumberTheory, DivideUInt128UInt64Lo) { + EXPECT_EQ(0ULL, DivideUInt128UInt64Lo(0ULL, 0ULL, 2ULL)); + EXPECT_EQ(9460151ULL, DivideUInt128UInt64Lo(0ULL, 4294908658ULL, 454ULL)); + EXPECT_EQ(10ULL, DivideUInt128UInt64Lo(0ULL, 4294908658ULL, 
429490865ULL)); + EXPECT_EQ(0xffffffffffffffffULL, + DivideUInt128UInt64Lo(0ULL, 0xffffffffffffffffULL, 1ULL)); + EXPECT_EQ(1ULL, DivideUInt128UInt64Lo(0ULL, 0xfffffffULL, 0xfffffffULL)); + EXPECT_EQ(4294908659ULL, + DivideUInt128UInt64Lo(4294908658ULL, 0xffffffffffffffffULL, + 0xffffffffffffffffULL)); +} + +} // namespace hexl +} // namespace intel diff --git a/test/test-public-api.cpp b/test/test-public-api.cpp new file mode 100644 index 00000000..f67954a1 --- /dev/null +++ b/test/test-public-api.cpp @@ -0,0 +1,6 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "intel-hexl/intel-hexl.hpp" + +int main() { return 0; } diff --git a/test/test-util.hpp b/test/test-util.hpp new file mode 100644 index 00000000..1031e2b0 --- /dev/null +++ b/test/test-util.hpp @@ -0,0 +1,70 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +#include "logging/logging.hpp" +#include "util/avx512-util.hpp" +#include "util/check.hpp" + +namespace intel { +namespace hexl { + +// Checks whether x == y. 
+inline void CheckEqual(const std::vector& x, + const std::vector& y) { + EXPECT_EQ(x.size(), y.size()); + uint64_t N = x.size(); + for (size_t i = 0; i < N; ++i) { + EXPECT_EQ(x[i], y[i]); + } +} + +// Asserts x == y +template +inline void AssertEqual(const std::vector& x, const std::vector& y) { + ASSERT_EQ(x.size(), y.size()); + uint64_t N = x.size(); + for (size_t i = 0; i < N; ++i) { + ASSERT_EQ(x[i], y[i]); + } +} + +#ifdef HEXL_HAS_AVX512DQ +inline void CheckEqual(const __m512i a, const __m512i b) { + std::vector as = ExtractValues(a); + std::vector bs = ExtractValues(b); + CheckEqual(as, bs); +} + +inline void AssertEqual(const __m512i a, const __m512i b) { + std::vector as = ExtractValues(a); + std::vector bs = ExtractValues(b); + AssertEqual(as, bs); +} + +// Returns true iff a == b +// Logs an error if a != b +inline bool Equals(__m512i a, __m512i b) { + bool match = true; + + std::vector as = ExtractValues(a); + std::vector bs = ExtractValues(b); + + for (size_t i = 0; i < 8; ++i) { + if (as[i] != bs[i]) { + std::cerr << "Mismatch at index " << i << ": " + << "a[" << i << "] = " << as[i] << ", b[" << i + << "] = " << bs[i] << "\n"; + match = false; + } + } + return match; +} +#endif + +} // namespace hexl +} // namespace intel