MPI support as plugins (#966)

Implement MPI support as plugins, as proposed in this [RFC](#802). This improves compatibility in some deployment scenarios:

- Docker images: `libcudaq.so` is no longer directly linked against OpenMPI in the Docker image, which allows swapping the MPI library when using the image.
- Python wheels: with dynamically loaded plugins, we no longer need to [disable MPI support during build](#920). MPI support is deferred to runtime, using either the mpi4py-based plugin or a manually activated native MPI plugin.
NVIDIA · Dec 5, 2023 · 035b084 · 035b084
1 parent 4343201
commit 035b084
Show file tree

Hide file tree

Showing 34 changed files with 2,420 additions and 131 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -126,10 +126,17 @@ jobs:
       matrix:
         platform: [amd64, arm64]
         toolchain: [llvm, clang16, gcc12]
+        mpi: [openmpi, mpich]
+        exclude:
+          - toolchain: llvm
+            mpi: mpich
+          - toolchain: clang16
+            mpi: mpich
       fail-fast: false
     uses: ./.github/workflows/test_in_devenv.yml
     with:
       platform: linux/${{ matrix.platform }}
+      mpi: ${{ matrix.mpi }}
       devdeps_image: ${{ fromJson(needs.config.outputs.json).image_hash[format('{0}-{1}', matrix.platform, matrix.toolchain)] }}
       devdeps_cache: ${{ fromJson(needs.config.outputs.json).cache_key[format('{0}-{1}', matrix.platform, matrix.toolchain)] }}
       devdeps_archive: ${{ fromJson(needs.config.outputs.json).tar_archive[format('{0}-{1}', matrix.platform, matrix.toolchain)] }}

diff --git a/.github/workflows/publishing.yml b/.github/workflows/publishing.yml
@@ -607,6 +607,35 @@ jobs:
             exit $status_sum
           fi
 
+      # Compile every C++ example under examples/other/distributed with the MPI
+      # code path enabled and run it with mpiexec on 4 processes. Failures are
+      # counted and listed in the step summary; the step fails if any occurred.
+      - name: MPI validation
+        shell: bash
+        run: |
+          set +e # Allow script to keep going through errors
+          # Variables from earlier `run` steps are not visible here, so the
+          # failure counter must be initialized in this step.
+          status_sum=0
+          for ex in `find /home/cudaq/examples/other/distributed/ -name '*.cpp'`; do
+            # Name reported in the step summary messages below.
+            filename=$(basename -- "$ex")
+            # Set CUDAQ_ENABLE_MPI_EXAMPLE to activate these examples.
+            nvq++ -DCUDAQ_ENABLE_MPI_EXAMPLE=1 "$ex"
+            status=$?
+            if [ $status -eq 0 ]; then
+              # Run with mpiexec
+              mpiexec --allow-run-as-root -np 4 ./a.out
+              status=$?
+              if [ $status -eq 0 ]; then
+                echo ":white_check_mark: Successfully ran $filename." >> $GITHUB_STEP_SUMMARY
+              else
+                echo ":x: Failed to execute $filename." >> $GITHUB_STEP_SUMMARY
+                status_sum=$((status_sum+1))
+              fi
+            else
+              echo ":x: Compilation failed for $filename." >> $GITHUB_STEP_SUMMARY
+              status_sum=$((status_sum+1))
+            fi
+          done
+          set -e # Re-enable exit code error checking
+          if [ ! $status_sum -eq 0 ]; then
+            echo "::error::$status_sum examples failed; see step summary for a list of failures."
+            exit $status_sum
+          fi
+
   create_release:
     name: CUDA Quantum Release
     needs: [assets, cudaq_hpc, cudaq_wheels, validation]

diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml
@@ -216,6 +216,50 @@ jobs:
               fi
             done
 
+      # Validate the MPI plugin of the Python wheel inside the wheel validation
+      # image (only for the redhat/ubi9:9.2 image). The conda and Open MPI setup
+      # commands are extracted from the README copied to /tmp/README.md.
+      - name: Run Python MPI tests
+        if: matrix.os_image == 'redhat/ubi9:9.2'
+        uses: ./.github/actions/run-in-docker
+        with:
+          image: wheel_validation:local
+          shell: bash
+          run: |
+            # Install openmpi and mpi4py by conda
+            # Install conda
+            dnf install -y --nobest --setopt=install_weak_deps=False wget openssh-clients.$(uname -m)
+            mkdir -p ~/miniconda3
+            wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-$(uname -m).sh -O ~/miniconda3/miniconda.sh
+            bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
+            ~/miniconda3/bin/conda init bash
+            source ~/.bashrc
+            # Extract the setup script from Python wheel's readme
+            readme_file="/tmp/README.md"
+            python_version="${{ inputs.python_version }}"
+            # Parse README file to install openmpi: keep the non-empty lines
+            # between the "Begin/End conda install" markers, minus code fences.
+            conda_script="$(awk '/(Begin conda install)/{flag=1;next}/(End conda install)/{flag=0}flag' $readme_file | grep . | sed '/^```/d')"
+            # Skip the installation of CUDA since we don't need CUDA for this test.
+            regex='conda install -y -n cuda-quantum.*cuda'
+            while IFS= read -r line; do
+              if [[ ! "$line" =~ $regex ]]; then
+                  # Replace Python version
+                  line=${line//3.10/$python_version}
+                  # Install the wheel file
+                  line=${line//pip install cuda-quantum/pip install /tmp/cuda_quantum-0.0.0-*-manylinux_*_$(uname -m).whl}
+                  eval "$line"
+              fi
+            done <<< "$conda_script"
+            # Apply the environment setup between the "Begin/End ompi setup" markers.
+            ompi_script="$(awk '/(Begin ompi setup)/{flag=1;next}/(End ompi setup)/{flag=0}flag' $readme_file | grep . | sed '/^```/d')"
+            while IFS= read -r line; do
+                eval "$line"
+            done <<< "$ompi_script"
+            # Run the MPI test
+            python${{ inputs.python_version }} -m pip install pytest numpy
+            mpirun --allow-run-as-root -np 4 python${{ inputs.python_version }} -m pytest -v /tmp/tests/parallel/test_mpi_api.py
+            pytest_mpi_status=$?
+            if [ ! $pytest_mpi_status -eq 0 ]; then
+              # The annotation must name this workflow file (python_wheels.yml).
+              echo "::error file=python_wheels.yml::Python MPI plugin test failed with status $pytest_mpi_status."
+              exit 1
+            fi
+
       - name: Validate Python examples
         run: |
           docker run --rm -dit --name wheel-validation wheel_validation:local

diff --git a/.github/workflows/test_in_devenv.yml b/.github/workflows/test_in_devenv.yml
@@ -5,6 +5,10 @@ on:
         type: string
         required: false
         default: linux/amd64
+      mpi:
+        type: string
+        required: false
+        default: openmpi
       devdeps_image:
         required: false
         type: string
@@ -65,7 +69,8 @@ jobs:
           DOCKER_BUILDKIT=1 docker build --platform ${{ inputs.platform }} \
             -t cuda-quantum-dev:local -f docker/build/cudaq.dev.Dockerfile . \
             --build-arg base_image=$base_image \
-            --build-arg install="CMAKE_BUILD_TYPE=Debug"
+            --build-arg install="CMAKE_BUILD_TYPE=Debug" \
+            --build-arg mpi="${{ inputs.mpi }}"
 
       - name: Test CUDA Quantum
         uses: ./.github/actions/run-in-docker
@@ -84,6 +89,52 @@ jobs:
               exit 1
             fi
 
+      - name: Test CUDA Quantum MPI4Py Plugin
+        uses: ./.github/actions/run-in-docker
+        with:
+          image: cuda-quantum-dev:local
+          shell: bash
+          run: |
+            # Install mpi4py, delete the comm plugin library from the build
+            # tree, then run the MPIApiTest tests and report any failure.
+            # NOTE(review): presumably deleting the library makes the tests
+            # go through the mpi4py-based plugin instead - confirm.
+            cd $CUDAQ_REPO_ROOT
+            python3 -m pip install mpi4py~=3.1
+            rm -f build/lib/plugins/libcudaq-comm-plugin.so
+            ctest --test-dir build -R MPIApiTest -V
+            ctest_rc=$?
+            if [ $ctest_rc -ne 0 ]; then
+              echo "::error file=test_in_devenv.yml::Test CUDA Quantum MPI4Py Plugin failed with status $ctest_rc."
+              exit 1
+            fi
+      
+      # Builds/activates the MPI plugin through the installed activation script
+      # and then reruns the MPIApiTest tests against it.
+      - name: Test CUDA Quantum MPI Plugin Activation
+        uses: ./.github/actions/run-in-docker
+        with:
+          image: cuda-quantum-dev:local
+          shell: bash
+          run: |
+            # Set MPI_PATH depending on OMPI/MPICH: the presence of the
+            # `ompi_info` executable is used as the indicator for Open MPI.
+            # `|| true` keeps a missing executable from being treated as an error.
+            has_ompiinfo=$(which ompi_info || true)
+            if [[ ! -z $has_ompiinfo ]]; then
+              export MPI_PATH="/usr/lib/$(uname -m)-linux-gnu/openmpi/"
+            else
+              export MPI_PATH="/usr/lib/$(uname -m)-linux-gnu/mpich/"
+            fi
+            # Run the activation script. It is sourced (not executed), so it
+            # runs in this shell and shares its variables.
+            source $CUDAQ_INSTALL_PREFIX/distributed_interfaces/activate_custom_mpi.sh
+            external_plugin_build_status=$?
+            if [ ! $external_plugin_build_status -eq 0 ] ; then
+              echo "::error file=test_in_devenv.yml::Test CUDA Quantum MPI Plugin Activation failed to activate the plugin with status $external_plugin_build_status."
+              exit 1
+            fi
+            # Print the plugin library path for the log.
+            # NOTE(review): presumably set by the activation script - confirm.
+            echo $CUDAQ_MPI_COMM_LIB
+            # Rerun the MPI plugin test
+            cd $CUDAQ_REPO_ROOT
+            ctest --test-dir build -R MPIApiTest -V
+            external_plugin_status=$?   
+            if [ ! $external_plugin_status -eq 0 ] ; then
+              echo "::error file=test_in_devenv.yml::Test CUDA Quantum MPI Plugin Activation failed with status $external_plugin_status."
+              exit 1
+            fi
+
       - name: Save environment
         id: env_save
         if: inputs.export_environment

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -432,6 +432,8 @@ if (NOT CUDAQ_SKIP_MPI)
   find_package(MPI COMPONENTS CXX)
   if (MPI_FOUND) 
     message(STATUS "MPI CXX Found: ${MPIEXEC}")
+    # Build the built-in MPI Comm plugin
+    add_subdirectory(runtime/cudaq/distributed/builtin)
   endif()
 endif()
 

diff --git a/docker/build/cudaq.dev.Dockerfile b/docker/build/cudaq.dev.Dockerfile
@@ -35,6 +35,17 @@ ARG destination="$CUDAQ_REPO_ROOT"
 ADD "$workspace" "$destination"
 WORKDIR "$destination"
 
+# Optionally install an MPI implementation into the image.
+# The `mpi` build argument selects the package lib<mpi>-dev to install
+# (mpich or openmpi); leave it empty to skip the installation.
+# The build fails if MPI_PATH is already set, i.e. when the base image
+# already provides MPI.
+ARG mpi=
+RUN if [ -n "$mpi" ]; \
+    then \
+        if [ -n "$MPI_PATH" ]; then \
+            echo "Using a base image with MPI is not supported when passing a 'mpi' build argument." && exit 1; \
+        else \
+            apt-get update && apt-get install -y "lib${mpi}-dev"; \
+        fi \
+    fi
+
 # Configuring a base image that contains the necessary dependencies for GPU
 # accelerated components and passing a build argument 
 #   install="CMAKE_BUILD_TYPE=Release FORCE_COMPILE_GPU_COMPONENTS=true"

diff --git a/docker/test/debian.Dockerfile b/docker/test/debian.Dockerfile
@@ -32,6 +32,7 @@ ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp311-cp311-manylinux_2_28_x86_64.whl
 COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel
 COPY docs/sphinx/examples/python /tmp/examples/
 COPY python/tests /tmp/tests/
+COPY python/README.md /tmp/README.md
 
 RUN if [ -n "$pip_install_flags" ]; then \
         # We can't install with a --user flag in a virtual environment unless we enable this.

diff --git a/docker/test/fedora.Dockerfile b/docker/test/fedora.Dockerfile
@@ -26,6 +26,7 @@ ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp310-cp310-manylinux_2_28_x86_64.whl
 COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel
 COPY docs/sphinx/examples/python /tmp/examples/
 COPY python/tests /tmp/tests/
+COPY python/README.md /tmp/README.md
 
 RUN python${python_version} -m pip install ${pip_install_flags} /tmp/$cuda_quantum_wheel
 RUN if [ -n "$optional_dependencies" ]; then python${python_version} -m pip install cuda-quantum[$optional_dependencies]; fi
diff --git a/docker/test/opensuse.Dockerfile b/docker/test/opensuse.Dockerfile
@@ -27,6 +27,7 @@ ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp39-cp39-manylinux_2_28_x86_64.whl
 COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel
 COPY docs/sphinx/examples/python /tmp/examples/
 COPY python/tests /tmp/tests/
+COPY python/README.md /tmp/README.md
 
 RUN python${python_version} -m pip install ${pip_install_flags} /tmp/$cuda_quantum_wheel
 RUN if [ -n "$optional_dependencies" ]; then python${python_version} -m pip install cuda-quantum[$optional_dependencies]; fi
diff --git a/docker/test/redhat.Dockerfile b/docker/test/redhat.Dockerfile
@@ -26,6 +26,7 @@ ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp311-cp311-manylinux_2_28_x86_64.whl
 COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel
 COPY docs/sphinx/examples/python /tmp/examples/
 COPY python/tests /tmp/tests/
+COPY python/README.md /tmp/README.md
 
 RUN python${python_version} -m pip install ${pip_install_flags} /tmp/$cuda_quantum_wheel
 RUN if [ -n "$optional_dependencies" ]; then python${python_version} -m pip install cuda-quantum[$optional_dependencies]; fi
diff --git a/docker/test/ubuntu.Dockerfile b/docker/test/ubuntu.Dockerfile
@@ -25,6 +25,7 @@ ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp310-cp310-manylinux_2_28_x86_64.whl
 COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel
 COPY docs/sphinx/examples/python /tmp/examples/
 COPY python/tests /tmp/tests/
+COPY python/README.md /tmp/README.md
 
 RUN python${python_version} -m pip install ${pip_install_flags} /tmp/$cuda_quantum_wheel
 RUN if [ -n "$optional_dependencies" ]; then python${python_version} -m pip install cuda-quantum[$optional_dependencies]; fi
diff --git a/docs/sphinx/api/languages/cpp_api.rst b/docs/sphinx/api/languages/cpp_api.rst
@@ -166,6 +166,8 @@ Namespaces
 .. doxygenfunction:: cudaq::mpi::finalize
 .. doxygenfunction:: cudaq::mpi::rank
 .. doxygenfunction:: cudaq::mpi::num_ranks
-.. doxygenfunction:: cudaq::mpi::all_gather
+.. doxygenfunction:: cudaq::mpi::all_gather(std::vector<double> &global, const std::vector<double> &local)
+.. doxygenfunction:: cudaq::mpi::all_gather(std::vector<int> &global, const std::vector<int> &local)
 .. doxygenfunction:: cudaq::mpi::all_reduce(const T&, const Func&)
 .. doxygenfunction:: cudaq::mpi::all_reduce(const T &localValue, const BinaryFunction &function)
+.. doxygenfunction:: cudaq::mpi::broadcast
diff --git a/docs/sphinx/api/languages/python_api.rst b/docs/sphinx/api/languages/python_api.rst
@@ -207,5 +207,6 @@ MPI Submodule
 .. automethod:: cudaq.mpi::rank
 .. automethod:: cudaq.mpi::num_ranks
 .. automethod:: cudaq.mpi::all_gather
+.. automethod:: cudaq.mpi::broadcast
 .. automethod:: cudaq.mpi::is_initialized
 .. automethod:: cudaq.mpi::finalize
diff --git a/docs/sphinx/examples/cpp/other/distributed/mpi.cpp b/docs/sphinx/examples/cpp/other/distributed/mpi.cpp
@@ -0,0 +1,68 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2023 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under    *
+ * the terms of the Apache License 2.0 which accompanies this distribution.    *
+ ******************************************************************************/
+
+// Compile and run with:
+// ```
+// nvq++ mpi.cpp -DCUDAQ_ENABLE_MPI_EXAMPLE=1 -o mpi.x && mpiexec -np 4 ./mpi.x
+// ```
+
+// This example demonstrates CUDA Quantum MPI support.
+
+#ifndef CUDAQ_ENABLE_MPI_EXAMPLE
+#define CUDAQ_ENABLE_MPI_EXAMPLE 0
+#endif
+
+#include <cudaq.h>
+
+// Entry point of the example.
+// Unless compiled with `-DCUDAQ_ENABLE_MPI_EXAMPLE=1` this does nothing and
+// returns 0. Otherwise each MPI process runs `cudaq::observe` for its own
+// parameter and the results are collected with `cudaq::mpi::all_gather`.
+// Returns 0 on success, or -1 if the gathered data does not contain this
+// process' own result at its rank index.
+int main(int argc, char **argv) {
+#if CUDAQ_ENABLE_MPI_EXAMPLE == 0
+  return 0;
+#else
+  // Initialize MPI
+  cudaq::mpi::initialize();
+
+  if (cudaq::mpi::rank() == 0)
+    printf("Running MPI example with %d processes.\n", cudaq::mpi::num_ranks());
+
+  using namespace cudaq::spin;
+  cudaq::spin_op h = 5.907 - 2.1433 * x(0) * x(1) - 2.1433 * y(0) * y(1) +
+                     .21829 * z(0) - 6.125 * z(1);
+  auto ansatz = [](double theta) __qpu__ {
+    cudaq::qubit q, r;
+    x(q);
+    ry(theta, r);
+    x<cudaq::ctrl>(r, q);
+  };
+
+  // In addition to the built-in `MQPU` platform, users can construct MPI
+  // application directly using CUDA Quantum MPI support.
+  const auto allParams =
+      cudaq::random_vector(-M_PI, M_PI, cudaq::mpi::num_ranks());
+
+  // For example, each MPI process can run `cudaq::observe` for a different
+  // parameter.
+  const double rankParam = allParams[cudaq::mpi::rank()];
+  const double rankResult = cudaq::observe(ansatz, h, rankParam);
+  printf("[Process %d]: Energy(%lf) = %lf.\n", cudaq::mpi::rank(), rankParam,
+         rankResult);
+  // Then, using `cudaq::mpi::all_gather` to collect all the results.
+  std::vector<double> gatherData(cudaq::mpi::num_ranks());
+  cudaq::mpi::all_gather(gatherData, {rankResult});
+  if (cudaq::mpi::rank() == 0) {
+    printf("Gathered data from all ranks: \n");
+    for (const auto &x : gatherData)
+      printf("%lf\n", x);
+  }
+
+  // Verify that the data has been assembled as expected. The check is done
+  // before finalizing (it queries the rank), but the exit code is only
+  // returned afterwards so that MPI is finalized on the failure path as well.
+  const bool mismatch =
+      std::abs(gatherData[cudaq::mpi::rank()] - rankResult) > 1e-12;
+  cudaq::mpi::finalize();
+  return mismatch ? -1 : 0;
+#endif
+}
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
@@ -72,6 +72,7 @@ else()
   install(TARGETS _pycudaq LIBRARY DESTINATION cudaq)
 endif()
 
+add_subdirectory(runtime/cudaq/distributed)
 if (NOT CUDAQ_DISABLE_CPP_FRONTEND)
   add_subdirectory(runtime/cudaq/domains/plugins)
 endif()

diff --git a/python/README.md b/python/README.md
@@ -42,6 +42,8 @@ you can install a minimal version following the instructions
 The following commands will create and activate a complete environment for
 CUDA Quantum with all its dependencies:
 
+[//]: # (Begin conda install)
+
 ```console
     conda create -y -n cuda-quantum python==3.10 pip
     conda install -y -n cuda-quantum -c "nvidia/label/cuda-11.8.0" cuda
@@ -51,12 +53,18 @@ CUDA Quantum with all its dependencies:
     conda activate cuda-quantum
 ```
 
+[//]: # (End conda install)
+
 You must configure MPI by setting the following environment variables:
 
+[//]: # (Begin ompi setup)
+
 ```console
   export OMPI_MCA_opal_cuda_support=true OMPI_MCA_btl='^openib'
 ```
 
+[//]: # (End ompi setup)
+
 *If you do not set these variables you may encounter a segmentation fault.*
 
 **Important**: It is *not* sufficient to set these variable within the conda