MPI compilation fixes.
Robadob committed Jul 20, 2023
1 parent 0c876d0 commit 8e05d93
Showing 4 changed files with 17 additions and 16 deletions.
2 changes: 1 addition & 1 deletion include/flamegpu/simulation/CUDAEnsemble.h
@@ -28,7 +28,7 @@ class CUDAEnsemble {
// Sent from worker to manager to request a job index to process
RequestJob = 0,
// Sent from manager to worker to assign a job index to process
- AssignJob = 1;
+ AssignJob = 1,
};
#endif
/**
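
The one-character change above is a genuine compile error: enumerators are separated by commas, and a semicolon after AssignJob = 1 is rejected (the compiler expects a comma or a closing brace at that point). The enum is referenced as EnvelopeTag elsewhere in this commit; below is a minimal, illustrative sketch of the corrected shape (the full declaration is not shown in this hunk, so the exact name and layout are assumptions):

    enum EnvelopeTag {
        // Sent from worker to manager to request a job index to process
        RequestJob = 0,
        // Sent from manager to worker to assign a job index to process
        AssignJob = 1,   // comma, not semicolon, between enumerators
    };
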
3 changes: 1 addition & 2 deletions include/flamegpu/simulation/detail/MPISimRunner.h
@@ -29,13 +29,12 @@ namespace detail {
* There may be multiple instances per GPU, if running small models on large GPUs.
*/
class MPISimRunner : public AbstractSimRunner {
+ public:
enum Signal : unsigned int {
// MPISimRunner sets this to notify manager that it wants a new job
RequestJob = UINT_MAX,
RunFailed = UINT_MAX-1,
};

- public:
/**
* Constructor, creates and initialise a new MPISimRunner
* @param _model A copy of the ModelDescription hierarchy for the RunPlanVector, this is used to create the CUDASimulation instances.
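
Hoisting the public: access specifier above the enum makes Signal publicly visible, which the manager code in CUDAEnsemble.cu (below) needs in order to compare job slots against detail::MPISimRunner::Signal::RequestJob. The two sentinels sit at the top of the unsigned range so that any smaller value can be treated as a plan index. A hedged sketch of the handshake this implies, with illustrative names (assign_if_requested is not a FLAMEGPU function):

    #include <atomic>
    #include <climits>

    enum Signal : unsigned int {
        RequestJob = UINT_MAX,      // runner asks the manager for a new job
        RunFailed  = UINT_MAX - 1,  // runner reports that the previous job failed
    };

    // Manager side: hand out the next plan index when a runner's slot holds RequestJob.
    void assign_if_requested(std::atomic<unsigned int> &slot, unsigned int &next_run) {
        if (slot.load() == Signal::RequestJob) {
            slot.store(next_run++);
        }
    }
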
26 changes: 14 additions & 12 deletions src/flamegpu/simulation/CUDAEnsemble.cu
@@ -73,7 +73,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
// Init MPI, fetch rank and size
int thread_provided = 0;
// MPI single means that only the main thread will perform MPI actions
- MPI_INIT_THREAD(NULL, NULL, MPI_THREAD_SINGLE, &thread_provided);
+ MPI_Init_thread(NULL, NULL, MPI_THREAD_SINGLE, &thread_provided);
if (thread_provided != MPI_THREAD_SINGLE) {
THROW exception::UnknownInternalError("MPI unable to provide MPI_THREAD_SINGLE support");
}
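
In the C bindings the routine is MPI_Init_thread; the all-caps spelling MPI_INIT_THREAD is not declared by mpi.h (only constants such as MPI_THREAD_SINGLE are upper-case), hence the compile failure fixed above. A minimal standalone sketch of the standard call pattern, not the FLAMEGPU code:

    #include <mpi.h>

    int main(int argc, char **argv) {
        int provided = 0;
        // MPI_THREAD_SINGLE: only the main thread will make MPI calls.
        MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
        int rank = 0, size = 0;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        // ... rank 0 acts as manager, the other ranks as workers ...
        MPI_Finalize();
        return 0;
    }
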
@@ -221,16 +221,17 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
detail::MPISimRunner** runners = reinterpret_cast<detail::MPISimRunner**>(_runners);
next_runs = std::vector<std::atomic<unsigned int>>(TOTAL_RUNNERS);
for (auto& nr : next_runs)
- std::atomic_init(&nr, MPISimRunner::Signal::RequestJob);
+ std::atomic_init(&nr, detail::MPISimRunner::Signal::RequestJob);
{
unsigned int i = 0;
for (auto& d : devices) {
for (unsigned int j = 0; j < config.concurrent_runs; ++j) {
- runners[i++] = new detail::MPISimRunner(model, err_ct, next_runs[i], plans,
+ runners[i] = new detail::MPISimRunner(model, err_ct, next_runs[i], plans,
step_log_config, exit_log_config,
d, j,
config.verbosity,
run_logs, log_export_queue, log_export_queue_mutex, log_export_queue_cdn, fast_err_detail, TOTAL_RUNNERS, isSWIG);
+ ++i;
}
}
}
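
The second change in this hunk is more than a style fix: the old line incremented i inside the subscript while also reading next_runs[i] in the same argument list, and the order in which function arguments are evaluated is unspecified, so a runner could be paired with the wrong job slot. A small self-contained illustration of the fixed pattern (Runner and the vectors are hypothetical stand-ins, not FLAMEGPU types):

    #include <cstdio>
    #include <vector>

    struct Runner { unsigned int slot_index; };

    int main() {
        std::vector<unsigned int> next_runs(3, 0);   // stand-ins for the atomic job slots
        std::vector<Runner> runners(next_runs.size());
        unsigned int i = 0;
        for (unsigned int j = 0; j < runners.size(); ++j) {
            runners[i] = Runner{i};  // read i once: runner i is paired with slot i
            ++i;                     // advance the index in its own statement
        }
        std::printf("paired %zu runners with their slots\n", runners.size());
        return 0;
    }
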
@@ -247,15 +248,15 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
// Check whether local runners require a job assignment
for (auto &r : next_runs) {
unsigned int run_id = r.load();
- if (run_id == MPISimRunner::Signal::RunFailed) {
+ if (run_id == detail::MPISimRunner::Signal::RunFailed) {
if (config.error_level == EnsembleConfig::Fast) {
// @todo
- } else if () {
+ } else {
// @todo
}
- run_id = MPISimRunner::Signal::RequestJob
+ run_id = detail::MPISimRunner::Signal::RequestJob;
}
- if (run_id == MPISimRunner::Signal::RequestJob) {
+ if (run_id == detail::MPISimRunner::Signal::RequestJob) {
r.store(next_run++);
}
}
@@ -268,7 +269,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
&status); // MPI_Status*
while (flag) {
// Receive the message (kind of redundant as we already have the status and it carrys no data)
- memset(&status, 0, sizeof(MPI_STATUS));
+ memset(&status, 0, sizeof(MPI_Status));
MPI_Recv(
nullptr, // void* data
0, // int count
@@ -292,7 +293,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
MPI_Iprobe(MPI_ANY_SOURCE, EnvelopeTag::RequestJob, MPI_COMM_WORLD, &flag, &status);
}
// Print progress to console
- if (verbosity >= Verbosity::Default && next_run_reported != next_run) {
+ if (config.verbosity >= Verbosity::Default && next_run_reported != next_run) {
const int progress = static_cast<int>(next_run) - static_cast<int>(TOTAL_RUNNERS * world_size);
fprintf(stdout, "\rCUDAEnsemble progress: %d/%u", progress < 0 ? 0 : progress, static_cast<unsigned int>(plans.size()));
fflush(stdout);
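
The manager loop above probes for zero-byte RequestJob messages, drains each one, and (in code collapsed out of this diff) replies with a job index. A hedged sketch of that handshake using the standard MPI calls; the function name, tags, and the exact reply logic are assumptions, not the FLAMEGPU implementation:

    #include <mpi.h>

    void serve_pending_requests(unsigned int &next_run, unsigned int total_runs,
                                int tag_request, int tag_assign) {
        int flag = 0;
        MPI_Status status;
        MPI_Iprobe(MPI_ANY_SOURCE, tag_request, MPI_COMM_WORLD, &flag, &status);
        while (flag) {
            // The request carries no payload; receiving it only clears the message.
            MPI_Recv(nullptr, 0, MPI_CHAR, status.MPI_SOURCE, tag_request,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            // Reply with the next index; an out-of-range index signals "no more work".
            unsigned int job = next_run < total_runs ? next_run++ : total_runs;
            MPI_Send(&job, 1, MPI_UNSIGNED, status.MPI_SOURCE, tag_assign, MPI_COMM_WORLD);
            MPI_Iprobe(MPI_ANY_SOURCE, tag_request, MPI_COMM_WORLD, &flag, &status);
        }
    }
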
@@ -304,11 +305,12 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
} else {
// Wait for all runs to have been assigned, and all MPI runners to have been notified of fin
unsigned int next_run = 0;
+ MPI_Status status;
while (next_run < plans.size()) {
// Check whether local runners require a job assignment
for (auto& r : next_runs) {
const unsigned int runner_status = r.load();
- if (runner_status == MPISimRunner::Signal::RequestJob) {
+ if (runner_status == detail::MPISimRunner::Signal::RequestJob) {
// Send a job request to 0, these have no data
MPI_Send(
nullptr, // void* data
@@ -328,7 +330,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
MPI_COMM_WORLD, // MPI_Comm communicator
&status); // MPI_Status* status
// Break if assigned job is out of range, work is finished
- if (next_run >= plan.size()) {
+ if (next_run >= plans.size()) {
break;
}
// Pass the job to runner that requested it
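
On the worker side the pattern is the mirror image: a zero-byte send to rank 0 with the request tag, then a blocking receive of the assigned index; the plans.size() fix above matters because an index past the end of the plan vector is the shutdown signal. A hedged sketch with illustrative names (request_next_job is not a FLAMEGPU function):

    #include <mpi.h>

    unsigned int request_next_job(int tag_request, int tag_assign) {
        // Empty message to the manager (rank 0): "this node needs work".
        MPI_Send(nullptr, 0, MPI_CHAR, 0, tag_request, MPI_COMM_WORLD);
        unsigned int next_run = 0;
        MPI_Status status;
        MPI_Recv(&next_run, 1, MPI_UNSIGNED, 0, tag_assign, MPI_COMM_WORLD, &status);
        return next_run;  // callers treat next_run >= plans.size() as "no more work"
    }
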
@@ -383,7 +385,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {

#ifdef FLAMEGPU_ENABLE_MPI
if (config.mpi) {
- MPI_Finalize()
+ MPI_Finalize();
}
#endif
// Record and store the elapsed time
2 changes: 1 addition & 1 deletion src/flamegpu/simulation/detail/MPISimRunner.cu
@@ -53,7 +53,7 @@ MPISimRunner::MPISimRunner(const std::shared_ptr<const ModelData> _model,
void MPISimRunner::start() {
// While there are still plans to process
while (true) {
- const int run_id = next_run.load();
+ const unsigned int run_id = next_run.load();
if (run_id < plans.size()) {
// Process the assigned job
try {
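
The one-word change here matters because the job slot stores the Signal sentinels near UINT_MAX: read back into a signed int they wrap to small negative values, and run_id < plans.size() then compares signed with unsigned, tripping -Wsign-compare (fatal in a -Werror build), which is consistent with the commit's compilation fixes. A small illustration of the difference; the values and variable names are illustrative only:

    #include <climits>
    #include <cstddef>
    #include <cstdio>

    int main() {
        unsigned int sentinel = UINT_MAX;          // Signal::RequestJob in the diff
        int as_int = static_cast<int>(sentinel);   // typically -1 (implementation-defined before C++20)
        unsigned int as_unsigned = sentinel;       // keeps the sentinel intact
        std::size_t plan_count = 8;
        std::printf("as_int=%d as_unsigned=%u below plan_count: %s\n",
                    as_int, as_unsigned, (as_unsigned < plan_count) ? "yes" : "no");
        return 0;
    }
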
