MPI compilation fixes.
Robadob committed Jul 20, 2023
1 parent 0c876d0 commit 8e05d93
Showing 4 changed files with 17 additions and 16 deletions.
2 changes: 1 addition & 1 deletion include/flamegpu/simulation/CUDAEnsemble.h
@@ -28,7 +28,7 @@ class CUDAEnsemble {
// Sent from worker to manager to request a job index to process
RequestJob = 0,
// Sent from manager to worker to assign a job index to process
- AssignJob = 1;
+ AssignJob = 1,
};
#endif
/**
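
The one-character change above is a genuine compile error: enumerators are separated by commas, and a semicolon after AssignJob = 1 is rejected (the compiler expects a comma or a closing brace at that point). The enum is referenced as EnvelopeTag elsewhere in this commit; below is a minimal, illustrative sketch of the corrected shape (the full declaration is not shown in this hunk, so the exact name and layout are assumptions):

    enum EnvelopeTag {
        // Sent from worker to manager to request a job index to process
        RequestJob = 0,
        // Sent from manager to worker to assign a job index to process
        AssignJob = 1,   // comma, not semicolon, between enumerators
    };
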
3 changes: 1 addition & 2 deletions include/flamegpu/simulation/detail/MPISimRunner.h
@@ -29,13 +29,12 @@ namespace detail {
* There may be multiple instances per GPU, if running small models on large GPUs.
*/
class MPISimRunner : public AbstractSimRunner {
+ public:
enum Signal : unsigned int {
// MPISimRunner sets this to notify manager that it wants a new job
RequestJob = UINT_MAX,
RunFailed = UINT_MAX-1,
};

- public:
/**
* Constructor, creates and initialise a new MPISimRunner
* @param _model A copy of the ModelDescription hierarchy for the RunPlanVector, this is used to create the CUDASimulation instances.
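
Hoisting the public: access specifier above the enum makes Signal publicly visible, which the manager code in CUDAEnsemble.cu (below) needs in order to compare job slots against detail::MPISimRunner::Signal::RequestJob. The two sentinels sit at the top of the unsigned range so that any smaller value can be treated as a plan index. A hedged sketch of the handshake this implies, with illustrative names (assign_if_requested is not a FLAMEGPU function):

    #include <atomic>
    #include <climits>

    enum Signal : unsigned int {
        RequestJob = UINT_MAX,      // runner asks the manager for a new job
        RunFailed  = UINT_MAX - 1,  // runner reports that the previous job failed
    };

    // Manager side: hand out the next plan index when a runner's slot holds RequestJob.
    void assign_if_requested(std::atomic<unsigned int> &slot, unsigned int &next_run) {
        if (slot.load() == Signal::RequestJob) {
            slot.store(next_run++);
        }
    }
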
26 changes: 14 additions & 12 deletions src/flamegpu/simulation/CUDAEnsemble.cu
@@ -73,7 +73,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
// Init MPI, fetch rank and size
int thread_provided = 0;
// MPI single means that only the main thread will perform MPI actions
- MPI_INIT_THREAD(NULL, NULL, MPI_THREAD_SINGLE, &thread_provided);
+ MPI_Init_thread(NULL, NULL, MPI_THREAD_SINGLE, &thread_provided);
if (thread_provided != MPI_THREAD_SINGLE) {
THROW exception::UnknownInternalError("MPI unable to provide MPI_THREAD_SINGLE support");
}
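
In the C bindings the routine is MPI_Init_thread; the all-caps spelling MPI_INIT_THREAD is not declared by mpi.h (only constants such as MPI_THREAD_SINGLE are upper-case), hence the compile failure fixed above. A minimal standalone sketch of the standard call pattern, not the FLAMEGPU code:

    #include <mpi.h>

    int main(int argc, char **argv) {
        int provided = 0;
        // MPI_THREAD_SINGLE: only the main thread will make MPI calls.
        MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
        int rank = 0, size = 0;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        // ... rank 0 acts as manager, the other ranks as workers ...
        MPI_Finalize();
        return 0;
    }
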
@@ -221,16 +221,17 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
detail::MPISimRunner** runners = reinterpret_cast<detail::MPISimRunner**>(_runners);
next_runs = std::vector<std::atomic<unsigned int>>(TOTAL_RUNNERS);
for (auto& nr : next_runs)
- std::atomic_init(&nr, MPISimRunner::Signal::RequestJob);
+ std::atomic_init(&nr, detail::MPISimRunner::Signal::RequestJob);
{
unsigned int i = 0;
for (auto& d : devices) {
for (unsigned int j = 0; j < config.concurrent_runs; ++j) {
- runners[i++] = new detail::MPISimRunner(model, err_ct, next_runs[i], plans,
+ runners[i] = new detail::MPISimRunner(model, err_ct, next_runs[i], plans,
step_log_config, exit_log_config,
d, j,
config.verbosity,
run_logs, log_export_queue, log_export_queue_mutex, log_export_queue_cdn, fast_err_detail, TOTAL_RUNNERS, isSWIG);
+ ++i;
}
}
}
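
The second change in this hunk is more than a style fix: the old line incremented i inside the subscript while also reading next_runs[i] in the same argument list, and the order in which function arguments are evaluated is unspecified, so a runner could be paired with the wrong job slot. A small self-contained illustration of the fixed pattern (Runner and the vectors are hypothetical stand-ins, not FLAMEGPU types):

    #include <cstdio>
    #include <vector>

    struct Runner { unsigned int slot_index; };

    int main() {
        std::vector<unsigned int> next_runs(3, 0);   // stand-ins for the atomic job slots
        std::vector<Runner> runners(next_runs.size());
        unsigned int i = 0;
        for (unsigned int j = 0; j < runners.size(); ++j) {
            runners[i] = Runner{i};  // read i once: runner i is paired with slot i
            ++i;                     // advance the index in its own statement
        }
        std::printf("paired %zu runners with their slots\n", runners.size());
        return 0;
    }
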
@@ -247,15 +248,15 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
// Check whether local runners require a job assignment
for (auto &r : next_runs) {
unsigned int run_id = r.load();
- if (run_id == MPISimRunner::Signal::RunFailed) {
+ if (run_id == detail::MPISimRunner::Signal::RunFailed) {
if (config.error_level == EnsembleConfig::Fast) {
// @todo
- } else if () {
+ } else {
// @todo
}
- run_id = MPISimRunner::Signal::RequestJob
+ run_id = detail::MPISimRunner::Signal::RequestJob;
}
- if (run_id == MPISimRunner::Signal::RequestJob) {
+ if (run_id == detail::MPISimRunner::Signal::RequestJob) {
r.store(next_run++);
}
}
@@ -268,7 +269,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
&status); // MPI_Status*
while (flag) {
// Receive the message (kind of redundant as we already have the status and it carrys no data)
- memset(&status, 0, sizeof(MPI_STATUS));
+ memset(&status, 0, sizeof(MPI_Status));
MPI_Recv(
nullptr, // void* data
0, // int count
@@ -292,7 +293,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
MPI_Iprobe(MPI_ANY_SOURCE, EnvelopeTag::RequestJob, MPI_COMM_WORLD, &flag, &status);
}
// Print progress to console
- if (verbosity >= Verbosity::Default && next_run_reported != next_run) {
+ if (config.verbosity >= Verbosity::Default && next_run_reported != next_run) {
const int progress = static_cast<int>(next_run) - static_cast<int>(TOTAL_RUNNERS * world_size);
fprintf(stdout, "\rCUDAEnsemble progress: %d/%u", progress < 0 ? 0 : progress, static_cast<unsigned int>(plans.size()));
fflush(stdout);
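
The manager loop above probes for zero-byte RequestJob messages, drains each one, and (in code collapsed out of this diff) replies with a job index. A hedged sketch of that handshake using the standard MPI calls; the function name, tags, and the exact reply logic are assumptions, not the FLAMEGPU implementation:

    #include <mpi.h>

    void serve_pending_requests(unsigned int &next_run, unsigned int total_runs,
                                int tag_request, int tag_assign) {
        int flag = 0;
        MPI_Status status;
        MPI_Iprobe(MPI_ANY_SOURCE, tag_request, MPI_COMM_WORLD, &flag, &status);
        while (flag) {
            // The request carries no payload; receiving it only clears the message.
            MPI_Recv(nullptr, 0, MPI_CHAR, status.MPI_SOURCE, tag_request,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            // Reply with the next index; an out-of-range index signals "no more work".
            unsigned int job = next_run < total_runs ? next_run++ : total_runs;
            MPI_Send(&job, 1, MPI_UNSIGNED, status.MPI_SOURCE, tag_assign, MPI_COMM_WORLD);
            MPI_Iprobe(MPI_ANY_SOURCE, tag_request, MPI_COMM_WORLD, &flag, &status);
        }
    }
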
@@ -304,11 +305,12 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
} else {
// Wait for all runs to have been assigned, and all MPI runners to have been notified of fin
unsigned int next_run = 0;
+ MPI_Status status;
while (next_run < plans.size()) {
// Check whether local runners require a job assignment
for (auto& r : next_runs) {
const unsigned int runner_status = r.load();
- if (runner_status == MPISimRunner::Signal::RequestJob) {
+ if (runner_status == detail::MPISimRunner::Signal::RequestJob) {
// Send a job request to 0, these have no data
MPI_Send(
nullptr, // void* data
@@ -328,7 +330,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {
MPI_COMM_WORLD, // MPI_Comm communicator
&status); // MPI_Status* status
// Break if assigned job is out of range, work is finished
- if (next_run >= plan.size()) {
+ if (next_run >= plans.size()) {
break;
}
// Pass the job to runner that requested it
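
On the worker side the pattern is the mirror image: a zero-byte send to rank 0 with the request tag, then a blocking receive of the assigned index; the plans.size() fix above matters because an index past the end of the plan vector is the shutdown signal. A hedged sketch with illustrative names (request_next_job is not a FLAMEGPU function):

    #include <mpi.h>

    unsigned int request_next_job(int tag_request, int tag_assign) {
        // Empty message to the manager (rank 0): "this node needs work".
        MPI_Send(nullptr, 0, MPI_CHAR, 0, tag_request, MPI_COMM_WORLD);
        unsigned int next_run = 0;
        MPI_Status status;
        MPI_Recv(&next_run, 1, MPI_UNSIGNED, 0, tag_assign, MPI_COMM_WORLD, &status);
        return next_run;  // callers treat next_run >= plans.size() as "no more work"
    }
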
@@ -383,7 +385,7 @@ unsigned int CUDAEnsemble::simulate(const RunPlanVector& plans) {

#ifdef FLAMEGPU_ENABLE_MPI
if (config.mpi) {
- MPI_Finalize()
+ MPI_Finalize();
}
#endif
// Record and store the elapsed time
2 changes: 1 addition & 1 deletion src/flamegpu/simulation/detail/MPISimRunner.cu
@@ -53,7 +53,7 @@ MPISimRunner::MPISimRunner(const std::shared_ptr<const ModelData> _model,
void MPISimRunner::start() {
// While there are still plans to process
while (true) {
- const int run_id = next_run.load();
+ const unsigned int run_id = next_run.load();
if (run_id < plans.size()) {
// Process the assigned job
try {
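
The one-word change here matters because the job slot stores the Signal sentinels near UINT_MAX: read back into a signed int they wrap to small negative values, and run_id < plans.size() then compares signed with unsigned, tripping -Wsign-compare (fatal in a -Werror build), which is consistent with the commit's compilation fixes. A small illustration of the difference; the values and variable names are illustrative only:

    #include <climits>
    #include <cstddef>
    #include <cstdio>

    int main() {
        unsigned int sentinel = UINT_MAX;          // Signal::RequestJob in the diff
        int as_int = static_cast<int>(sentinel);   // typically -1 (implementation-defined before C++20)
        unsigned int as_unsigned = sentinel;       // keeps the sentinel intact
        std::size_t plan_count = 8;
        std::printf("as_int=%d as_unsigned=%u below plan_count: %s\n",
                    as_int, as_unsigned, (as_unsigned < plan_count) ? "yes" : "no");
        return 0;
    }
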
