forked from LLNL/ygm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bandwidth_check.cpp
94 lines (77 loc) · 3.26 KB
/
bandwidth_check.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
// Copyright 2019-2021 Lawrence Livermore National Security, LLC and other YGM
// Project Developers. See the top-level COPYRIGHT file for details.
//
// SPDX-License-Identifier: MIT
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include <ygm/comm.hpp>
#include <ygm/utility.hpp>
// This is an example that checks bandwidth when sending messages for which the
// receiver performs a no-op. The original note says the "#pragma omp" lines
// need to be uncommented for multithreaded versions and commented out for
// unthreaded ones; no such pragmas are present in this file.
namespace {

// Returns environment variable `name` parsed as an int, or `fallback` when the
// variable is unset or does not parse to a strictly positive value.
int positive_env_int(const char* name, int fallback) {
  const char* raw = std::getenv(name);
  if (raw == nullptr) {
    return fallback;
  }
  const int parsed = std::atoi(raw);
  return parsed > 0 ? parsed : fallback;
}

// Returns environment variable `name` as a string, or `fallback` when unset.
std::string env_string(const char* name, const char* fallback) {
  const char* raw = std::getenv(name);
  return std::string(raw != nullptr ? raw : fallback);
}

// Converts a byte count moved in `seconds` into the value reported as "GB/s"
// (one unit is 1024^3 bytes, as in the original computation).
double bandwidth_in_gb_per_s(double total_bytes, double seconds) {
  constexpr double bytes_per_unit = 1024.0 * 1024.0 * 1024.0;
  return total_bytes / seconds / bytes_per_unit;
}

}  // namespace

// Bandwidth benchmark entry point.
//
// Runs two experiments on the ygm communicator, each timed between two
// barriers, and prints the results through world.cout0:
//   1. every rank sends a 1024-element std::vector<int64_t> to random ranks;
//   2. every rank sends single int64_t values to random ranks.
// The receiver-side lambda does nothing in both cases.
//
// The job geometry is read from SLURM_NNODES, SLURM_NTASKS and
// SLURM_CLUSTER_NAME. When a variable is missing or invalid the program falls
// back to 1 node, world.size() tasks and the name "unknown" instead of
// dereferencing a null pointer or dividing by zero.
int main(int argc, char** argv) {
  ygm::comm world(&argc, &argv);

  {
    const int comm_rank = world.rank();
    const int comm_size = world.size();

    // std::getenv returns nullptr for unset variables, so every lookup goes
    // through a helper that supplies a usable default.
    const int         num_nodes = positive_env_int("SLURM_NNODES", 1);
    const int         num_tasks = positive_env_int("SLURM_NTASKS", comm_size);
    const std::string cluster_name =
        env_string("SLURM_CLUSTER_NAME", "unknown");

    world.cout0("Bandwidth check on ", cluster_name, " with ", num_tasks,
                " tasks on ", num_nodes, " nodes.\n");

    {  // Send vectors
      world.cout0("Bandwidth sending vectors");

      // 64-bit arithmetic: msgs_per_node * num_nodes exceeds the range of a
      // 32-bit int once the job spans 2048 or more nodes.
      const uint64_t msgs_per_node{1024 * 1024};
      const uint64_t msgs_per_rank = msgs_per_node *
                                     static_cast<uint64_t>(num_nodes) /
                                     static_cast<uint64_t>(num_tasks);
      const int64_t msg_length{1024};

      // Payload is the sequence 0, 1, ..., msg_length - 1.
      std::vector<int64_t> to_send;
      to_send.reserve(static_cast<size_t>(msg_length));
      for (int64_t i = 0; i < msg_length; ++i) {
        to_send.push_back(i);
      }

      world.barrier();

      ygm::timer send_timer{};

      // Rank-dependent seed so ranks pick different destination sequences.
      std::mt19937                       gen(4567 * comm_rank);
      std::uniform_int_distribution<int> dest_dist(0, comm_size - 1);

      for (uint64_t msg = 0; msg < msgs_per_rank; ++msg) {
        world.async(dest_dist(gen),
                    [](auto mbox, const std::vector<int64_t>& vec) { return; },
                    to_send);
      }

      world.barrier();

      const double elapsed = send_timer.elapsed();

      // Each message corresponds to 8 bytes per int64_t and 8 bytes for the
      // function pointer. Accumulate in double; float loses precision for
      // byte counts of this magnitude.
      const double total_bytes = static_cast<double>(msgs_per_rank) *
                                 num_tasks * (8.0 * msg_length + 8.0);
      const double bandwidth = bandwidth_in_gb_per_s(total_bytes, elapsed);

      world.cout0("Elapsed time: ", elapsed);
      world.cout0("Vector Bandwidth: ", bandwidth, " GB/s\n");
    }

    {  // Send individual int64_t's
      world.cout0("Bandwidth sending individual int64_t's");

      const uint64_t msgs_per_node{1024 * 1024 * 1024};
      const uint64_t msgs_per_rank = msgs_per_node *
                                     static_cast<uint64_t>(num_nodes) /
                                     static_cast<uint64_t>(num_tasks);

      // Destinations are generated before the timed region so random number
      // generation is not part of the measurement. Reserving up front avoids
      // repeated reallocation of a vector that can hold up to 2^30 entries.
      std::vector<int> destinations;
      destinations.reserve(msgs_per_rank);
      std::mt19937                       gen(1234 * comm_rank);
      std::uniform_int_distribution<int> dest_dist(0, comm_size - 1);
      for (uint64_t msg = 0; msg < msgs_per_rank; ++msg) {
        destinations.push_back(dest_dist(gen));
      }

      world.barrier();

      ygm::timer send_timer{};

      for (uint64_t msg = 0; msg < msgs_per_rank; ++msg) {
        // Pass the payload explicitly as int64_t so it matches the 8 bytes
        // assumed by the bandwidth formula below.
        // NOTE(review): presumably ygm serializes arguments using the type
        // they are passed with (an int would then be 4 bytes) - confirm.
        world.async(destinations[msg],
                    [](auto mbox, const int64_t val) { return; },
                    static_cast<int64_t>(destinations[msg]));
      }

      world.barrier();

      const double elapsed = send_timer.elapsed();

      // Each message corresponds to 8 bytes for the int64_t and 8 bytes for
      // the function pointer.
      const double total_bytes =
          static_cast<double>(msgs_per_rank) * num_tasks * (8.0 + 8.0);
      const double bandwidth = bandwidth_in_gb_per_s(total_bytes, elapsed);

      world.cout0("Elapsed time: ", elapsed);
      world.cout0("int64_t Bandwidth: ", bandwidth, " GB/s\n");
    }
  }
}