-
Notifications
You must be signed in to change notification settings - Fork 0
/
perfexpl.c
164 lines (138 loc) · 4.22 KB
/
perfexpl.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#include <sys/mman.h>
#include <sys/wait.h>
#include <assert.h>
#include <inttypes.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "syscallers.h"
// Number of argv-style words (program path included) stored per test program.
#define MAX_TEST_PROG_CMDLINE_WORDS 5
// Command line of one program to be measured.
typedef struct {
// Words of the command line; main() passes cmdline[0] as the program to execute
// and the whole array as its argument list.
const char *cmdline[MAX_TEST_PROG_CMDLINE_WORDS];
// Never named by an initializer in this file, so it is zero-initialized.
// NOTE(review): presumably serves as the NULL terminator of the argument list when
// all cmdline slots are in use; that relies on it sitting directly after the array — confirm.
const char *const null;
} test_prog_t;
// Programs to measure; main() forks one child process per entry.
static const test_prog_t TEST_PROGS[] = {
// TODO: Use hugepages!
// This entry fills all MAX_TEST_PROG_CMDLINE_WORDS slots of cmdline.
{.cmdline = {"clients/square_evictions", "-n1", "-c0", "-e25", "-r"}},
};
// Number of entries in TEST_PROGS.
static const int NUM_TEST_PROGS = sizeof TEST_PROGS / sizeof *TEST_PROGS;
// Size, in pages, of the perf buffer main() maps for each child (1 + 2^5 = 33).
// NOTE(review): presumably one metadata page plus a power-of-two data area, as
// perf_event_open(2) describes for the mmap layout — confirm.
#define PERF_BUFFER_NUM_PAGES (1 + (1 << 5))
// Event description used as a template: main() copies it into a local struct for
// each child before passing it to perf_event_open().
static const struct perf_event_attr METRIC = {
// TODO: Use bandwidth counter!
// Hardware counter selecting cache misses.
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CACHE_MISSES,
// A period of 1 requests a sample for every counted event.
.sample_period = 1,
// Fields requested in each sample: counter value, timestamp and CPU.
.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU,
// Created disabled with enable_on_exec set.
// NOTE(review): per perf_event_open(2) this should start counting when the
// child calls exec — confirm.
.disabled = 0x1,
.enable_on_exec = 0x1,
.size = sizeof METRIC,
};
// View of one record in the perf data buffer, as main() reads it.
// NOTE(review): the field order presumably mirrors the sample layout that
// perf_event_open(2) defines for METRIC.sample_type (TIME, then CPU, then READ) — confirm.
typedef struct {
// Generic record header; main() reads header.size from it.
struct perf_event_header header;
// Printed by main() in the "time" column.
uint64_t time;
// cpu is printed in the "cpu" column; res is never read in this file
// (presumably the reserved half of the CPU field — confirm).
uint32_t cpu, res;
// Counter reading; main() prints the difference between consecutive readings.
uint64_t value;
} perf_record_sample_t;
// Sent by a freshly forked child to its parent; the parent waits for it before
// attaching the perf event.
#define SIG_CHILD_PROC_UP SIGUSR1
// Sent by the parent to a child; the child waits for it before executing its
// test program. Both names map to the same signal number.
#define SIG_EXEC_TEST_PROG SIGUSR1
// Bookkeeping main() keeps for each forked test program.
typedef struct {
// Process ID returned by fork() for the child.
pid_t pid;
// File descriptor returned by perf_event_open() for the child's event.
int fd;
// Start of the mmap()ed perf buffer for fd; tested for non-NULL before munmap().
struct perf_event_mmap_page *buf;
} test_proc_t;
/*
 * Adds `signal` to the calling process's blocked-signal mask.
 *
 * Returns a signal set containing only `signal`, suitable for passing straight
 * to sigwait().
 */
static sigset_t block_signal(int signal) {
	sigset_t only_this;

	// Build a one-element set...
	sigemptyset(&only_this);
	sigaddset(&only_this, signal);

	// ...and merge it into the current mask; the previous mask is not needed.
	sigprocmask(SIG_BLOCK, &only_this, NULL);

	return only_this;
}
/*
 * Blocks `signal` and then suspends the caller until that signal is delivered.
 *
 * The signal is consumed synchronously via sigwait(), so no handler runs.
 * In debug builds, a sigwait() failure or an unexpected signal number trips an
 * assertion.
 */
static void await_signal(int signal) {
	// Initialized so the check below never reads an indeterminate value if
	// sigwait() fails without storing a signal number.
	int received = 0;
	sigset_t await = block_signal(signal);

	// sigwait() reports failure through its return value, not errno.
	int err = sigwait(&await, &received);
	assert(err == 0);
	(void) err; // keeps NDEBUG builds free of an unused-variable warning

	assert(received == signal);
}
static inline perf_record_sample_t *first_sample(struct perf_event_mmap_page *ptr) {
return (perf_record_sample_t *) ((uintptr_t) ptr + ptr->data_offset);
}
/*
 * Advances `*ptr` to the record that follows the one it currently points at.
 *
 * header.size is the length of the current record in BYTES, so the step has to
 * be done on the raw address: plain pointer arithmetic on a
 * perf_record_sample_t * would scale the step by sizeof(perf_record_sample_t)
 * and skip far past the next record.
 */
static inline void next_sample(perf_record_sample_t **ptr) {
	const uintptr_t current = (uintptr_t) *ptr;

	*ptr = (perf_record_sample_t *) (current + (*ptr)->header.size);
}
int main(void) {
test_proc_t children[NUM_TEST_PROGS];
memset(&children, 1, sizeof children);
int ret = 0;
for(int prog = 0; prog < NUM_TEST_PROGS; ++prog) {
block_signal(SIG_CHILD_PROC_UP);
pid_t pid = fork();
switch(pid) {
case -1:
perror("Forking child");
ret = 1;
goto cleanup;
case 0:
// TODO: Set processor affinity
kill(getppid(), SIG_CHILD_PROC_UP);
await_signal(SIG_EXEC_TEST_PROG);
exec_v(TEST_PROGS[prog].cmdline[0], TEST_PROGS[prog].cmdline);
perror("Executing test program");
ret = 1;
goto cleanup;
default:
await_signal(SIG_CHILD_PROC_UP);
children[prog].pid = pid;
struct perf_event_attr metric;
memcpy(&metric, &METRIC, sizeof metric);
int fd = perf_event_open(&metric, pid, -1, -1, 0);
if(fd < 0) {
fputs("Starting Perf: permissions?\n", stderr);
ret = 1;
goto cleanup;
}
children[prog].fd = fd;
struct perf_event_mmap_page *buf =
mmap(NULL, PERF_BUFFER_NUM_PAGES * getpagesize(),
PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if(!buf || buf == MAP_FAILED) {
perror("Mapping perf buffer");
ret = 1;
goto cleanup;
}
children[prog].buf = buf;
printf("Data buffer at %llu, size %llu\n", buf->data_offset, buf->data_size);
printf("Data head at %llu\n", buf->data_head);
}
}
for(int prog = 0; prog < NUM_TEST_PROGS; ++prog)
if(kill(children[prog].pid, SIG_EXEC_TEST_PROG)) {
ret = 1;
goto cleanup;
}
// TODO: Use mremap() to dynamically resize bufs as data_head indicates they're filling up
for(int prog = 0; prog < NUM_TEST_PROGS; ++prog) {
if(waitpid(children[prog].pid, NULL, 0) < 0)
perror("Awaiting child");
puts("time,cache-misses,cpu");
perf_record_sample_t *first = first_sample(children[prog].buf);
printf("%u %u\n", first->header.size, sizeof *first);
assert(first->header.size == sizeof *first);
uint64_t last_misses = 0;
for(perf_record_sample_t *each = first;
(uintptr_t) each <
(uintptr_t) children[prog].buf + children[prog].buf->data_size;
next_sample(&each)) {
printf("%" PRId64 ",%" PRId64 ",%" PRId32 "\n", each->time, each->value - last_misses, each->cpu);
last_misses = each->value;
}
printf("Test program ran for %llu ns\n", children[prog].buf->time_running);
printf("Data head at %llu\n", children[prog].buf->data_head);
}
cleanup:
for(int prog = 0; prog < NUM_TEST_PROGS; ++prog)
if(children[prog].buf)
munmap(children[prog].buf, PERF_BUFFER_NUM_PAGES * getpagesize());
return ret;
}