forked from commaai/openpilot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwaste.c
89 lines (79 loc) · 2.54 KB
/
waste.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// Build, CPU load only:          gcc -O2 waste.c -lpthread -owaste
// Build, CPU + RAM traffic load: gcc -O2 waste.c -lpthread -owaste -DMEM
#define _GNU_SOURCE
#include <stdio.h>
#include <math.h>
#include <sched.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <arm_neon.h>
#include <sys/sysinfo.h>
#include "../common/timing.h"
// Explicit prototype for the processor-count query that main() uses to
// decide how many worker threads and result slots to create.
int get_nprocs(void);

// Per-core result slots, allocated in main() and indexed by core number.
// ttime[i]: seconds worker i spent on its most recent timed round
//           (NAN until the first round completes).
double *ttime;
// oout[i]: sum of the four accumulator lanes after worker i's last round;
//          storing it keeps the arithmetic observable.
double *oout;
/*
 * Worker body: pin the calling thread to one CPU and keep it busy forever
 * with NEON multiply-accumulates over a 128 MB buffer.
 *
 * pid: CPU number to pin to; also the index of the ttime[] / oout[] slot
 *      this worker updates after every timed round.
 *
 * Does not return in normal operation. Returns early (leaving ttime[pid]
 * untouched) only if the work buffer cannot be allocated.
 */
void waste(int pid) {
  // Buffer length in float32x4_t elements, and buffer sweeps per timed round.
  enum { BUF_ELEMS = 0x800000, SWEEPS = 0x10 };

  cpu_set_t my_set;
  CPU_ZERO(&my_set);
  CPU_SET(pid, &my_set);
  int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
  printf("set affinity to %d: %d\n", pid, ret);

  // 128 MB
  float32x4_t *tmp = malloc(BUF_ELEMS * sizeof *tmp);
  if (tmp == NULL) {
    fprintf(stderr, "waste %d: buffer allocation failed\n", pid);
    return;
  }

  // With -DMEM every page is written first so that later reads are not
  // served from the zero page and really exercise RAM. Without it the
  // buffer is left untouched and the run loads the CPU only.
#ifdef MEM
  memset(tmp, 0xaa, BUF_ELEMS * sizeof *tmp);
#endif

  // Start the accumulator from a defined value; it was previously read
  // uninitialised by the first multiply-accumulate.
  float32x4_t out = vdupq_n_f32(0.0f);

  double sec = seconds_since_boot();
  while (1) {
    for (int i = 0; i < SWEEPS; i++) {
      // Each iteration consumes 32 consecutive vectors (16 MLAs), hand
      // unrolled so the loop overhead stays small next to the loads.
      for (int j = 0; j < BUF_ELEMS; j += 0x20) {
        out = vmlaq_f32(out, tmp[j+0], tmp[j+1]);
        out = vmlaq_f32(out, tmp[j+2], tmp[j+3]);
        out = vmlaq_f32(out, tmp[j+4], tmp[j+5]);
        out = vmlaq_f32(out, tmp[j+6], tmp[j+7]);
        out = vmlaq_f32(out, tmp[j+8], tmp[j+9]);
        out = vmlaq_f32(out, tmp[j+10], tmp[j+11]);
        out = vmlaq_f32(out, tmp[j+12], tmp[j+13]);
        out = vmlaq_f32(out, tmp[j+14], tmp[j+15]);
        out = vmlaq_f32(out, tmp[j+16], tmp[j+17]);
        out = vmlaq_f32(out, tmp[j+18], tmp[j+19]);
        out = vmlaq_f32(out, tmp[j+20], tmp[j+21]);
        out = vmlaq_f32(out, tmp[j+22], tmp[j+23]);
        out = vmlaq_f32(out, tmp[j+24], tmp[j+25]);
        out = vmlaq_f32(out, tmp[j+26], tmp[j+27]);
        out = vmlaq_f32(out, tmp[j+28], tmp[j+29]);
        out = vmlaq_f32(out, tmp[j+30], tmp[j+31]);
      }
    }

    // Publish how long this round took and a checksum of the accumulator;
    // storing the checksum keeps the loop above from being optimised away.
    double nsec = seconds_since_boot();
    ttime[pid] = nsec-sec;
    oout[pid] = out[0] + out[1] + out[2] + out[3];
    sec = nsec;
  }
}
/*
 * pthread entry point with the signature pthread_create() requires.
 * Unpacks the core number that main() smuggled through the void* argument
 * and hands it to waste(), instead of calling waste() through a cast to an
 * incompatible function-pointer type.
 */
static void *waste_thread(void *arg) {
  waste((int)(long)arg);
  return NULL;
}

/*
 * Start one waste() worker per processor reported by get_nprocs(), then
 * print once a second: each worker's last round time, the mean round time,
 * and the aggregate read rate in MB/s derived from those times.
 *
 * Returns 1 if the result arrays cannot be allocated or a worker thread
 * cannot be created; otherwise it loops forever.
 */
int main(void) {
  int CORES = get_nprocs();

  ttime = malloc(CORES * sizeof *ttime);
  oout = malloc(CORES * sizeof *oout);
  if (ttime == NULL || oout == NULL) {
    fprintf(stderr, "failed to allocate per-core result arrays\n");
    return 1;
  }

  pthread_t waster[CORES];
  for (long i = 0; i < CORES; i++) {
    // NAN marks "no round finished yet" until the worker stores a real time.
    ttime[i] = NAN;
    int err = pthread_create(&waster[i], NULL, waste_thread, (void *)i);
    if (err != 0) {
      fprintf(stderr, "pthread_create for core %ld failed: %s\n", i, strerror(err));
      return 1;
    }
  }

  while (1) {
    double avg = 0.0;
    double iavg = 0.0;  // sum of 1/time, i.e. rounds per second over all cores
    for (int i = 0; i < CORES; i++) {
      avg += ttime[i];
      iavg += 1/ttime[i];
      printf("%4.2f ", ttime[i]);
    }
    // One round reads 16 sweeps of 0x800000 vectors; scale to MiB and
    // multiply by the combined round rate of all workers.
    double mb_per_sec = (16.*0x800000/(1024*1024))*sizeof(float32x4_t)*iavg;
    printf("-- %4.2f -- %.2f MB/s \n", avg/CORES, mb_per_sec);
    sleep(1);
  }
}