-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathplot.py
115 lines (93 loc) · 3.22 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import matplotlib.pyplot as plt
import pandas as pd
import click
import numpy as np
@click.group()
def cli():
    """Plot tree-performance benchmark CSVs from data/ into figures/."""
def save(name):
    """Write the current matplotlib figure to ``figures/`` as PNG and PDF.

    Args:
        name: File stem (no extension) shared by both output files.

    The target directory is created on demand so that a checkout without a
    ``figures`` directory does not fail with ``FileNotFoundError``.
    """
    # Local import keeps this helper self-contained.
    from pathlib import Path

    for extension in ("png", "pdf"):
        target = Path("figures") / f"{name}.{extension}"
        target.parent.mkdir(parents=True, exist_ok=True)
        plt.savefig(target)
def plot_tree_performance(name):
    """Plot mean CPU time against sample size on log-log axes.

    Reads ``data/{name}.csv``, keeps the rows with ``sample_size >= 1000`` and
    draws, for every implementation, a solid marked line for the ``msprime``
    node order and a dashed line in the same colour for the ``preorder`` node
    order. The figure is written out through ``save(name)``.

    Args:
        name: Stem of the CSV file under ``data/``; reused as the output stem.
    """
    # Local import: only this function needs proxy handles for the legend.
    from matplotlib.lines import Line2D

    df = pd.read_csv(f"data/{name}.csv")
    df = df[df["sample_size"] >= 1000]
    implementations = sorted(set(df["implementation"]))

    fig, ax = plt.subplots(1, 1)

    # Solid lines: msprime node order. Remember each colour so the matching
    # preorder line can reuse it.
    colours = {}
    msprime_rows = df[df["order"] == "msprime"]
    for implementation in implementations:
        rows = msprime_rows[msprime_rows["implementation"] == implementation]
        (line,) = ax.loglog(
            rows["sample_size"], rows["time_mean"], "-o", label=implementation
        )
        colours[implementation] = line.get_color()

    # Dashed lines: preorder node order. Left unlabelled so that the
    # implementation legend lists every implementation exactly once.
    preorder_rows = df[df["order"] == "preorder"]
    for implementation in implementations:
        rows = preorder_rows[preorder_rows["implementation"] == implementation]
        ax.loglog(
            rows["sample_size"],
            rows["time_mean"],
            "--",
            color=colours[implementation],
        )

    implementation_legend = ax.legend()

    # The line-style legend uses black proxy handles rather than real data
    # lines: the entries then carry no implementation colour, and building
    # the legend no longer depends on at least one implementation being
    # plotted.
    style_handles = [
        Line2D([], [], color="black", linestyle="--"),
        Line2D([], [], color="black", linestyle="-", marker="o"),
    ]
    ax.legend(style_handles, ["preorder", "msprime"], loc="lower right")

    ax.set_xlabel("Sample size")
    ax.set_ylabel("CPU Time")
    # The second ax.legend() call replaced the axes' legend, so the first one
    # has to be re-attached explicitly to remain visible.
    fig.add_artist(implementation_legend)
    save(name)
@click.command()
def tree_performance():
    """Plot the sequential and vectorised tree-performance benchmarks."""
    for dataset in ("tree-performance-sequential", "tree-performance-vectorised"):
        plot_tree_performance(dataset)
@click.command()
def tree_performance_relative():
    """Plot CPU time of the ``c_*`` implementations relative to ``c_lib``.

    Concatenates the sequential and vectorised benchmark CSVs and draws two
    panels sharing a y axis, one per node order (``msprime`` and
    ``preorder``). In each panel every implementation whose name starts with
    ``c_`` has its ``time_mean`` divided by the ``c_lib`` ``time_mean`` for
    that node order. The figure is saved as ``tree-performance-relative``.
    """
    df = pd.concat(
        [
            pd.read_csv("data/tree-performance-sequential.csv"),
            pd.read_csv("data/tree-performance-vectorised.csv"),
        ]
    )
    # Only interested in the close-to-metal implementations here.
    implementations = [
        x for x in sorted(set(df["implementation"])) if x.startswith("c_")
    ]

    fig, axes = plt.subplots(1, 2, figsize=(8, 4), sharey=True)

    # Colour chosen for each implementation in the first panel, reused in the
    # second so the same implementation looks the same in both.
    colours = {}
    for ax, order in zip(axes, ("msprime", "preorder")):
        ax.set_title(f"Node order = {order}")
        dfo = df[df["order"] == order]
        # NOTE(review): the division below is positional, so it assumes every
        # implementation has the same number of rows, in the same sample-size
        # order, as c_lib -- confirm against the benchmark output.
        norm = np.array(dfo[dfo["implementation"] == "c_lib"]["time_mean"])
        for implementation in implementations:
            dfi = dfo[dfo["implementation"] == implementation]
            if implementation in colours:
                # Second panel: match the first panel's colour, no label.
                style = {"color": colours[implementation]}
            else:
                # First panel: labelled, colour taken from the default cycle.
                style = {"label": implementation}
            (line,) = ax.plot(
                dfi["sample_size"], dfi["time_mean"] / norm, "-o", **style
            )
            colours.setdefault(implementation, line.get_color())
        ax.set_xscale("log")
        ax.set_xlabel("Sample size")

    axes[0].legend()
    axes[0].set_ylabel("CPU Time relative to tskit C library call")
    plt.tight_layout()
    save("tree-performance-relative")
# Attach each sub-command to the group, then dispatch when run as a script.
for _command in (tree_performance, tree_performance_relative):
    cli.add_command(_command)

if __name__ == "__main__":
    cli()