This repository has been archived by the owner on Dec 19, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathP3Bench.py
220 lines (196 loc) · 6.96 KB
/
P3Bench.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import os
from time import time
from pystats2md.stats_file import StatsFile
from pystats2md.micro_bench import MicroBench
from PyStorageGraph.BaseAPI import BaseAPI
from P0Config import P0Config
from P3TasksSampler import P3TasksSampler
from PyStorageHelpers import *
class P3Bench(object):
    """
    Benchmarks groups of text-database operations in the following order:
    1. Single-document lookups by ID.
    2. Substring searches for short/long words and phrases.
    3. Reversible modifications: removing and then re-inserting the same
       documents, first one by one and then in batches.

    Every group is run for each (dataset, database) pair listed in the
    shared `P0Config`.
    """

    def __init__(self, max_seconds_per_query=30):
        """
        :param max_seconds_per_query: time budget (in seconds) after which
            the read benchmarks stop issuing further queries.
        """
        self.conf = P0Config.shared()
        self.max_seconds_per_query = max_seconds_per_query
        self.tasks = P3TasksSampler()
        # State that `run()` fills in for every (dataset, database) pair.
        # It is initialised here so that calling `bench_buffered_graph()`
        # before `run()` hits the `self.tdb is None` guard instead of
        # failing with an AttributeError.
        self.repeat_existing = False
        self.tdb = None
        self.database = None
        self.dataset = None
        allow_big_csv_fields()

    def run(self, repeat_existing=False):
        """
        Samples tasks from every configured dataset and benchmarks every
        configured database against it.

        :param repeat_existing: when False, benchmarks that are already
            present in the default stats file are skipped.
        """
        self.repeat_existing = repeat_existing
        for dataset in self.conf.datasets:
            dataset_path = self.conf.normalize_path(dataset['path'])
            self.tasks.sample_file(dataset_path)
            for db in self.conf.databases:
                self.tdb = self.conf.make_db(database=db, dataset=dataset)
                self.database = db
                self.dataset = dataset
                self.bench_buffered_graph()
                # Persist results after every database, so that a later
                # failure doesn't discard what was already measured.
                self.conf.default_stats_file.dump_to_file()

    def bench_buffered_graph(self):
        """
        Runs all the benchmark groups against the current `self.tdb`.
        Does nothing if there is no database or if it holds no texts.
        """
        if self.tdb is None:
            return
        if self.tdb.count_texts() == 0:
            return
        print(f"- Benchmarking: {self.dataset['name']} @ {self.database['name']}")

        # Queries returning single object.
        self.bench_task(
            name='Random Reads: Lookup Doc by ID',
            func=self.get,
        )

        # Queries returning collections.
        # The task lists are read inside the lambdas, i.e. only when
        # the benchmark is actually executed.
        self.bench_task(
            name='Random Reads: Find up to 10,000 Docs containing a Short Word',
            func=lambda: self.find_substrings(
                max_matches=10000, queries=self.tasks.short_words_to_search),
        )
        self.bench_task(
            name='Random Reads: Find up to 20 Docs containing a Short Word',
            func=lambda: self.find_substrings(
                max_matches=20, queries=self.tasks.short_words_to_search),
        )
        self.bench_task(
            name='Random Reads: Find up to 20 Docs with Short Phrases',
            func=lambda: self.find_substrings(
                max_matches=20, queries=self.tasks.short_phrases_to_search),
        )
        self.bench_task(
            name='Random Reads: Find up to 20 Docs containing a Long Word',
            func=lambda: self.find_substrings(
                max_matches=20, queries=self.tasks.long_words_to_search),
        )
        self.bench_task(
            name='Random Reads: Find up to 20 Docs with Long Phrases',
            func=lambda: self.find_substrings(
                max_matches=20, queries=self.tasks.long_phrases_to_search),
        )

        # RegEx benchmarks are currently disabled.
        # for regex_template in self.tasks.regexs_to_search:
        #     name = 'Random Reads: Find up to 20 RegEx Matches ({})'.format(
        #         regex_template['Name'])
        #     regexs = regex_template['Tasks']
        #     self.bench_task(
        #         name=name,
        #         func=lambda: self.find_regex(
        #             regexs=regexs, max_matches=20)
        #     )

        # Reversible write operations: every removal is followed by an
        # upsert of the same documents.
        self.bench_task(
            name='Random Writes: Remove Doc',
            func=self.remove_text,
        )
        self.bench_task(
            name='Random Writes: Upsert Doc',
            func=self.upsert_text,
        )
        self.bench_task(
            name='Random Writes: Remove Docs Batch',
            func=self.remove_texts,
        )
        self.bench_task(
            name='Random Writes: Upsert Docs Batch',
            func=self.upsert_texts,
        )

    def bench_task(self, name, func):
        """
        Wraps `func` into a `MicroBench` and runs it once, unless the same
        benchmark is already present in the default stats file and
        `self.repeat_existing` is False.

        :param name: human-readable benchmark name.
        :param func: callable without arguments, returning the number of
            performed operations.
        """
        dataset_name = self.dataset['name']
        db_name = self.database['name']
        print(f'--- {db_name}: {name} @ {dataset_name}')
        counter = MicroBench(
            benchmark_name=name,
            func=func,
            database=db_name,
            dataset=dataset_name,
            source=self.conf.default_stats_file,
            device_name=self.conf.device_name,
            limit_iterations=1,
            limit_seconds=None,
            limit_operations=None,
        )
        if not self.repeat_existing:
            if self.conf.default_stats_file.contains(counter):
                print('--- Skipping!')
                return

        print('---- Running!')
        counter.run()
        if counter.count_operations == 0:
            print("---- Didn't measure!")
            return
        # NOTE(review): nothing here stores the result explicitly;
        # presumably `MicroBench` records it into `source` - confirm.
        print('---- Importing new stats!')

    # ---
    # Operations
    # ---

    def _run_time_limited(self, items, operation):
        """
        Applies `operation` to every item until the items are exhausted
        or `self.max_seconds_per_query` is exceeded.

        The budget is checked after each operation, so at least one
        operation is performed for a non-empty input.

        :param items: iterable of task arguments.
        :param operation: callable taking one item and returning the
            number of matches it produced.
        :return: tuple of (performed operations, total matches).
        """
        ops = 0
        found = 0
        started = time()
        for item in items:
            matches = operation(item)
            ops += 1
            found += matches
            if time() - started > self.max_seconds_per_query:
                break
        return ops, found

    def get(self) -> int:
        """Looks up sampled documents by ID. Returns the number of lookups."""
        cnt, cnt_found = self._run_time_limited(
            self.tasks.doc_ids_to_query,
            lambda doc_id: 0 if self.tdb.get(doc_id) is None else 1,
        )
        print(f'---- {cnt} ops: {cnt_found} ID matches')
        return cnt

    def find_substrings(self, max_matches: int, queries: list) -> int:
        """
        Searches for every string in `queries`, requesting at most
        `max_matches` results per query. Returns the number of searches.
        """
        cnt, cnt_found = self._run_time_limited(
            queries,
            lambda q: len(self.tdb.find_substring(
                query=q, max_matches=max_matches)),
        )
        print(f'---- {cnt} ops: {cnt_found} matches found')
        return cnt

    def find_regex(self, regexs, max_matches: int = None) -> int:
        """
        Searches for every pattern in `regexs`, passing `max_matches`
        through to the database. Returns the number of searches.
        """
        cnt, cnt_found = self._run_time_limited(
            regexs,
            lambda regex: len(self.tdb.find_regex(
                query=regex, max_matches=max_matches)),
        )
        print(f'---- {cnt} ops: {cnt_found} matches found')
        return cnt

    def remove_text(self) -> int:
        """Removes sampled documents one by one. Returns their count."""
        cnt = 0
        for doc in self.tasks.docs_to_change_by_one:
            self.tdb.remove(doc._id)
            cnt += 1
        return cnt

    def upsert_text(self) -> int:
        """Adds sampled documents one by one. Returns their count."""
        cnt = 0
        for doc in self.tasks.docs_to_change_by_one:
            self.tdb.add(doc)
            cnt += 1
        return cnt

    def remove_texts(self) -> int:
        """
        Removes sampled documents in batches, passing a list of IDs per
        batch. Returns the total number of documents in all batches.
        """
        cnt = 0
        for docs in self.tasks.docs_to_change_batched:
            self.tdb.remove([doc._id for doc in docs])
            cnt += len(docs)
        return cnt

    def upsert_texts(self) -> int:
        """
        Adds sampled documents in batches. Returns the total number of
        documents in all batches.
        """
        cnt = 0
        for docs in self.tasks.docs_to_change_batched:
            self.tdb.add(docs)
            cnt += len(docs)
        return cnt
if __name__ == "__main__":
    # Script entry point: create a configuration tagged with the device
    # name and run every benchmark with default settings.
    # NOTE(review): `P3Bench` reads `P0Config.shared()`; presumably that
    # is the instance created here - confirm.
    config = P0Config(device_name='MacbookPro')
    try:
        P3Bench().run()
    finally:
        # Write the stats file even when a benchmark raised.
        config.default_stats_file.dump_to_file()