This repository has been archived by the owner on Dec 19, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathP2Import.py
69 lines (55 loc) · 2.13 KB
/
P2Import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from datetime import datetime
import os
import importlib
from pystats2md.micro_bench import MicroBench
from pystats2md.helpers import metric2str, bytes2str
from PyStorageHelpers import *
from P0Config import P0Config
class P2Import(object):
    """
    Performs bulk import into DB.
    Saves stats.

    For every (dataset, database) pair exposed by the shared `P0Config`,
    the dataset file is imported into the database under a `MicroBench`
    timer, a short report is printed, and the default stats file is
    dumped to disk.
    """

    def __init__(self):
        # Shared configuration object: supplies the datasets, databases,
        # stats file and device name used by the methods below.
        self.conf = P0Config.shared()

    def run(self):
        """Import every configured dataset into every configured database."""
        for dataset in self.conf.datasets:
            for db in self.conf.databases:
                tdb = self.conf.make_db(database=db, dataset=dataset)
                self.import_texts(tdb=tdb, database=db, dataset=dataset)

    @staticmethod
    def _bytes_per_second(size_bytes: int, seconds_elapsed: float) -> float:
        """
        Return the import throughput in bytes per second.

        Returns 0.0 when no positive elapsed time is available, instead of
        raising `ZeroDivisionError`.
        NOTE(review): presumably the elapsed time stays at zero when the
        benchmark is skipped because its stats already exist - confirm
        against `MicroBench.run_if_missing`.
        """
        if not seconds_elapsed or seconds_elapsed < 0:
            return 0.0
        return size_bytes / seconds_elapsed

    def import_texts(self, tdb, database: dict, dataset: dict):
        """
        Bulk-import one dataset file into one database and report stats.

        Does nothing when `tdb` is None, and skips the import (with a
        message) when the database already reports a non-zero text count.

        Args:
            tdb: Database wrapper returned by `conf.make_db`, or None.
            database: Database description; its 'name' key is read.
            dataset: Dataset description; its 'name' and 'path' keys are read.

        Raises:
            KeyError: If a required key is missing from `database`/`dataset`.
            OSError: If the dataset file size cannot be read.
        """
        if tdb is None:
            return

        db_name = database['name']
        dataset_name = dataset['name']
        # A non-empty database is treated as already imported.
        if tdb.count_texts() != 0:
            print(f'-- Skipping: {dataset_name} -> {db_name}')
            return

        dataset_path = self.conf.normalize_path(dataset['path'])
        file_size = os.path.getsize(dataset_path)
        print(f'-- Bulk importing: {dataset_name} -> {db_name}')
        print('--- started at:', datetime.now().strftime('%H:%M:%S'))
        print('--- file size:', bytes2str(file_size))

        def import_one() -> int:
            # NOTE: the bare name `import_texts` resolves to a module-level
            # function (presumably provided by the `PyStorageHelpers` star
            # import), NOT to this method - class scope is not visible from
            # a nested function.
            import_texts(tdb, dataset_path)
            # The number of texts now stored is returned to the benchmark.
            return tdb.count_texts()

        counter = MicroBench(
            benchmark_name='Sequential Writes: Import CSV',
            func=import_one,
            database=db_name,
            dataset=dataset_name,
            source=self.conf.default_stats_file,
            device_name=self.conf.device_name,
        )
        counter.run_if_missing()

        throughput = self._bytes_per_second(file_size, counter.time_elapsed)
        print('--- docs:', metric2str(counter.count_operations))
        print('--- docs/second:', metric2str(counter.ops_per_sec()))
        print('--- bytes/second:', bytes2str(throughput))
        print('--- finished at:', datetime.now().strftime('%H:%M:%S'))
        # Persist the stats right away so a later failure does not lose them.
        self.conf.default_stats_file.dump_to_file()
if __name__ == "__main__":
    # Script entry point: build the configuration for this device first;
    # `P2Import` then reads its settings through `P0Config.shared()`.
    config = P0Config(device_name='MacbookPro')
    try:
        importer = P2Import()
        importer.run()
    finally:
        # Write the stats file even when the import run raises.
        config.default_stats_file.dump_to_file()