-
Notifications
You must be signed in to change notification settings - Fork 0
/
ingestZetaFortCSV.py
executable file
·49 lines (33 loc) · 1.39 KB
/
ingestZetaFortCSV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/home/data/anaconda3/bin/python
# -*- coding: utf-8 -*-
import os, time, glob
from zipfile import ZipFile
def ingestData(dirpath,inzip):
    """Load every member of a zip archive into a database table and log timings.

    The table name is the lower-cased part of ``inzip`` before its first
    ``.``. Each archive member is extracted under ``dirpath``, passed to the
    ``timescaledb-parallel-copy`` command-line tool, and then deleted again.
    For every member one line is appended to
    ``/home/data/ingest/<tablename>.csv`` containing the tool's stripped
    standard output followed by the elapsed wall-clock seconds.

    Args:
        dirpath: Directory that contains ``inzip``; members are extracted
            into this directory as well.
        inzip: File name of the zip archive inside ``dirpath``.

    Raises:
        FileNotFoundError: If ``timescaledb-parallel-copy`` is not on PATH,
            or if the archive / the ``csvfort`` directory does not exist.
        OSError: If extraction, file removal or directory removal fails.
        zipfile.BadZipFile: If ``inzip`` is not a valid zip archive.
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    import subprocess

    ingest_dir = "/home/data/ingest"
    # exist_ok avoids the check-then-create race of a separate existence test.
    os.makedirs(ingest_dir, exist_ok=True)

    tablename = inzip.split('.')[0].lower()
    log_path = os.path.join(ingest_dir, tablename + '.csv')

    # Append mode: a header line is written on every call, so re-running the
    # ingest for the same table adds another header to the existing log.
    with open(log_path, 'a') as log:
        log.write('records_ingested,time_lapsed\n')

    # The context manager closes the archive even if a member fails.
    with ZipFile(os.path.join(dirpath, inzip), 'r') as zipfort:
        for member in zipfort.namelist():
            start_time = time.time()

            # extract() returns the path it actually created on disk.
            extracted = zipfort.extract(member, path=dirpath)

            # Argument list (no shell): member and table names are passed
            # verbatim, so spaces or shell metacharacters cannot alter the
            # command.
            # NOTE(review): connection credentials are hard-coded here;
            # consider moving them to the environment or a pgpass file.
            command = [
                'timescaledb-parallel-copy',
                '--db-name', 'postgres',
                '--connection',
                'host=localhost user=data password=adcirc sslmode=disable',
                '--table', tablename,
                '--file', extracted,
                '--skip-header',
                '--workers', '4',
                '--copy-options', 'CSV',
            ]
            try:
                # Only stdout is captured; stderr stays attached to the
                # parent process. A non-zero exit status is not treated as
                # an error; whatever the tool printed is still logged.
                result = subprocess.run(
                    command,
                    stdout=subprocess.PIPE,
                    universal_newlines=True,
                )
            finally:
                # Always clean up the extracted data, even if the tool
                # could not be started.
                os.remove(extracted)
                # NOTE(review): assumes every member is extracted into a
                # 'csvfort' sub-directory of dirpath — confirm against the
                # archives being ingested.
                os.rmdir(os.path.join(dirpath, 'csvfort'))

            output = result.stdout
            time_lapsed = time.time() - start_time

            with open(log_path, 'a') as log:
                log.write(output.strip() + ',' + str(time_lapsed) + '\n')
# Driver: ingest every zip archive found in the staging directory,
# processing them in sorted path order.
dirpath = "/home/data/zip/"
infiles = sorted(glob.glob(dirpath + "*.zip"))
dirlength = len(dirpath)

for infile in infiles:
    # Drop the directory prefix so only the archive's file name is passed on.
    inzip = infile[dirlength:]
    ingestData(dirpath, inzip)