-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathreply-to-extracted-email
executable file
·155 lines (134 loc) · 6.47 KB
/
reply-to-extracted-email
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/python
"""
Extract all emails in a Noark 5 file/mappe from the archive and hand
them over as an mbox file to the mail program.
"""
__license__ = 'GNU General Public License v2 or later at users choice'
__author__ = 'Petter Reinholdtsen'
import sys
import os
sys.path.append(os.path.join(sys.path[0],'lib'))
import argparse
import dateutil.parser
import email.utils
import json
import subprocess
import tempfile
from time import strftime, gmtime, mktime, time
import n5core.endpoint
from n5core.pick import pickUnlessOne
def _linked_href(api, info, rel):
    """Return the 'href' of the link in info['_links'] keyed by
    api.relbaseurl + rel.

    Raises KeyError when the entry carries no such link.
    """
    return info['_links'][api.relbaseurl + rel]['href']


def _iter_results(api, href):
    """Fetch the JSON document at href and yield each entry of its
    'results' list.

    Yields nothing when the document has no 'results' key.  The
    request is issued when iteration starts.
    """
    (content, _res) = api.json_get(href)
    listing = json.loads(content)
    if 'results' in listing:
        for entry in listing['results']:
            yield entry


def _registrations_href(api):
    """Walk arkiv -> arkivdel -> mappe and return the href listing the
    registrering entries of the selected mappe.

    At every level pickUnlessOne() chooses the entry to descend into.
    NOTE(review): presumably it prompts the user unless there is
    exactly one candidate - confirm against n5core.pick.
    """
    href = api.findRelation(api.relbaseurl + 'arkivstruktur/arkiv/')
    for (kind, nextrel) in (('arkiv', 'arkivstruktur/arkivdel/'),
                            ('arkivdel', 'arkivstruktur/mappe/'),
                            ('mappe', 'arkivstruktur/registrering/')):
        (content, _res) = api.json_get(href)
        info = pickUnlessOne(json.loads(content), kind)
        href = _linked_href(api, info, nextrel)
    return href


def _is_rfc822(content_type):
    """Return True when content_type names the message/rfc822 media type.

    Parameters after ';' and letter case are ignored, so a header such
    as 'Message/RFC822; charset=utf-8' matches too.  A missing header
    (None or empty) does not match.
    """
    if not content_type:
        return False
    return content_type.split(';', 1)[0].strip().lower() == 'message/rfc822'


def _as_text(content):
    """Return content as text suitable for the text-mode mbox file.

    NOTE(review): the return type of api._get() is not visible here.
    If it hands back bytes on Python 3, writing them to the text-mode
    file would raise TypeError, so they are decoded as UTF-8 with
    replacement characters.  That is lossy for binary documents -
    confirm whether such content should be MIME encoded instead.
    """
    # On Python 2 'bytes' is 'str', so the content passes through untouched.
    if isinstance(content, bytes) and not isinstance(content, str):
        return content.decode('utf-8', 'replace')
    return content


def _escape_from_lines(text):
    """Quote every line after the first that starts with 'From '.

    In an mbox file such a line starts a new message, so lines in the
    message content must be written as '>From ' to stay part of it.
    """
    return text.replace("\nFrom ", "\n>From ")


def _rfc2822_date(date):
    """Format the datetime 'date' as an RFC 2822 date header value.

    Timezone-aware values are converted via UTC so their offset is
    honoured; naive values are interpreted as local time, as before.
    """
    import calendar
    if date.tzinfo is not None and date.utcoffset() is not None:
        # timetuple() would drop the offset; utctimetuple() applies it.
        stamp = calendar.timegm(date.utctimetuple())
    else:
        stamp = mktime(date.timetuple())
    return email.utils.formatdate(stamp)


def _write_message(mailfile, docdescinfo, docobjinfo, content, ct):
    """Append one mbox message for a document object to mailfile.

    Content served as message/rfc822 is written as is after the mbox
    'From ' separator line.  Anything else is faked as an email: the
    Subject, Date and Content-type headers are generated from the
    document metadata and the content becomes the message body.
    """
    # FIXME use timestamp from database?
    fromdate = strftime("%a %b %d %H:%M:%S %Y", gmtime(time()))
    mailfile.write("From nikita %s\n" % fromdate)

    body = _as_text(content)
    if _is_rfc822(ct):
        print("Received email from API (Content-Type %s)" % ct)
    else:
        if 'formatDetaljer' in docobjinfo:
            d = "/" + docobjinfo['formatDetaljer']
        else:
            d = ""
        if 'filnavn' in docobjinfo:
            filnavn = docobjinfo['filnavn']
        else:
            filnavn = docobjinfo['systemID']
        subject = "%s: %s%s - %s" % \
                  (docobjinfo['variantformat'],
                   docobjinfo.get('format'), d, filnavn)
        # FIXME figure out if there is a better date field to use
        date = dateutil.parser.parse(docdescinfo['tilknyttetDato'])
        print("Received non-email from API (Content-Type %s, filename %s, created %s)" % (ct, subject, date))
        mailfile.write("Subject: %s\n" % subject)
        mailfile.write("Date: %s\n" % _rfc2822_date(date))
        # Never emit "Content-type: None" when the server sent no header.
        mailfile.write("Content-type: %s\n" % (ct or 'application/octet-stream'))
        # The empty line ending the headers; prepending it here lets the
        # escaping below also cover the first line of the body.
        body = "\n" + body
    mailfile.write(_escape_from_lines(body))


def _write_mbox(api, mailfile):
    """Write every document object found below the selected mappe to
    mailfile in mbox format and return the number of messages written.

    Traverses registrering -> dokumentbeskrivelse -> dokumentobjekt
    and fetches the content of each dokumentobjekt via its 'fil' link.
    """
    count = 0
    for reginfo in _iter_results(api, _registrations_href(api)):
        docdeschref = _linked_href(api, reginfo,
                                   'arkivstruktur/dokumentbeskrivelse/')
        for docdescinfo in _iter_results(api, docdeschref):
            if api.verbose:
                print("Docdescinfo:", docdescinfo)
            docobjhref = _linked_href(api, docdescinfo,
                                      'arkivstruktur/dokumentobjekt/')
            for docobjinfo in _iter_results(api, docobjhref):
                if api.verbose:
                    print("Docobjinfo:", docobjinfo)
                contenthref = _linked_href(api, docobjinfo,
                                           'arkivstruktur/fil/')
                (content, res) = api._get(contenthref, {'Accept' : '*/*'})
                if count:
                    # Separate consecutive messages in the mbox file.
                    mailfile.write("\n\n")
                _write_message(mailfile, docdescinfo, docobjinfo, content,
                               res.getheader('Content-Type'))
                count += 1
    return count


def main():
    """Extract the documents of one mappe into a temporary mbox file
    and start the mail program on it.

    Command line options: --verbose, --baseurl, --keep and --mailer.
    The mail program is started with the MAIL environment variable
    pointing at the mbox file, and only if at least one document was
    found.  The mbox file is removed afterwards unless --keep is
    given; this also happens when an error interrupts the extraction.
    """
    baseurl = "http://localhost:8092/noark5v4/"
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", help="print more debug information",
                        action="store_true")
    parser.add_argument("--baseurl", help="(default is %s)" % baseurl)
    parser.add_argument("--keep", help="keep mbox file after execution",
                        action="store_true")
    parser.add_argument("--mailer", default="mutt",
                        help="mail program to use (default is mutt)")
    args = parser.parse_args()

    if args.baseurl:
        baseurl = args.baseurl

    # Log in before creating the temporary file, so a failed login
    # leaves nothing behind.
    api = n5core.endpoint.Endpoint(baseurl)
    api.verbose = args.verbose
    api.login()

    (mailfh, mailpath) = tempfile.mkstemp(suffix='.mbox', text=True)
    try:
        # Extract all emails from file, fake non-email as email with the
        # reported content type, and format emails as mbox file.
        with os.fdopen(mailfh, 'w') as mailfile:
            count = _write_mbox(api, mailfile)

        if count == 0:
            print("error: failed to find any documents in this file, not starting mailer")
        else:
            # Start mailer with MAIL environment set to the mbox file
            child_env = os.environ.copy()
            child_env['MAIL'] = mailpath
            subprocess.call([args.mailer], env=child_env)
    finally:
        # remove the mbox file
        if not args.keep:
            os.unlink(mailpath)
        else:
            print("Did not remove %s" % mailpath)
# Run main() only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()