addKeyValuePairOnItemIdCSV.py
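"""Add a metadata key/value pair (plus a provenance note) to the DSpace items
listed in a CSV.

Forked from ehanson8/dspace-editing. The CSV is assumed to contain an 'itemID'
column and a value column named after the metadata key being added; multiple
values may be pipe-delimited. Items that already contain the key are skipped
and logged as errors, and every edit is written to a timestamped log CSV.
"""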
import requests
import secrets  # local secrets.py config module (baseURL, email, password)
import time
import csv
from datetime import datetime
import pandas as pd
import urllib3
import argparse
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
secretsVersion = input('To edit production, enter secrets filename: ')
if secretsVersion != '':
    try:
        secrets = __import__(secretsVersion)
        print('Editing Production')
    except ImportError:
        print('Editing Stage')
else:
    print('Editing Stage')
baseURL = secrets.baseURL
email = secrets.email
password = secrets.password
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fileName')
parser.add_argument('-k', '--key')
args = parser.parse_args()
if args.fileName:
    fileName = args.fileName
else:
    fileName = input('Enter file name of CSV (including \'.csv\'): ')
if args.key:
    key = args.key
else:
    key = input('Enter the key: ')
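# Example invocation (file name and key are placeholders):
#   python addKeyValuePairOnItemIdCSV.py -f additions.csv -k dc.title.alternative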
startTime = time.time()
data = {'email': email, 'password': password}
header = {'content-type': 'application/json', 'accept': 'application/json'}
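# POST /rest/login returns a JSESSIONID cookie; it must accompany every
# subsequent request to stay authenticated.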
session = requests.post(baseURL+'/rest/login', headers=header,
                        params=data).cookies['JSESSIONID']
cookies = {'JSESSIONID': session}
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies).json()
print('authenticated')
dt = datetime.now().strftime('%Y-%m-%d %H.%M.%S')
logList = []
with open(fileName) as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        provNoteElement = {}
        logDict = {}
        itemLink = '/rest/items/'+row['itemID']
        # The value column is assumed to be named after the key being added;
        # multiple values may be pipe-delimited.
        newValues = row[key].split('|')
        print(itemLink)
        provNoteElement['key'] = 'dc.description.provenance'
        provNoteElement['language'] = 'en_US'
        link = baseURL+itemLink+'/metadata'
        metadata = requests.get(link, headers=header, cookies=cookies).json()
        df = pd.DataFrame.from_dict(metadata)
        df = df.drop(['schema', 'element', 'qualifier'], axis=1)
        keyList = df.key.tolist()
        oldkeyCount = len(keyList)
        if key in keyList:
            print('Error: '+key+' already exists in '+itemLink)
            logList.append({'itemID': itemLink, 'delete': 'ERROR',
                            'post': 'ERROR'})
        else:
            for count, nv in enumerate(newValues):
                nv = nv.strip()
                newElement = {'key': key, 'value': nv}
                print(newElement)
                provNote = key+': '+nv+' added by batch process on '+dt+'.'
                provNoteElement['value'] = provNote
                print(provNoteElement)
                # pandas removed DataFrame.append in 2.0; build the new rows
                # as a DataFrame and concatenate them instead.
                df = pd.concat([df, pd.DataFrame([newElement, provNoteElement])],
                               ignore_index=True)
                scount = str(count)
                logDict.update({'key_'+scount: key, 'value_'+scount: nv})
            keyList = df.key.tolist()
            newkeyCount = len(keyList)
            keyChange = newkeyCount - oldkeyCount
            print(str(keyChange)+' key/value pairs added to record')
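            # Replace the item's metadata wholesale: DELETE clears the
            # existing records, then PUT writes the rebuilt list back.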
            itemMetadataProcessed = df.to_json(orient='records')
            delete = requests.delete(link, headers=header, cookies=cookies)
            print(delete)
            post = requests.put(link, headers=header, cookies=cookies,
                                data=itemMetadataProcessed)
            print(post)
            print('')
            # Log status codes rather than raw Response objects so the
            # log CSV stays readable.
            logDict.update({'itemID': itemLink, 'delete': delete.status_code,
                            'post': post.status_code})
            logList.append(logDict)
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies)
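# Write a timestamped audit log of every key/value added and the HTTP
# status codes returned for each item.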
log = pd.DataFrame.from_dict(logList)
print(log.head(15))
log.to_csv('logOfAddingKeyValuePairsByItemID_'+dt+'.csv', index=False)
elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time: ', '%d:%02d:%02d' % (h, m, s))