-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathnames.py
43 lines (32 loc) · 1.01 KB
/
names.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import xlrd
import json
# Tally how often each space-separated name part occurs in the workbook's
# "Data" sheet and write the result to data.json in the form
# {name_part: {"gender": <upper-cased gender>, "count": <occurrences>}}.
# NOTE(review): xlrd 2.0+ only reads .xls files; opening this .xlsx workbook
# presumably requires xlrd < 2.0 -- confirm the installed version.
wb = xlrd.open_workbook("40-60 house.xlsx", encoding_override="utf-8")
data = {}
# Name of the sheet the data is on
sheet = "Data"
# Look the sheet up once instead of on every row access.
ws = wb.sheet_by_name(sheet)
for i in range(ws.nrows):
    row = ws.row(i)
    # Column 1 is read as the full name and column 3 as the gender.
    # Work on the text values directly: encoding to bytes and then calling
    # str() on the result yields "b'...'" on Python 3 and leaves a bytes
    # gender that json.dump cannot serialize.
    name = row[1].value
    names = name.split(' ')
    gender = row[3].value
    # `index` is the position of the part within the full name; it advances
    # for every split part, including empty ones.
    for index, n in enumerate(names):
        # Replacing unnecessary characters that were created when converting
        # the PDF file to Excel file
        n = n.replace("'", "").replace("1", "I").replace("l", "I")
        if n in data:
            data[n]['count'] += 1
            continue
        # If it's not the first name then the gender is male by default
        if index != 0:
            gender = "Male"
        # Setting the initial value for a name
        data[n] = {
            "gender": gender.upper(),
            "count": 1,
        }
# Removing the empty-string entry produced by blank cells or repeated spaces;
# the default keeps this from raising KeyError when no such entry exists.
data.pop('', None)
with open('data.json', 'w') as fp:
    json.dump(data, fp)