forked from MaximilianV/fb_reactions_crawler
-
Notifications
You must be signed in to change notification settings - Fork 1
/
normalize.affective-text.py
60 lines (47 loc) · 1.82 KB
/
normalize.affective-text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import argparse
import operator
import json
import xml.etree.ElementTree as ET
def parse_arguments():
    """Parse the command line of the normalization script.

    Two positional arguments are declared: ``xmlfile`` (shown as
    ``textfile`` in the usage text) and ``goldfile`` (shown as
    ``emotionfile``).

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``, exposing the
        ``xmlfile`` and ``goldfile`` attributes.
    """
    cli = argparse.ArgumentParser(
        description='Normalize the affective text dataset.',
    )
    # (attribute name, name displayed in usage/help, help text)
    positionals = (
        ('xmlfile', 'textfile', 'a xml file containing the text'),
        ('goldfile', 'emotionfile', 'a gold file containing the emotions'),
    )
    for dest, shown_as, help_text in positionals:
        cli.add_argument(dest, metavar=shown_as, help=help_text)
    return cli.parse_args()
def get_most_common_reaction(reactionline):
    """Return the label of the highest-scoring column of one gold-file line.

    The line is split on whitespace. The first field is skipped (presumably
    a record id -- confirm against the gold file format) and every remaining
    field is parsed as an integer score. The position of the largest score
    selects the label.

    Args:
        reactionline: One line of the gold file, with or without its
            trailing newline. Fields may be separated by any run of spaces
            or tabs.

    Returns:
        'anger' for score positions 0 and 1, 'joy' for 3, 'sadness' for 4,
        'surprise' for 5. None for position 2, for any position beyond 5,
        and for a line that carries no scores at all (e.g. a blank line).

    Raises:
        ValueError: If a score field is not a valid integer.
    """
    # split() without a separator tolerates tabs, repeated spaces, a trailing
    # space and '\r\n' endings; split(' ') turns those into empty fields that
    # int() rejects.
    scores = [int(field) for field in reactionline.split()[1:]]
    if not scores:
        # Nothing to rank: report "no label" instead of letting max() fail.
        return None

    # index() finds the first occurrence of the maximum, so ties resolve to
    # the earliest column.
    top_position = scores.index(max(scores))

    # Positions 0 and 1 intentionally share a label and position 2 has none
    # (presumably the anger / disgust / fear columns -- verify against the
    # dataset description).
    label_by_position = {
        0: 'anger',
        1: 'anger',
        3: 'joy',
        4: 'sadness',
        5: 'surprise',
    }
    return label_by_position.get(top_position)
def normalize(text, reactionline):
    """Combine one text element and its gold line into an output record.

    Args:
        text: An object whose ``text`` attribute holds the message; ``main``
            passes the child elements of the parsed XML root.
        reactionline: The gold-file line paired with ``text``.

    Returns:
        A dict with the keys 'message' (``text.text``) and 'reaction' (the
        value returned by ``get_most_common_reaction``, possibly None).
    """
    label = get_most_common_reaction(reactionline)
    record = dict(message=text.text, reaction=label)
    return record
def main(run_args):
    """Convert an XML text file plus a gold file into a JSON list of posts.

    The children of the XML root are paired, in order, with the lines of the
    gold file. Each pair is turned into a record by ``normalize``; records
    whose 'reaction' is None are dropped. The remaining records are written
    as JSON next to the input, to ``<xmlfile without .xml>_normalized.json``.

    Args:
        run_args: Namespace with the attributes ``xmlfile`` (path of the XML
            file) and ``goldfile`` (path of the gold file).
    """
    xml_path = run_args.xmlfile
    messages = ET.parse(xml_path).getroot()

    with open(run_args.goldfile, 'r') as infile:
        reactionlines = list(infile)

    # zip() stops at the shorter input, exactly like the two-iterable map()
    # it replaces: surplus elements or surplus lines are ignored.
    normalized_posts = [
        normalize(text, reactionline)
        for text, reactionline in zip(messages, reactionlines)
    ]
    filtered_posts = [
        post for post in normalized_posts if post['reaction'] is not None
    ]

    # Remove the '.xml' suffix only. str.strip('.xml') strips the characters
    # '.', 'x', 'm' and 'l' from BOTH ends, which mangles paths such as
    # 'trial.xml' (-> 'tria') or './data.xml' (-> '/data').
    suffix = '.xml'
    if xml_path.endswith(suffix):
        output_stem = xml_path[:-len(suffix)]
    else:
        output_stem = xml_path

    with open(output_stem + '_normalized.json', 'w') as outfile:
        json.dump(filtered_posts, outfile)
if __name__ == '__main__':
    # Script entry point: read the command line and run the conversion.
    main(parse_arguments())