-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvisualize_USE.py
40 lines (31 loc) · 1.52 KB
/
visualize_USE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import html

import tensorflow_hub as hub
import toyplot as tps
from nltk.tokenize import word_tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer
from sklearn.metrics.pairwise import cosine_similarity
# Load the Universal Sentence Encoder (large, version 5) from TensorFlow Hub
# once at import time; scores() calls this module-level `embed` object.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")
# Finding word importance.
def scores(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are passed to the module-level ``embed`` model in a single
    call, and the two resulting vectors are compared with scikit-learn's
    ``cosine_similarity``.

    Args:
        text1: First text to embed.
        text2: Second text to embed.

    Returns:
        The single similarity value taken from the one-row-by-one-row
        result of ``cosine_similarity``.
    """
    embeddings = embed([text1, text2])
    first_vector, second_vector = embeddings[0], embeddings[1]
    # cosine_similarity works on 2-D inputs, so each vector is wrapped in a
    # one-row list and the lone entry of the result is extracted.
    similarity_matrix = cosine_similarity([first_vector], [second_vector])
    return similarity_matrix[0][0]
def get_important_words(original_text, reference_text):
    """Score each token of ``original_text`` by leave-one-out similarity drop.

    The similarity between ``original_text`` and ``reference_text`` is taken
    as a baseline. Each token (as produced by ``word_tokenize``) is then
    replaced in turn by an empty string, the tokens are detokenized back into
    text, and the similarity to ``reference_text`` is recomputed.

    Args:
        original_text: Text whose tokens are to be scored.
        reference_text: Text the original is compared against.

    Returns:
        A list with one entry per token, each being
        ``baseline - similarity_without_that_token``. A positive entry means
        blanking the token lowered the similarity.
    """
    baseline = scores(original_text, reference_text)
    tokens = word_tokenize(original_text)
    detokenizer = TreebankWordDetokenizer()

    importance = []
    for position in range(len(tokens)):
        # Blank out one token, keeping an empty placeholder in its slot.
        ablated = list(tokens)
        ablated[position] = ''
        ablated_text = detokenizer.detokenize(ablated)
        importance.append(baseline - scores(ablated_text, reference_text))
    return importance
def get_color(score):
    """Pick a CSS color for a signed importance score.

    Positive scores are looked up in the Brewer ``'Greens'`` map at
    ``1 - score``; all other scores are looked up in the ``'Reds'`` map at
    ``1 + score``. Both lookups use a fixed domain of 0 to 1.

    Args:
        score: Signed numeric score.

    Returns:
        The value returned by the toyplot color map's ``css`` method
        (used by callers as a string inside a ``style`` attribute).
    """
    if score > 0:
        palette_name, position = 'Greens', 1 - score
    else:
        palette_name, position = 'Reds', 1 + score
    colormap = tps.color.brewer.map(palette_name)
    return colormap.css(position, domain_min=0, domain_max=1)
def get_visual_representation(scores, user_text):
    """Render ``user_text`` as HTML with each token highlighted by its score.

    The text is tokenized with ``word_tokenize``; every token is wrapped in a
    ``<span>`` whose background color comes from ``get_color`` applied to the
    matching score, and the wrapped tokens are joined back together with
    ``TreebankWordDetokenizer``.

    Args:
        scores: Iterable of per-token scores, paired with the tokens in
            order. Inside this function the name shadows the module-level
            ``scores`` function.
        user_text: Raw text to visualize. It is treated as untrusted: the
            characters ``&``, ``<`` and ``>`` in its tokens are HTML-escaped
            before being placed in the markup.

    Returns:
        A string of HTML markup containing one ``<span>`` per paired token.
    """
    detokenizer = TreebankWordDetokenizer()
    highlighted_tokens = []
    # zip stops at the shorter input, so unmatched scores or tokens are
    # silently dropped.
    for score, word in zip(scores, word_tokenize(user_text)):
        color = get_color(score)
        # Escape markup characters so the token is shown as literal text
        # instead of being parsed as HTML. Quotes are harmless inside element
        # content, so they are left untouched (quote=False).
        safe_word = html.escape(word, quote=False)
        highlighted_tokens.append(
            '<span style="background-color:' + color + '">' + safe_word + '</span>'
        )
    # NOTE(review): detokenization runs on the already-wrapped tokens, so its
    # punctuation-joining rules presumably see the markup rather than the
    # bare words — confirm the resulting spacing is as intended.
    return detokenizer.detokenize(highlighted_tokens)