-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgre-easy.py
57 lines (42 loc) · 1.33 KB
/
gre-easy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from bs4 import BeautifulSoup as BS
import requests
import traceback
import json
# Scrape the Barron's GRE word list page on Quizlet and dump every
# word/definition pair found to out11.txt as one line of JSON shaped like
#     {"all_words": [{"word": ..., "meaning": ...}, ...]}
URL = "https://quizlet.com/47571/barrons-gre-wordlist-4759-words-flash-cards/"

# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(URL, timeout=30)

# response.text is already decoded text, so no from_encoding is passed:
# BeautifulSoup ignores from_encoding for unicode markup and only warns.
soup = BS(response.text, "lxml")

# Candidate entries: every <div class="text">; the word and definition spans
# are looked up inside each one.
results_list = soup.find_all('div', class_="text")

words_list = []
for result in results_list:
    word_tag = result.find('span', class_="TermText qWord lang-en")
    definition_tag = result.find('span', class_="TermText qDef lang-en")
    if word_tag is None or definition_tag is None:
        # A div lacking either span is skipped on its own, instead of the
        # resulting AttributeError silently ending the whole collection.
        continue
    # Keep the text as unicode: json.dumps escapes non-ASCII characters
    # itself, and byte strings are not JSON-serializable on Python 3.
    words_list.append({
        'meaning': definition_tag.text,
        'word': word_tag.text,
    })

data = {'all_words': words_list}

# Open the output only once the data is ready (a failed fetch no longer
# truncates an existing file) and let the context manager flush and close it.
# write() with an explicit newline matches the old `print >> f1` output and
# works on both Python 2 and Python 3.
with open('out11.txt', 'w') as f1:
    f1.write(json.dumps(data) + "\n")