-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathspider.py
43 lines (29 loc) · 951 Bytes
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import scrapy
import json
class KaggleStripper(scrapy.Spider):
    """Pull the brace-delimited payload out of a page's Kaggle component.

    The instance is built from an already-fetched response; its HTML is
    wrapped in a ``scrapy.Selector`` that the extraction method queries.

    NOTE(review): the constructor requires ``response``, so this class looks
    like it is meant to be instantiated by hand rather than by Scrapy's
    crawler machinery -- confirm against the callers.
    """

    name = 'sasori'

    def __init__(self, response) -> None:
        """Store a selector over the HTML text of *response*.

        Args:
            response: Any object exposing the page markup as ``.text``.
        """
        # Let scrapy.Spider run its own initialisation instead of leaving the
        # base class half-constructed.
        super().__init__()
        self.selector = scrapy.Selector(text=response.text)

    @staticmethod
    def _brace_span(markup: str) -> str:
        """Return *markup* from its second ``{`` through its last ``}``.

        Fallbacks (kept from the original scanning loops):
        with fewer than two ``{`` the slice starts at the only one found, or
        at index 0 when there is none; with no ``}`` the slice ends right
        after index 0.
        """
        first = markup.find("{")
        second = markup.find("{", first + 1) if first != -1 else -1

        if second != -1:
            start = second
        elif first != -1:
            start = first
        else:
            start = 0

        # rfind yields -1 when no "}" exists; the original defaulted to 0.
        end = max(markup.rfind("}"), 0)
        return markup[start:end + 1]

    def start_requests(self) -> str:
        """Return the brace-delimited text of the second ``.kaggle-component``.

        Despite its name, this method does not produce requests: it returns a
        substring of the second element matched by the ``.kaggle-component``
        CSS selector, running from that element's second ``{`` to its last
        ``}``.
        # presumably the substring is a JSON object; verify against the page.

        Raises:
            IndexError: If the page has fewer than two ``.kaggle-component``
                elements.
        """
        components = self.selector.css(".kaggle-component").extract()
        if len(components) < 2:
            raise IndexError(
                "expected at least two '.kaggle-component' elements, "
                f"found {len(components)}"
            )
        return self._brace_span(components[1])

    def parse(self, response):
        """Yield a single item holding the text of the page's ``<title>``.

        The ``title`` value is ``None`` when the selector matches nothing.
        """
        yield {'title': response.css('title::text').get()}