Skip to content

Commit

Permalink
Fixed Spiders labic#24
Browse files Browse the repository at this point in the history
Govgo govmg
  • Loading branch information
ligiaiv committed Oct 3, 2017
1 parent 8b1e291 commit 84e8ee7
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
11 changes: 11 additions & 0 deletions ze/processors/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,16 @@ def __call__(self, value, loader_context):
logger.error('Failed to replace "%s" selector from %s:\n%s',
selector, spider_name, e)

# Site-specific cleanup for the 'govmg' spider.
# Compare by equality, not identity: `is` against a str literal only
# matches when both strings happen to be the same interned object, which is
# an implementation detail (and a SyntaxWarning on Python 3.8+).
if spider_name == 'govmg':
    try:
        # Swap every element matched by the selector for its text content.
        # NOTE(review): `html` is presumably a BeautifulSoup tree -- confirm.
        selector = 'a'
        for el in html.select(selector):
            el.replace_with(el.get_text())

    except Exception as e:
        # Deliberate best-effort: log the failure and carry on, matching
        # the error handling of the preceding spider-specific block.
        logger.error('Failed to replace "%s" selector from %s:\n%s',
                     selector, spider_name, e)

# if spider_name is 'govpa':
# try:
# selector = '.texto'
Expand Down Expand Up @@ -1165,6 +1175,7 @@ def __call__(self, value, loader_context):
'#noticia_vinculadas',
# 'meta',
'figure meta',
'.full-sharing',
'noframes',
'noscript',
'object',
Expand Down
3 changes: 2 additions & 1 deletion ze/spiders/govgo.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class GovernoGoiasSpider(ZeSpider):
"meta[name=description]::attr(content)",
'meta[property="og:image"]::attr(content)',
'[itemprop="image"]::attr(src)',
'#texto_content img::attr("src")'
'#texto_content img::attr("src")',
'.content figure img::attr("src")'
]
}
},
Expand Down
9 changes: 6 additions & 3 deletions ze/spiders/govmg.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ class GovernoMinasGeraisSpider(ZeSpider):
"meta[name=title]::attr(content)",
'[itemprop=headline]::text',
'.title-post::text',
'.entry-title::text'
'.entry-title::text',
'.title::text'
]
}
},
Expand Down Expand Up @@ -57,7 +58,8 @@ class GovernoMinasGeraisSpider(ZeSpider):
"css": [
'[itemprop=datePublished]::attr(content)',
'.entry-date::text',
'.text-date::attr(datetime)'
'.text-date::attr(datetime)',
'.date::text'
]
}
},
Expand All @@ -74,7 +76,8 @@ class GovernoMinasGeraisSpider(ZeSpider):
'[itemprop=articleBody]',
'.noticia',
'div.clear',
'article main'
'article main',
'.content'
]
}
},
Expand Down

0 comments on commit 84e8ee7

Please sign in to comment.