diff --git a/corona_ce_spider.py b/corona_ce_spider.py index 08daa2b8..df353743 100644 --- a/corona_ce_spider.py +++ b/corona_ce_spider.py @@ -1,6 +1,7 @@ -import datetime import json -from urllib.parse import urljoin, urlencode +from collections import defaultdict +from datetime import date, datetime, timedelta +from urllib.parse import urlencode, urljoin import scrapy @@ -9,62 +10,84 @@ class CearaSpider(scrapy.Spider): name = "CE" + start_date = date(2020, 3, 2) base_url = "https://indicadores.integrasus.saude.ce.gov.br/api/coronavirus/" - start_date = datetime.date(2020, 3, 2) - def make_state_confirmed_request(self, date, callback, meta=None): + def make_city_request(self, date, meta): data = { "data": date, - "idMunicipio": "", - "tipo": "Confirmado", + "tipo": "Confirmado,Óbito", } url = urljoin(self.base_url, "qtd-por-municipio") + "?" + urlencode(data) - return scrapy.Request(url, callback=callback, meta=meta) + return scrapy.Request(url, callback=self.parse_city, meta=meta) - def make_city_deaths_request(self, date, city_id, callback, meta=None): + def make_state_confirmed_request(self, date, meta): data = { "data": date, - "idMunicipio": city_id, "tipo": "Confirmado", } + url = urljoin(self.base_url, "qtd-por-tipo") + "?" + urlencode(data) + return scrapy.Request(url, callback=self.parse_state_confirmed, meta=meta) + + def make_state_deaths_request(self, date, meta): + data = { + "data": date, + "tipo": "Óbito", + } url = urljoin(self.base_url, "qtd-obitos") + "?" + urlencode(data) - return scrapy.Request(url, callback=callback, meta=meta) + return scrapy.Request(url, callback=self.parse_state_death, meta=meta) def start_requests(self): - for date in date_utils.date_range(self.start_date, date_utils.today()): - yield self.make_state_confirmed_request( - date, - callback=self.parse_state_confirmed, - meta={"row": {"date": date}}, - ) + filtro_data_url = urljoin(self.base_url, "job") + yield scrapy.Request(filtro_data_url, self.parse_filter_date) + + def parse_filter_date(self, response): + filter_date = json.loads(response.body) + date_str = filter_date["time"].split(" ")[0] + end_date = datetime.strptime(date_str, "%d/%m/%Y").date() + end_date += timedelta(days=1) + + for case_date in date_utils.date_range(self.start_date, end_date): + meta = {"date": case_date} + + yield self.make_city_request(date=case_date, meta=meta) + yield self.make_state_confirmed_request(date=case_date, meta=meta) + + def parse_city(self, response): + map_city_case = defaultdict(dict) + cases = json.loads(response.body) + + for case in cases: + municipio = case["municipio"] + map_city_case[municipio].update(case) + + for case in map_city_case.values(): + municipio = case["municipio"] + if municipio == "Sem informação": + municipio = "Importados/Indefinidos" + + yield { + "date": response.meta["date"], + "state": self.name, + "city": municipio.title(), + "place_type": "city", + "confirmed": case.get("qtdConfirmado") or None, + "deaths": case.get("qtdObito") or None, + } def parse_state_confirmed(self, response): - date = response.meta["row"]["date"] - confirmed_data = json.loads(response.body) - - for city_data in confirmed_data: - assert city_data["tipo"] == "Positivo" - city = city_data["municipio"] - if city == "Sem informação": - city = "Importados/Indefinidos" - city_id = None - else: - city_id = city_data["idMunicipio"] - confirmed = city_data["quantidade"] - yield self.make_city_deaths_request( - date, - city_id, - callback=self.parse_city_deaths, - meta={"row": {"date": date, "city": city, "confirmed": confirmed}}, - ) - # TODO: check if all requests made here were received as reponses - - def parse_city_deaths(self, response): - row_data = response.meta["row"] - row_data["state"] = self.name - deaths_data = json.loads(response.body) - - assert len(deaths_data) == 1 - assert deaths_data[0]["tipo"] == "Óbito" - row_data["deaths"] = deaths_data[0]["quantidade"] - yield row_data + meta = {"date": response.meta["date"], "confirmed": json.loads(response.body)} + yield self.make_state_deaths_request(date=response.meta["date"], meta=meta) + + def parse_state_death(self, response): + all_types = json.loads(response.body) + response.meta["confirmed"] + map_type_qtt = {item["tipo"]: item["quantidade"] for item in all_types} + + yield { + "date": response.meta["date"], + "state": self.name, + "city": None, + "place_type": "state", + "confirmed": map_type_qtt.get("Positivo"), + "deaths": map_type_qtt.get("Óbito") or None, + }