Commit

fix(EuropeanDynamicsBase): check for valid JSON files
yolile committed Apr 5, 2024
1 parent 3d9701f commit 8a42ed3
Showing 1 changed file with 15 additions and 10 deletions.
25 changes: 15 additions & 10 deletions kingfisher_scrapy/spiders/european_dynamics_base.py
@@ -1,4 +1,5 @@
 import datetime
+from json import JSONDecodeError
 from urllib.parse import urlsplit
 
 import scrapy
@@ -44,13 +45,17 @@ def start_requests(self):
 
     @handle_http_error
     def parse_list(self, response):
-        for number, url in enumerate(reversed(response.json()['packagesPerMonth'])):
-            path = urlsplit(url).path
-            if self.from_date and self.until_date:
-                # URL looks like https://www.zppa.org.zm/ocds/services/recordpackage/getrecordpackage/2016/7
-                year, month = map(int, url.rsplit('/', 2)[1:])
-                url_date = datetime.datetime(year, month, 1)
-                if not (self.from_date <= url_date <= self.until_date):
-                    continue
-            yield self.build_request(f'{self.base_url}{path}', formatter=join(components(-2), extension='zip'),
-                                     priority=number * -1)
+        try:
+            data = response.json()
+            for number, url in enumerate(reversed(data['packagesPerMonth'])):
+                path = urlsplit(url).path
+                if self.from_date and self.until_date:
+                    # URL looks like https://www.zppa.org.zm/ocds/services/recordpackage/getrecordpackage/2016/7
+                    year, month = map(int, url.rsplit('/', 2)[1:])
+                    url_date = datetime.datetime(year, month, 1)
+                    if not (self.from_date <= url_date <= self.until_date):
+                        continue
+                yield self.build_request(f'{self.base_url}{path}', formatter=join(components(-2), extension='zip'),
+                                         priority=number * -1)
+        except JSONDecodeError:
+            return self.build_file_error_from_response(response, errors=response.text)

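The change guards against list endpoints that return a non-JSON body (for example, an HTML error page): scrapy's TextResponse.json() parses the body with json.loads, which raises json.JSONDecodeError on invalid JSON. A minimal sketch of that behaviour follows; the URL and body are hypothetical, not taken from the spider.

from json import JSONDecodeError

from scrapy.http import TextResponse

# Hypothetical response whose body is an HTML error page rather than JSON.
response = TextResponse(
    url='https://example.com/ocds/services/recordpackage/packagesPerMonth',
    body=b'<html>Service unavailable</html>',
    encoding='utf-8',
)

try:
    response.json()
except JSONDecodeError:
    # With the patched parse_list, this branch produces a file error via
    # build_file_error_from_response instead of crashing the spider.
    print('body is not valid JSON')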