From a15077701981419cf353181af8d580276661b7c8 Mon Sep 17 00:00:00 2001 From: Yohanna Lisnichuk Date: Thu, 2 Jan 2025 13:06:09 -0300 Subject: [PATCH] delete pakistan_* ref #1014 --- docs/spiders.rst | 17 -------- .../spiders/pakistan_ppra_api.py | 39 ------------------- .../spiders/pakistan_ppra_bulk.py | 20 ---------- 3 files changed, 76 deletions(-) delete mode 100644 kingfisher_scrapy/spiders/pakistan_ppra_api.py delete mode 100644 kingfisher_scrapy/spiders/pakistan_ppra_bulk.py diff --git a/docs/spiders.rst b/docs/spiders.rst index b70abff96..ccd632fc9 100644 --- a/docs/spiders.rst +++ b/docs/spiders.rst @@ -1082,23 +1082,6 @@ Openopps env KINGFISHER_OPENOPPS_USERNAME=... KINGFISHER_OPENOPPS_PASSWORD=... scrapy crawl openopps -Pakistan -~~~~~~~~ - -.. autoclass:: kingfisher_scrapy.spiders.pakistan_ppra_api.PakistanPPRAAPI - :no-members: - -.. code-block:: bash - - scrapy crawl pakistan_ppra_api - -.. autoclass:: kingfisher_scrapy.spiders.pakistan_ppra_bulk.PakistanPPRABulk - :no-members: - -.. code-block:: bash - - scrapy crawl pakistan_ppra_bulk - Panama ~~~~~~ diff --git a/kingfisher_scrapy/spiders/pakistan_ppra_api.py b/kingfisher_scrapy/spiders/pakistan_ppra_api.py deleted file mode 100644 index 339d75436..000000000 --- a/kingfisher_scrapy/spiders/pakistan_ppra_api.py +++ /dev/null @@ -1,39 +0,0 @@ -import json - -import scrapy - -from kingfisher_scrapy.base_spiders import SimpleSpider -from kingfisher_scrapy.util import components, handle_http_error - - -class PakistanPPRAAPI(SimpleSpider): - """ - Domain - Pakistan Public Procurement Regulatory Authority (PPRA) - API documentation - https://www.ppra.org.pk/api/ - """ - - name = 'pakistan_ppra_api' - - # BaseSpider - validate_json = True # https://github.com/open-contracting/kingfisher-collect/issues/645 - skip_pluck = 'Already covered (see code for details)' # pakistan_ppra_bulk - - # SimpleSpider - data_type = 'release_package' - - def start_requests(self): - yield scrapy.Request( - 'https://www.ppra.org.pk/api/index.php/api/records', - meta={'file_name': 'list.html'}, - callback=self.parse_list - ) - - @handle_http_error - def parse_list(self, response): - for url in json.loads( - # remove the last item in the list to fix the str JSON format - response.xpath('//body//text()').getall()[6].replace(",\r\n\r\nhttps://www.ppra.org.pk", "") - ): - yield self.build_request(url, formatter=components(-2)) diff --git a/kingfisher_scrapy/spiders/pakistan_ppra_bulk.py b/kingfisher_scrapy/spiders/pakistan_ppra_bulk.py deleted file mode 100644 index e5a909f95..000000000 --- a/kingfisher_scrapy/spiders/pakistan_ppra_bulk.py +++ /dev/null @@ -1,20 +0,0 @@ -import scrapy - -from kingfisher_scrapy.base_spiders import SimpleSpider - - -class PakistanPPRABulk(SimpleSpider): - """ - Domain - Pakistan Public Procurement Regulatory Authority (PPRA) - API documentation - https://www.ppra.org.pk/api/ - """ - - name = 'pakistan_ppra_bulk' - - # SimpleSpider - data_type = 'release_package' - - def start_requests(self): - yield scrapy.Request('https://www.ppra.org.pk/api/index.php/api/release', meta={'file_name': 'releases.json'})