diff --git a/CHANGELOG.md b/CHANGELOG.md index a47e995e..bb57c3a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [0.6.7] - 2024-01-17 + +### Added + +- Add national risk AFG, BFA, MLI, NGA, TCD, YEM + ## [0.6.6] - 2023-01-08 ### Added diff --git a/requirements.txt b/requirements.txt index dc006614..f8d856c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -53,7 +53,7 @@ filelock==3.13.1 # via virtualenv frictionless==5.16.0 # via hdx-python-utilities -google-auth==2.26.0 +google-auth==2.26.2 # via # google-auth-oauthlib # gspread @@ -63,11 +63,11 @@ greenlet==3.0.3 # via sqlalchemy gspread==5.12.4 # via hdx-python-scraper -hapi-schema==0.5.0 +hapi-schema==0.6.0 # via hapi-pipelines (pyproject.toml) -hdx-python-api==6.2.0 +hdx-python-api==6.2.1 # via hdx-python-scraper -hdx-python-country==3.6.3 +hdx-python-country==3.6.4 # via # hapi-pipelines (pyproject.toml) # hdx-python-api @@ -76,7 +76,7 @@ hdx-python-database[postgresql]==1.2.9 # via hapi-pipelines (pyproject.toml) hdx-python-scraper==2.3.2 # via hapi-pipelines (pyproject.toml) -hdx-python-utilities==3.6.3 +hdx-python-utilities==3.6.4 # via # hdx-python-api # hdx-python-country @@ -96,17 +96,17 @@ iniconfig==2.0.0 # via pytest isodate==0.6.1 # via frictionless -jinja2==3.1.2 +jinja2==3.1.3 # via frictionless jsonlines==4.0.0 # via hdx-python-utilities -jsonpath-ng==1.6.0 +jsonpath-ng==1.6.1 # via libhxl jsonschema==4.17.3 # via # frictionless # tableschema-to-template -libhxl==5.1 +libhxl==5.2 # via # hapi-pipelines (pyproject.toml) # hdx-python-api @@ -149,9 +149,9 @@ pockets==0.9.1 # via sphinxcontrib-napoleon pre-commit==3.6.0 # via hapi-pipelines (pyproject.toml) -psycopg[binary]==3.1.16 +psycopg[binary]==3.1.17 # via hdx-python-database -psycopg-binary==3.1.16 +psycopg-binary==3.1.17 # via psycopg pyasn1==0.5.1 # via @@ -250,7 
+250,7 @@ sqlalchemy==2.0.25 # hdx-python-database stringcase==1.2.0 # via frictionless -structlog==23.3.0 +structlog==24.1.0 # via libhxl tableschema-to-template==0.0.13 # via hdx-python-utilities @@ -269,7 +269,7 @@ typing-extensions==4.9.0 # pydantic-core # sqlalchemy # typer -unidecode==1.3.7 +unidecode==1.3.8 # via # libhxl # pyphonetics diff --git a/src/hapi/pipelines/app/__main__.py b/src/hapi/pipelines/app/__main__.py index 258b53e8..9f838b51 100755 --- a/src/hapi/pipelines/app/__main__.py +++ b/src/hapi/pipelines/app/__main__.py @@ -174,6 +174,7 @@ def main( "core.yaml", "food_security.yaml", "humanitarian_needs.yaml", + "national_risk.yaml", "operational_presence.yaml", "population.yaml", ] diff --git a/src/hapi/pipelines/app/pipelines.py b/src/hapi/pipelines/app/pipelines.py index dfccee47..f59dd8e1 100644 --- a/src/hapi/pipelines/app/pipelines.py +++ b/src/hapi/pipelines/app/pipelines.py @@ -17,6 +17,7 @@ from hapi.pipelines.database.ipc_type import IpcType from hapi.pipelines.database.locations import Locations from hapi.pipelines.database.metadata import Metadata +from hapi.pipelines.database.national_risk import NationalRisk from hapi.pipelines.database.operational_presence import OperationalPresence from hapi.pipelines.database.org import Org from hapi.pipelines.database.org_type import OrgType @@ -155,6 +156,7 @@ def _create_configurable_scrapers( _create_configurable_scrapers( "humanitarian_needs", "admintwo", adminlevel=self.admintwo ) + _create_configurable_scrapers("national_risk", "national") def run(self): self.runner.run() @@ -240,3 +242,16 @@ def output(self): results=results, ) humanitarian_needs.populate() + + if not self.themes_to_run or "national_risk" in self.themes_to_run: + results = self.runner.get_hapi_results( + self.configurable_scrapers["national_risk"] + ) + + national_risk = NationalRisk( + session=self.session, + metadata=self.metadata, + locations=self.locations, + results=results, + ) + national_risk.populate() diff --git 
a/src/hapi/pipelines/configs/national_risk.yaml b/src/hapi/pipelines/configs/national_risk.yaml new file mode 100755 index 00000000..94173ce8 --- /dev/null +++ b/src/hapi/pipelines/configs/national_risk.yaml @@ -0,0 +1,45 @@ +#National risk config file + +national_risk_national: + national_risk: + dataset: "inform-risk-index-2021" + resource: "INFORM_Risk_2024_v067 .xlsx" + format: "xlsx" + sheet: "INFORM Risk 2024 (a-z)" + headers: 2 + source_date: + start: "01/01/2024" + end: "31/12/2024" + filter_cols: + - "ISO3" + prefilter: "ISO3 in ['AFG', 'BFA', 'MLI', 'NGA', 'TCD', 'YEM']" + admin: + - "ISO3" + admin_exact: True + input: + - "RISK CLASS" + - "Rank" + - "INFORM RISK" + - "HAZARD & EXPOSURE" + - "VULNERABILITY" + - "LACK OF COPING CAPACITY" + - "% of Missing Indicators" + - "Recentness data (average years)" + output: + - "risk_class" + - "global_rank" + - "overall_risk" + - "hazard_exposure_risk" + - "vulnerability_risk" + - "coping_capacity_risk" + - "meta_missing_indicators_pct" + - "meta_avg_recentness_years" + output_hxl: + - "#risk+class" + - "#risk+rank" + - "#risk+total" + - "#risk+hazard" + - "#risk+vulnerability" + - "#risk+coping+capacity" + - "#meta+missing+indicators+pct" + - "#meta+recentness+avg" diff --git a/src/hapi/pipelines/database/national_risk.py b/src/hapi/pipelines/database/national_risk.py new file mode 100644 index 00000000..affd89c2 --- /dev/null +++ b/src/hapi/pipelines/database/national_risk.py @@ -0,0 +1,99 @@ +"""Functions specific to the national risk theme.""" + +from logging import getLogger +from typing import Dict + +from hapi_schema.db_national_risk import DBNationalRisk +from sqlalchemy.orm import Session + +from . 
from . import locations
from .base_uploader import BaseUploader
from .metadata import Metadata

logger = getLogger(__name__)


class NationalRisk(BaseUploader):
    """Uploader that turns national risk scraper results into database rows."""

    def __init__(
        self,
        session: Session,
        metadata: Metadata,
        locations: locations.Locations,
        results: Dict,
    ):
        """Store the collaborators needed by :meth:`populate`.

        Args:
            session: Database session, handed on to ``BaseUploader``.
            metadata: Metadata object; ``populate`` reads its ``runner`` and
                ``resource_data`` attributes.
            locations: Locations object; ``populate`` reads its ``data``
                mapping to resolve a location key to a reference.
            results: Scraper results; ``populate`` iterates its values.
        """
        super().__init__(session)
        self._metadata = metadata
        self._locations = locations
        self._results = results

    def populate(self):
        """Add one ``DBNationalRisk`` row per location and commit.

        For every dataset in the results, the reference period is read from
        the ``source_date`` default date of the ``national_risk_national``
        scraper. Each admin-level result pairs the second header row (used
        as HXL tags) with the value columns, and every key of the first
        value column is treated as a location.
        """
        logger.info("Populating national risk table")
        for dataset in self._results.values():
            scraper = self._metadata.runner.scrapers["national_risk_national"]
            default_date = scraper.datasetinfo["source_date"]["default_date"]
            reference_period_start = default_date["start"]
            reference_period_end = default_date["end"]

            # Only the per-admin-level payload is used; the level key is not.
            for admin_results in dataset["results"].values():
                resource_id = admin_results["hapi_resource_metadata"]["hdx_id"]
                hxl_tags = admin_results["headers"][1]
                columns = admin_results["values"]
                # Keys of the first column define which locations get a row.
                # presumably ISO3 codes (the config sets admin to "ISO3");
                # verify against the scraper output.
                location_codes = list(columns[0].keys())
                values = dict(zip(hxl_tags, columns))

                for location in location_codes:
                    raw_class = values["#risk+class"].get(location)
                    # Falsy values (missing/empty) are passed through as-is.
                    risk_class = (
                        _get_risk_class_code_from_data(raw_class)
                        if raw_class
                        else raw_class
                    )

                    row = DBNationalRisk(
                        resource_ref=self._metadata.resource_data[resource_id],
                        location_ref=self._locations.data[location],
                        risk_class=risk_class,
                        global_rank=values["#risk+rank"][location],
                        overall_risk=values["#risk+total"][location],
                        hazard_exposure_risk=values["#risk+hazard"][location],
                        vulnerability_risk=values["#risk+vulnerability"][
                            location
                        ],
                        coping_capacity_risk=values["#risk+coping+capacity"][
                            location
                        ],
                        meta_missing_indicators_pct=values[
                            "#meta+missing+indicators+pct"
                        ].get(location),
                        meta_avg_recentness_years=values[
                            "#meta+recentness+avg"
                        ].get(location),
                        reference_period_start=reference_period_start,
                        reference_period_end=reference_period_end,
                        # TODO: For v2+, add to scraper (HAPI-199)
                        source_data="not yet implemented",
                    )
                    self._session.add(row)

        # NOTE(review): the source's indentation was lost, so the nesting
        # depth of this commit could not be read off; it is placed once,
        # after all rows have been added - confirm against the repository.
        self._session.commit()
# Risk class label (lower-cased) -> ordinal code, 1 = lowest, 5 = highest.
_RISK_CLASS_CODES = {
    "very low": 1,
    "low": 2,
    "medium": 3,
    "high": 4,
    "very high": 5,
}


# The return annotation is quoted so that it is never evaluated at runtime:
# the module only imports ``Dict`` from ``typing``, and the ``X | None``
# syntax is not available as a runtime expression on older interpreters.
def _get_risk_class_code_from_data(risk_class: str) -> "int | None":
    """Translate a textual risk class into its numeric code.

    Matching ignores letter case and leading/trailing whitespace, so
    ``"Very High"``, ``"very high"`` and ``" VERY HIGH "`` all map to ``5``.

    Args:
        risk_class: Risk class label as read from the source data, one of
            "very low", "low", "medium", "high" or "very high".

    Returns:
        The code from 1 ("very low") to 5 ("very high"), or ``None`` when
        the value is not a string or is not one of the known labels.
    """
    if not isinstance(risk_class, str):
        # Non-text cells cannot name a class; treat them like any other
        # unrecognised value instead of failing on ``.lower()``.
        return None
    return _RISK_CLASS_CODES.get(risk_class.strip().lower())
It can support decisions about prevention, preparedness and response.", "num_resources": 3, "num_tags": 0, "organization": {"id": "e116c55a-d536-4b47-9308-94b1c7457afe", "name": "inform", "title": "INFORM", "type": "organization", "description": "INFORM is a multi-stakeholder forum for developing shared, quantitative analysis relevant to humanitarian crises and disasters. INFORM includes organisations from across the multilateral system, including the humanitarian and development sector, donors, and technical partners. The Joint Research Center of European Commission is the scientific and technical lead for INFORM.\r\n\r\nINFORM is developing a suite of quantitative, analytical products to support decision-making on humanitarian crises and disasters. These help make decisions at different stages of the disaster management cycle, specifically prevention, preparedness and response. INFORM develops methodologies and tools for use at the global level and also supports their application at subnational level.", "image_url": "", "created": "2014-09-13T16:09:14.878652", "is_organization": true, "approval_status": "approved", "state": "active"}, "overdue_date": "2024-06-11T14:03:40", "owner_org": "e116c55a-d536-4b47-9308-94b1c7457afe", "package_creator": "andrewthow", "pageviews_last_14_days": 18, "private": false, "qa_completed": false, "review_date": "2023-11-14T14:02:47.798897", "solr_additions": "{\"countries\": [\"World\"]}", "state": "active", "subnational": "0", "title": "INFORM Risk Index", "total_res_downloads": 127, "type": "dataset", "url": null, "version": null, "groups": [{"description": "", "display_name": "World", "id": "world", "image_display_url": "", "name": "world", "title": "World"}], "tags": [], "relationships_as_subject": [], "relationships_as_object": [], "is_fresh": true, "update_status": "fresh", "x_resource_grouping": [], "resources": [{"alt_url": 
"https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/603e40eb-a620-47e2-b8ac-e51961c7d661/download/", "cache_last_updated": null, "cache_url": null, "created": "2021-06-28T14:00:24.185308", "datastore_active": false, "description": "", "download_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/603e40eb-a620-47e2-b8ac-e51961c7d661/download/inform_risk_2024_v067-.xlsx", "format": "XLSX", "fs_check_info": "{\"state\": \"processing\", \"message\": \"The processing of the file structure check has started\", \"timestamp\": \"2023-11-14T14:02:54.625213\"}", "hash": "", "hdx_rel_url": "/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/603e40eb-a620-47e2-b8ac-e51961c7d661/download/inform_risk_2024_v067-.xlsx", "id": "603e40eb-a620-47e2-b8ac-e51961c7d661", "last_modified": "2023-11-14T14:02:54.750151", "metadata_modified": "2023-11-14T14:03:40.957373", "microdata": false, "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "mimetype_inner": null, "name": "INFORM_Risk_2024_v067 .xlsx", "originalHash": "-2021265870", "package_id": "f5ec2ee7-8a1b-49b4-864b-70bdb582a022", "pii": "false", "position": 0, "resource_type": "file.upload", "size": 2235008, "state": "active", "url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/603e40eb-a620-47e2-b8ac-e51961c7d661/download/inform_risk_2024_v067-.xlsx", "url_type": "upload"}, {"alt_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/a04977ed-ff5d-4b78-a0de-1fdac8765418/download/", "cache_last_updated": null, "cache_url": null, "created": "2023-05-24T12:36:48.835487", "datastore_active": false, "description": "", "download_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/a04977ed-ff5d-4b78-a0de-1fdac8765418/download/inform2024_trend_2014_2023_v67_all-.xlsx", "format": "XLSX", "fs_check_info": "{\"state\": \"processing\", \"message\": 
\"The processing of the file structure check has started\", \"timestamp\": \"2023-11-14T14:03:40.698771\"}", "hash": "", "hdx_rel_url": "/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/a04977ed-ff5d-4b78-a0de-1fdac8765418/download/inform2024_trend_2014_2023_v67_all-.xlsx", "id": "a04977ed-ff5d-4b78-a0de-1fdac8765418", "last_modified": "2023-11-14T14:03:40.799412", "metadata_modified": "2023-11-14T14:03:43.287422", "microdata": false, "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "mimetype_inner": null, "name": "INFORM2024_TREND_2014_2023_v67_ALL .xlsx", "originalHash": "-971618184", "package_id": "f5ec2ee7-8a1b-49b4-864b-70bdb582a022", "pii": "false", "position": 1, "resource_type": "file.upload", "size": 16651877, "state": "active", "url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/a04977ed-ff5d-4b78-a0de-1fdac8765418/download/inform2024_trend_2014_2023_v67_all-.xlsx", "url_type": "upload"}, {"alt_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/93dd74fd-7b9a-4e61-b452-62b3d5bed4ff/download/", "cache_last_updated": null, "cache_url": null, "created": "2023-05-24T12:42:29.815651", "datastore_active": false, "description": "", "download_url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/93dd74fd-7b9a-4e61-b452-62b3d5bed4ff/download/inform-concept-and-methodology-version-2017-pdf-final-4.pdf", "format": "PDF", "hash": "", "hdx_rel_url": "/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/93dd74fd-7b9a-4e61-b452-62b3d5bed4ff/download/inform-concept-and-methodology-version-2017-pdf-final-4.pdf", "id": "93dd74fd-7b9a-4e61-b452-62b3d5bed4ff", "last_modified": "2023-05-24T12:42:29.602115", "metadata_modified": "2023-05-24T12:42:31.295566", "microdata": false, "mimetype": "application/pdf", "mimetype_inner": null, "name": "INFORM Concept and Methodology Version 2017 Pdf FINAL.pdf", "originalHash": "-988266717", 
"package_id": "f5ec2ee7-8a1b-49b4-864b-70bdb582a022", "pii": "false", "position": 2, "resource_type": "file.upload", "size": 4065660, "state": "active", "url": "https://data.humdata.org/dataset/f5ec2ee7-8a1b-49b4-864b-70bdb582a022/resource/93dd74fd-7b9a-4e61-b452-62b3d5bed4ff/download/inform-concept-and-methodology-version-2017-pdf-final-4.pdf", "url_type": "upload"}]} diff --git a/tests/fixtures/input/national_risk_inform_risk_2024_v067.xlsx b/tests/fixtures/input/national_risk_inform_risk_2024_v067.xlsx new file mode 100644 index 00000000..6ad09bb5 Binary files /dev/null and b/tests/fixtures/input/national_risk_inform_risk_2024_v067.xlsx differ diff --git a/tests/test_main.py b/tests/test_main.py index 150e7373..ecd8a84a 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -13,6 +13,7 @@ from hapi_schema.db_ipc_phase import DBIpcPhase from hapi_schema.db_ipc_type import DBIpcType from hapi_schema.db_location import DBLocation +from hapi_schema.db_national_risk import DBNationalRisk from hapi_schema.db_operational_presence import DBOperationalPresence from hapi_schema.db_org import DBOrg from hapi_schema.db_org_type import DBOrgType @@ -45,6 +46,7 @@ def configuration(self): "core.yaml", "food_security.yaml", "humanitarian_needs.yaml", + "national_risk.yaml", "operational_presence.yaml", "population.yaml", ] @@ -98,9 +100,9 @@ def test_pipelines(self, configuration, folder): pipelines.output() count = session.scalar(select(func.count(DBResource.id))) - assert count == 16 + assert count == 17 count = session.scalar(select(func.count(DBDataset.id))) - assert count == 10 + assert count == 11 count = session.scalar(select(func.count(DBLocation.id))) assert count == 6 count = session.scalar(select(func.count(DBAdmin1.id))) @@ -143,6 +145,10 @@ def test_pipelines(self, configuration, folder): select(func.count(DBHumanitarianNeeds.id)) ) assert count == 47126 + count = session.scalar( + select(func.count(DBNationalRisk.id)) + ) + assert count == 6 org_mapping = 
pipelines.org._org_lookup assert org_mapping["Action against Hunger"] == {