Skip to content

Commit

Permalink
HDXDSYS-950 Use global HPC HNO dataset(s) (#144)
Browse files Browse the repository at this point in the history
* Read HNO data from global dataset
* Refactoring of database outputs
* Update CHANGELOG
  • Loading branch information
mcarans authored Aug 14, 2024
1 parent 9a8ab00 commit f1b8a42
Show file tree
Hide file tree
Showing 14 changed files with 75,739 additions and 75,725 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.9.45] - 2024-08-15

### Changed

- Read HNO data from global annual dataset(s)

## [0.9.44] - 2024-08-06

### Changed
Expand Down
22 changes: 11 additions & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# uv pip compile pyproject.toml --resolver=backtracking --all-extras -o requirements.txt
annotated-types==0.7.0
# via pydantic
attrs==23.2.0
attrs==24.2.0
# via
# frictionless
# jsonlines
Expand All @@ -12,7 +12,7 @@ cachetools==5.4.0
# via google-auth
certifi==2024.7.4
# via requests
cffi==1.16.0
cffi==1.17.0
# via cryptography
cfgv==3.4.0
# via pre-commit
Expand All @@ -24,7 +24,7 @@ ckanapi==4.8
# via hdx-python-api
click==8.1.7
# via typer
coverage==7.6.0
coverage==7.6.1
# via pytest-cov
cryptography==43.0.0
# via pyopenssl
Expand All @@ -48,7 +48,7 @@ filelock==3.15.4
# via virtualenv
frictionless==5.17.0
# via hdx-python-utilities
google-auth==2.32.0
google-auth==2.33.0
# via
# google-auth-oauthlib
# gspread
Expand All @@ -69,7 +69,7 @@ hdx-python-country==3.7.7
# hapi-pipelines (pyproject.toml)
# hdx-python-api
# hdx-python-scraper
hdx-python-database==1.3.1
hdx-python-database==1.3.3
# via hapi-pipelines (pyproject.toml)
hdx-python-scraper==2.4.1
# via hapi-pipelines (pyproject.toml)
Expand Down Expand Up @@ -124,7 +124,7 @@ markupsafe==2.1.5
# via jinja2
mdurl==0.1.2
# via markdown-it-py
more-itertools==10.3.0
more-itertools==10.4.0
# via inflect
ndg-httpsclient==0.5.1
# via hdx-python-api
Expand Down Expand Up @@ -201,7 +201,7 @@ python-slugify==8.0.4
# via
# ckanapi
# frictionless
pyyaml==6.0.1
pyyaml==6.0.2
# via
# frictionless
# pre-commit
Expand Down Expand Up @@ -232,7 +232,7 @@ rfc3986==2.0.0
# via frictionless
rich==13.7.1
# via typer
rpds-py==0.19.1
rpds-py==0.20.0
# via
# jsonschema
# referencing
Expand All @@ -242,7 +242,7 @@ ruamel-yaml==0.18.6
# via hdx-python-utilities
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
setuptools==72.1.0
setuptools==72.2.0
# via ckanapi
shellingham==1.5.4
# via typer
Expand All @@ -259,7 +259,7 @@ six==1.16.0
# sphinxcontrib-napoleon
sphinxcontrib-napoleon==0.7
# via defopt
sqlalchemy==2.0.31
sqlalchemy==2.0.32
# via
# hapi-pipelines (pyproject.toml)
# hapi-schema
Expand Down Expand Up @@ -299,7 +299,7 @@ validators==0.33.0
# via frictionless
virtualenv==20.26.3
# via pre-commit
wheel==0.43.0
wheel==0.44.0
# via libhxl
xlrd==2.0.1
# via hdx-python-utilities
Expand Down
94 changes: 57 additions & 37 deletions src/hapi/pipelines/app/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(
session=session,
use_live=use_live,
)
countries = configuration["HAPI_countries"]
self.countries = configuration["HAPI_countries"]
libhxl_dataset = AdminLevel.get_libhxl_dataset().cache()
self.admins = Admins(
configuration, session, self.locations, libhxl_dataset
Expand All @@ -61,9 +61,9 @@ def __init__(
self.adminone = AdminLevel(admin_config=admin1_config, admin_level=1)
admin2_config = configuration["admin2"]
self.admintwo = AdminLevel(admin_config=admin2_config, admin_level=2)
self.adminone.setup_from_libhxl_dataset(libhxl_dataset, countries)
self.adminone.setup_from_libhxl_dataset(libhxl_dataset, self.countries)
self.adminone.load_pcode_formats()
self.admintwo.setup_from_libhxl_dataset(libhxl_dataset, countries)
self.admintwo.setup_from_libhxl_dataset(libhxl_dataset, self.countries)
self.admintwo.load_pcode_formats()
self.admintwo.set_parent_admins_from_adminlevels([self.adminone])
logger.info("Admin one name mappings:")
Expand Down Expand Up @@ -91,7 +91,7 @@ def __init__(

Sources.set_default_source_date_format("%Y-%m-%d")
self.runner = Runner(
countries,
self.countries,
today=today,
errors_on_exit=errors_on_exit,
scrapers_to_run=scrapers_to_run,
Expand All @@ -101,27 +101,6 @@ def __init__(
self.metadata = Metadata(
runner=self.runner, session=session, today=today
)
self.wfp_commodity = WFPCommodity(
session=session,
datasetinfo=configuration["wfp_commodity"],
)
self.wfp_market = WFPMarket(
session=session,
datasetinfo=configuration["wfp_market"],
countryiso3s=countries,
admins=self.admins,
adminone=self.adminone,
admintwo=self.admintwo,
)
self.food_price = FoodPrice(
session=session,
datasetinfo=configuration["wfp_countries"],
countryiso3s=countries,
metadata=self.metadata,
currency=self.currency,
commodity=self.wfp_commodity,
market=self.wfp_market,
)

def create_configurable_scrapers(self):
def _create_configurable_scrapers(
Expand Down Expand Up @@ -200,15 +179,7 @@ def _create_configurable_scrapers(
def run(self):
self.runner.run()

def output(self):
self.locations.populate()
self.admins.populate()
self.metadata.populate()
self.org.populate()
self.org_type.populate()
self.sector.populate()
self.currency.populate()

def output_population(self):
if not self.themes_to_run or "population" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["population"]
Expand All @@ -221,6 +192,7 @@ def output(self):
)
population.populate()

def output_operational_presence(self):
if (
not self.themes_to_run
or "operational_presence" in self.themes_to_run
Expand All @@ -242,6 +214,7 @@ def output(self):
)
operational_presence.populate()

def output_food_security(self):
if not self.themes_to_run or "food_security" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["food_security"]
Expand All @@ -254,6 +227,7 @@ def output(self):
)
food_security.populate()

def output_humanitarian_needs(self):
if (
not self.themes_to_run
or "humanitarian_needs" in self.themes_to_run
Expand All @@ -267,6 +241,7 @@ def output(self):
)
humanitarian_needs.populate()

def output_national_risk(self):
if not self.themes_to_run or "national_risk" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["national_risk"]
Expand All @@ -279,6 +254,7 @@ def output(self):
)
national_risk.populate()

def output_refugees(self):
if not self.themes_to_run or "refugees" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["refugees"]
Expand All @@ -291,6 +267,7 @@ def output(self):
)
refugees.populate()

def output_funding(self):
if not self.themes_to_run or "funding" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["funding"]
Expand All @@ -303,6 +280,7 @@ def output(self):
)
funding.populate()

def output_poverty_rate(self):
if not self.themes_to_run or "poverty_rate" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["poverty_rate"]
Expand All @@ -316,6 +294,7 @@ def output(self):
)
poverty_rate.populate()

def output_conflict_event(self):
if not self.themes_to_run or "conflict_event" in self.themes_to_run:
results = self.runner.get_hapi_results(
self.configurable_scrapers["conflict_event"]
Expand All @@ -329,10 +308,51 @@ def output(self):
)
conflict_event.populate()

def output_food_prices(self):
if not self.themes_to_run or "food_prices" in self.themes_to_run:
self.wfp_commodity.populate()
self.wfp_market.populate()
self.food_price.populate()
wfp_commodity = WFPCommodity(
session=self.session,
datasetinfo=self.configuration["wfp_commodity"],
)
wfp_commodity.populate()
wfp_market = WFPMarket(
session=self.session,
datasetinfo=self.configuration["wfp_market"],
countryiso3s=self.countries,
admins=self.admins,
adminone=self.adminone,
admintwo=self.admintwo,
)
wfp_market.populate()
food_price = FoodPrice(
session=self.session,
datasetinfo=self.configuration["wfp_countries"],
countryiso3s=self.countries,
metadata=self.metadata,
currency=self.currency,
commodity=wfp_commodity,
market=wfp_market,
)
food_price.populate()

def output(self):
self.locations.populate()
self.admins.populate()
self.metadata.populate()
self.org.populate()
self.org_type.populate()
self.sector.populate()
self.currency.populate()
self.output_population()
self.output_operational_presence()
self.output_food_security()
self.output_humanitarian_needs()
self.output_national_risk()
self.output_refugees()
self.output_funding()
self.output_poverty_rate()
self.output_conflict_event()
self.output_food_prices()

def debug(self, folder: str) -> None:
self.org.output_org_map(folder)
11 changes: 4 additions & 7 deletions src/hapi/pipelines/database/humanitarian_needs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(
self._sector = sector
self._configuration = configuration

def get_admin2_ref(self, countryiso3, row, dataset_name, errors):
def get_admin2_ref(self, row, dataset_name, errors):
admin_code = row["Admin 2 PCode"]
if admin_code == "#adm2+code": # ignore HXL row
return None
Expand All @@ -46,7 +46,7 @@ def get_admin2_ref(self, countryiso3, row, dataset_name, errors):
if admin_code:
admin_level = "adminone"
else:
admin_code = countryiso3
admin_code = row["Country ISO3"]
admin_level = "national"
admin2_code = admins.get_admin2_code_based_on_level(
admin_code=admin_code, admin_level=admin_level
Expand All @@ -63,7 +63,7 @@ def populate(self):
reader = Read.get_reader("hdx")
datasets = reader.search_datasets(
filename="hno_dataset",
fq="name:hno-data-for-*",
fq="name:global-hpc-hno-*",
configuration=self._configuration,
)
warnings = set()
Expand All @@ -73,7 +73,6 @@ def populate(self):
rounded_values = []
dataset_name = dataset["name"]
self._metadata.add_dataset(dataset)
countryiso3 = dataset.get_location_iso3s()[0]
time_period = dataset.get_time_period()
time_period_start = time_period["startdate_str"]
time_period_end = time_period["enddate_str"]
Expand All @@ -83,9 +82,7 @@ def populate(self):
headers, rows = reader.get_tabular_rows(url, dict_form=True)
# Columns read by header name (rows are in dict form, so order is not significant):
# Country ISO3,Admin 1 PCode,Admin 2 PCode,Sector,Gender,Age Group,Disabled,Population Group,Population,In Need,Targeted,Affected,Reached
for row in rows:
admin2_ref = self.get_admin2_ref(
countryiso3, row, dataset_name, errors
)
admin2_ref = self.get_admin2_ref(row, dataset_name, errors)
if not admin2_ref:
continue
population_group = row["Population Group"]
Expand Down
Loading

0 comments on commit f1b8a42

Please sign in to comment.