From 14dc6bd9592eba99f2f33bf1a3b8de94443d983b Mon Sep 17 00:00:00 2001
From: hannahker
Date: Mon, 25 Nov 2024 10:59:39 -0800
Subject: [PATCH 1/3] Write polygon loading script

---
Review note: the script body below was revised after this patch was
generated, so the blob id on the `index` line predates the revision.

 helpers/load_polygons.py | 114 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 helpers/load_polygons.py

diff --git a/helpers/load_polygons.py b/helpers/load_polygons.py
new file mode 100644
index 0000000..92e56ae
--- /dev/null
+++ b/helpers/load_polygons.py
@@ -0,0 +1,114 @@
+"""
+This is a temporary helper script to load CODAB data from Fieldmaps to
+a private Azure Storage Container. This is done to avoid overloading the
+Fieldmaps server during large historical runs, and to integrate some basic
+data cleaning that needs to be done on select ISO3 datasets.
+
+This script will likely be quickly deprecated, so has not been written to
+full production standards.
+
+Usage: Run LOCALLY from root-level project directory `python helpers/load_polygons.py`
+"""
+
+
+import os
+import zipfile
+from pathlib import Path
+
+import geopandas as gpd
+import requests
+
+from src.utils.cloud_utils import get_container_client
+from src.utils.iso3_utils import get_metadata, load_shp
+
+df = get_metadata()
+# TODO: Swap out "dev"/"prod" depending on which container
+# you're writing to
+container_client = get_container_client("dev", "polygon")
+data_dir = Path("data/tmp")
+
+# ISO3s whose adm0 layer needs to be dissolved into a single feature.
+# Temporary workaround before it's fixed in Fieldmaps
+DISSOLVE_ISO3S = ("NGA", "TCD", "BDI")
+# Component files that may accompany each shapefile layer
+SHP_EXTENSIONS = (".shp", ".dbf", ".prj", ".shx", ".cpg")
+# Seconds to wait for the remote server before treating a download as failed
+REQUEST_TIMEOUT = 60
+
+
+def download_zip(url):
+    """Download `url` and return the response body as bytes.
+
+    Returns None (after printing a message) when the request raises or the
+    response status is not 200, so the caller can skip that dataset.
+    """
+    try:
+        response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    except requests.RequestException as err:
+        print(f"Failed to download: {url} ({err})")
+        return None
+    if response.status_code == 200:
+        return response.content
+    print(f"Failed to download: {url}")
+    return None
+
+
+def upload_zip(iso3, data):
+    """Upload `data` to the container as `<iso3 lowercased>_shp.zip`.
+
+    `data` is passed straight to `upload_blob`: either the downloaded bytes
+    or an open binary file. An existing blob of the same name is overwritten.
+    """
+    blob_name = f"{iso3.lower()}_shp.zip"
+    container_client.upload_blob(name=blob_name, data=data, overwrite=True)
+
+
+def build_dissolved_zip(shp_link, iso3):
+    """Dissolve the adm0 layer for `iso3` and re-zip its shapefiles.
+
+    Returns the path of the zip archive written inside `data_dir`.
+    """
+    data_dir.mkdir(parents=True, exist_ok=True)
+    # load_shp is expected to leave the shapefiles in this directory
+    # (TODO confirm); it is handed the link, so no separate download here.
+    load_shp(shp_link, f"{data_dir}/", iso3)
+
+    # NOTE(review): one lowercase prefix is used for every path. The zipping
+    # step below only ever looked for lowercase names, so reading/writing
+    # adm0 under the uppercase ISO3 only worked on case-insensitive
+    # filesystems. Confirm load_shp writes lowercase file names.
+    prefix = iso3.lower()
+    adm0_path = os.path.join(data_dir, f"{prefix}_adm0.shp")
+    gpd.read_file(adm0_path).dissolve().to_file(adm0_path)
+
+    zip_name = os.path.join(data_dir, f"{prefix}_shp.zip")
+    with zipfile.ZipFile(zip_name, "w") as zipf:
+        for adm_level in range(3):  # 0 to 2
+            base_name = f"{prefix}_adm{adm_level}"
+            for ext in SHP_EXTENSIONS:
+                file_path = os.path.join(data_dir, base_name + ext)
+                if os.path.exists(file_path):
+                    zipf.write(file_path, os.path.basename(file_path))
+    return zip_name
+
+
+if __name__ == "__main__":
+    # TODO: Right now only set up to run locally
+    for _, row in df.iterrows():
+        shp_link = row["o_shp"]
+        iso3 = row["iso_3"]
+        print(f"Processing data for {iso3}...")
+
+        if iso3 in DISSOLVE_ISO3S:
+            zip_path = build_dissolved_zip(shp_link, iso3)
+            with open(zip_path, "rb") as zip_file:
+                upload_zip(iso3, zip_file)
+            continue
+
+        # Only fetch the raw archive when it is uploaded as-is; the dissolve
+        # path above never used the downloaded bytes.
+        zip_data = download_zip(shp_link)
+        if zip_data:
+            upload_zip(iso3, zip_data)
+        else:
+            print(f"Skipping {iso3} due to download failure")
From dc543bbc2ce391050803624c281cbf0f13aa2573 Mon Sep 17 00:00:00 2001 From: hannahker Date: Mon, 25 Nov 2024 11:00:03 -0800 Subject: [PATCH 2/3] Make src module installable --- .gitignore | 3 +++ README.md | 1 + pyproject.toml | 6 ++++++ setup.cfg | 5 +++++ 4 files changed, 15 insertions(+) create mode 100644 pyproject.toml create mode 100644 setup.cfg diff --git a/.gitignore b/.gitignore index b3afd66..1055dd2 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,6 @@ test_outputs/* *.sql data/* + + +*.egg-info/ diff --git a/README.md b/README.md index 4525f07..f22d3bb 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ source venv/bin/activate ``` pip install -r requirements.txt pip install -r requirements-dev.txt +pip install -e . ``` 3. 
Create a local `.env` file with the following environment variables: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d84cc51 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[tool.black] +line-length = 79 + +[tool.isort] +profile = "black" +line_length = 79 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..e106c38 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,5 @@ +[metadata] +name = src + +[options] +packages = src From 78336508b26cf2c0c71d50dc920ca0f91b98218e Mon Sep 17 00:00:00 2001 From: hannahker Date: Mon, 25 Nov 2024 11:00:39 -0800 Subject: [PATCH 3/3] Set up test runs for all datasets to rerun with ISO3s that need redoing --- src/config/era5.yml | 6 +++--- src/config/floodscan.yml | 6 +++--- src/config/imerg.yml | 6 +++--- src/config/seas5.yml | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/config/era5.yml b/src/config/era5.yml index 4e08a38..0f6e993 100644 --- a/src/config/era5.yml +++ b/src/config/era5.yml @@ -3,6 +3,6 @@ start_date: 1981-01-01 end_date: Null forecast: False test: - start_date: 2020-01-01 - end_date: 2020-02-01 - iso3s: ["AFG"] + start_date: 1981-01-01 + end_date: Null + iso3s: ["BDI", "NGA", "TCD"] diff --git a/src/config/floodscan.yml b/src/config/floodscan.yml index 6acd7b4..0831683 100644 --- a/src/config/floodscan.yml +++ b/src/config/floodscan.yml @@ -5,7 +5,7 @@ forecast: False extra_dims: - band : str test: - start_date: 2023-12-01 - end_date: 2024-01-31 - iso3s: ["ETH"] + start_date: 1981-01-01 + end_date: Null + iso3s: ["BDI", "NGA", "TCD"] coverage: ["DZA", "AGO", "BEN", "BWA", "BFA", "BDI", "CPV", "CMR", "CAF", "TCD", "COM", "COG", "CIV", "CAP", "DJI", "EGY", "GNQ", "ERI", "SWZ", "ETH", "GAB", "GMB", "GHA", "GIN", "GNB", "KEN", "LS0", "LBR", "LBY", "MDG", "MWI", "MLI", "MRT", "MUS", "MAR", "MOZ", "NAM", "NER", "NGA", "RWA", "STP", "SEN", "SYC", "SLE", "SOM", "ZAF", "SSD", "SDN", "TGO", "TUN", "UGA", "TZA", "ZMB", "ZWE"] diff --git 
a/src/config/imerg.yml b/src/config/imerg.yml index 745d79a..35d28d3 100644 --- a/src/config/imerg.yml +++ b/src/config/imerg.yml @@ -3,6 +3,6 @@ start_date: 2000-06-01 end_date: Null forecast: False test: - start_date: 2020-01-01 - end_date: 2020-01-15 - iso3s: ["ETH"] + start_date: 1981-01-01 + end_date: Null + iso3s: ["BDI", "NGA", "TCD"] diff --git a/src/config/seas5.yml b/src/config/seas5.yml index fc0b7c4..204cdd0 100644 --- a/src/config/seas5.yml +++ b/src/config/seas5.yml @@ -5,6 +5,6 @@ forecast: True extra_dims: - leadtime : int test: - start_date: 2024-01-01 - end_date: 2024-02-01 - iso3s: ["AFG"] + start_date: 1981-01-01 + end_date: Null + iso3s: ["BDI", "NGA", "TCD"]