Floodscan stats #11
```diff
@@ -0,0 +1,11 @@
+blob_prefix: floodscan/daily/v5/processed/aer_area_300s_
+start_date: 1998-01-12
+end_date: Null
+forecast: False
+extra_dims:
+  - band: str
+test:
+  start_date: 2023-12-01
+  end_date: 2024-01-31
+  iso3s: ["ETH"]
+coverage: ["DZA", "AGO", "BEN", "BWA", "BFA", "BDI", "CPV", "CMR", "CAF", "TCD", "COM", "COG", "CIV", "COD", "DJI", "EGY", "GNQ", "ERI", "SWZ", "ETH", "GAB", "GMB", "GHA", "GIN", "GNB", "KEN", "LSO", "LBR", "LBY", "MDG", "MWI", "MLI", "MRT", "MUS", "MAR", "MOZ", "NAM", "NER", "NGA", "RWA", "STP", "SEN", "SYC", "SLE", "SOM", "ZAF", "SSD", "SDN", "TGO", "TUN", "UGA", "TZA", "ZMB", "ZWE"]
```
```diff
@@ -3,6 +3,7 @@
 
 import pandas as pd
 from dateutil.relativedelta import relativedelta
+from sqlalchemy import VARCHAR, Integer
 
 from src.utils.cloud_utils import get_container_client
 
```

```diff
@@ -110,3 +111,15 @@ def parse_date(filename):
     """
     res = re.search("([0-9]{4}-[0-9]{2}-[0-9]{2})", filename)
     return pd.to_datetime(res[0])
+
+
+def parse_extra_dims(extra_dims):
+    parsed_extra_dims = {}
+    for extra_dim in extra_dims:
+        dim = next(iter(extra_dim))
+        if extra_dim[dim] == "str":
+            parsed_extra_dims[dim] = VARCHAR
+        else:
+            parsed_extra_dims[dim] = Integer
+
+    return parsed_extra_dims
```

Review comment (on the `else` branch): In the future, we'll probably want to have other data types, but can leave as is for now.
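Following up on that review comment: if more data types are needed later, the str/else branch could become a lookup table. A sketch under that assumption (the extra type names are hypothetical and not supported by this PR):

```python
from sqlalchemy import VARCHAR, Date, Float, Integer

# Hypothetical mapping from config type names to SQLAlchemy column types.
# Only "str" appears in the current configs; the rest are illustrative.
TYPE_MAP = {
    "str": VARCHAR,
    "int": Integer,
    "float": Float,
    "date": Date,
}


def parse_extra_dims(extra_dims):
    parsed_extra_dims = {}
    for extra_dim in extra_dims:
        dim = next(iter(extra_dim))
        # Unknown type names fall back to Integer, matching the current else branch.
        parsed_extra_dims[dim] = TYPE_MAP.get(extra_dim[dim], Integer)
    return parsed_extra_dims


# e.g. parse_extra_dims([{"band": "str"}]) -> {"band": VARCHAR}
```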
```diff
@@ -9,7 +9,7 @@ def cli_args():
     parser.add_argument(
         "dataset",
         help="Dataset for which to calculate raster stats",
-        choices=["seas5", "era5", "imerg"],
+        choices=["seas5", "era5", "imerg", "floodscan"],
         default=None,
         nargs="?",
     )
```

Review comment (on the `choices` line): Also as discussed, in the future it'd be good to think about how we can avoid the hard-coding of each dataset (since this happens in numerous places throughout the code). Although really more of a note for myself since I set it up this way...

```diff
@@ -36,4 +36,10 @@ def cli_args():
         help="Update the iso3 and polygon metadata tables.",
         action="store_true",
     )
+    parser.add_argument(
+        "--chunksize",
+        help="Limit the SQL insert batches to a specific chunksize.",
+        type=int,
+        default=100000,
+    )
     return parser.parse_args()
```
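The new `--chunksize` flag presumably bounds the batch size of the SQL inserts downstream. A sketch of how it might be threaded into a `pandas.DataFrame.to_sql` call (the table name and connection string are placeholders; the actual insert code is not shown in this diff):

```python
import pandas as pd
from sqlalchemy import create_engine

args = cli_args()
engine = create_engine("postgresql+psycopg2://user:pass@host/db")  # placeholder DSN

df = pd.DataFrame({"iso3": ["ETH"], "mean": [0.42]})  # stats computed upstream
df.to_sql(
    "floodscan_stats",         # assumed table name
    engine,
    if_exists="append",
    index=False,
    chunksize=args.chunksize,  # 100000 by default per the new flag
)
```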
```diff
@@ -8,6 +8,7 @@
 import requests
 from sqlalchemy import text
 
+from src.config.settings import load_pipeline_config
 from src.utils.cloud_utils import get_container_client
 from src.utils.database_utils import create_iso3_table
 
```

```diff
@@ -144,6 +145,19 @@ def determine_max_adm_level(row):
     return min(1, row["src_lvl"])
 
 
+def load_coverage():
+    pipelines = ["seas5", "era5", "imerg", "floodscan"]
+    coverage = {}
+
+    for dataset in pipelines:
+        config = load_pipeline_config(dataset)
+        if "coverage" in config:
+            dataset_coverage = config["coverage"]
+            coverage[dataset] = dataset_coverage
+
+    return coverage
+
+
 def create_iso3_df(engine):
     """
     Create and populate an ISO3 table in the database with country information.
```

Review comment (on the `if "coverage" in config` check): Would be worth adding at least a comment in here to remind ourselves that we're assuming that the coverage is specified in ISO3 values.
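Per that review comment, the assumption could be recorded directly in the loop, for instance like this (a suggested revision, not part of the committed diff):

```python
def load_coverage():
    pipelines = ["seas5", "era5", "imerg", "floodscan"]
    coverage = {}

    for dataset in pipelines:
        config = load_pipeline_config(dataset)
        # NOTE: coverage values are assumed to be ISO3 country codes,
        # matching the iso_3 column used in create_iso3_df below.
        if "coverage" in config:
            coverage[dataset] = config["coverage"]

    return coverage
```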
```diff
@@ -178,6 +192,8 @@ def create_iso3_df(engine):
         )
         & (df_hrp["endDate"] >= current_date)  # noqa
     ]
+    dataset_coverage = load_coverage()
+
     iso3_codes = set()
     for locations in df_active_hrp["locations"]:
         iso3_codes.update(locations.split("|"))
```

```diff
@@ -187,6 +203,9 @@ def create_iso3_df(engine):
     df["max_adm_level"] = df.apply(determine_max_adm_level, axis=1)
     df["stats_last_updated"] = None
 
+    for dataset in dataset_coverage:
+        df[dataset] = df["iso_3"].isin(dataset_coverage[dataset])
+
     # TODO: This list seems to have some inconsistencies when compared against the
     # contents of all polygons
     # Also need global p-codes list from https://fieldmaps.io/data/cod
```

Review comment (on the coverage columns): Also a nice way to do this!
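The `isin` pattern praised here adds one boolean column per dataset, flagging which countries each pipeline covers. A tiny self-contained demonstration (toy data, not from the pipeline):

```python
import pandas as pd

df = pd.DataFrame({"iso_3": ["ETH", "KEN", "FRA"]})
dataset_coverage = {"floodscan": ["ETH", "KEN"], "seas5": ["FRA"]}

# One boolean column per dataset: True where the country is covered.
for dataset in dataset_coverage:
    df[dataset] = df["iso_3"].isin(dataset_coverage[dataset])

print(df)
#   iso_3  floodscan  seas5
# 0   ETH       True  False
# 1   KEN       True  False
# 2   FRA      False   True
```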