From d37c427dac3b2dcd5c737ab614dc116fa22ea576 Mon Sep 17 00:00:00 2001 From: t-downing Date: Wed, 27 Nov 2024 10:55:45 -0800 Subject: [PATCH 1/2] sort dates --- callbacks/callbacks.py | 25 ++++++++++++++++++------- pyproject.toml | 6 ++++++ utils/chart_utils.py | 21 +++++++++++++++++---- 3 files changed, 41 insertions(+), 11 deletions(-) create mode 100644 pyproject.toml diff --git a/callbacks/callbacks.py b/callbacks/callbacks.py index de70864..a48fc94 100644 --- a/callbacks/callbacks.py +++ b/callbacks/callbacks.py @@ -2,13 +2,17 @@ import dash_mantine_components as dmc from dash import Input, Output, State, dcc, html, no_update from dash_extensions.javascript import arrow_function, assign +from utils.chart_utils import create_return_period_plot, create_timeseries_plot +from utils.data_utils import ( + calculate_return_periods, + fetch_flood_data, + get_summary, + process_flood_data, +) +from utils.log_utils import get_logger # TODO: Be more careful with engine? from constants import ATTRIBUTION, CHD_GREEN, URL, URL_LABELS, engine -from utils.chart_utils import create_return_period_plot, create_timeseries_plot -from utils.data_utils import (calculate_return_periods, fetch_flood_data, - get_summary, process_flood_data) -from utils.log_utils import get_logger logger = get_logger("callbacks") @@ -68,7 +72,9 @@ def set_adm_value(adm_level): id="geojson", style=style_handle, hideout=dict(selected=""), - hoverStyle=arrow_function({"fillColor": "#1f77b4", "fillOpacity": 0.8}), + hoverStyle=arrow_function( + {"fillColor": "#1f77b4", "fillOpacity": 0.8} + ), zoomToBounds=True, ) adm0 = dl.GeoJSON( @@ -110,13 +116,16 @@ def update_plot(pcode, adm_level): "", ) df_exposure, df_adm = fetch_flood_data(engine, pcode, adm_level) + df_exposure = df_exposure.sort_values("date") if len(df_exposure) == 0: logger.warning(f"No data available for {pcode}") return ( [ dmc.Space(h=100), - dmc.Center(html.Div("No data available for selected location")), + dmc.Center( + html.Div("No data 
available for selected location") + ), ], dmc.Center("No data available"), "", @@ -142,7 +151,9 @@ def update_plot(pcode, adm_level): return exposure_chart, rp_chart, name, exposed_summary # TODO: Would be better as a clientside callback, but couldn't seem to get it to work... - @app.callback(Output("hover-place-name", "children"), Input("geojson", "hoverData")) + @app.callback( + Output("hover-place-name", "children"), Input("geojson", "hoverData") + ) def info_hover(feature): if feature: return feature["properties"]["name"] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d84cc51 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[tool.black] +line-length = 79 + +[tool.isort] +profile = "black" +line_length = 79 diff --git a/utils/chart_utils.py b/utils/chart_utils.py index e550742..62fa67d 100644 --- a/utils/chart_utils.py +++ b/utils/chart_utils.py @@ -3,6 +3,8 @@ def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN): """Create timeseries plot using Plotly.""" + df_seasonal = df_seasonal.sort_values("eff_date") + df_processed = df_processed.sort_values("date", ascending=False) fig = go.Figure() # Add seasonal average @@ -18,7 +20,13 @@ def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN): # Add yearly traces for year in df_processed["date"].dt.year.unique(): - color = CHD_GREEN if year == 2024 else "red" if year in peak_years else "grey" + color = ( + CHD_GREEN + if year == 2024 + else "red" + if year in peak_years + else "grey" + ) linewidth = 3 if year == 2024 else 0.2 df_year = df_processed[df_processed["date"].dt.year == year] @@ -40,8 +48,11 @@ def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN): margin={"t": 10, "l": 0, "r": 0, "b": 0}, font=dict(family="Arial, sans-serif"), ) - fig.update_yaxes(rangemode="tozero", title="Population exposed to flooding") - fig.update_xaxes(title="Date") + fig.update_yaxes( + rangemode="tozero", title="Population 
exposed to flooding" + ) + # pin the x-axis range to the full year 1900 (Jan 1 to Dec 31) + fig.update_xaxes(title="Date", range=["1900-01-01", "1900-12-31"]) return fig @@ -86,7 +97,9 @@ def create_return_period_plot(df_peaks, CHD_GREEN, rp=3): ) # Add other significant years - df_rp_peaks = df_peaks[(df_peaks[f"{rp}yr_rp"]) & (df_peaks["date"] != 2024)] + df_rp_peaks = df_peaks[ + (df_peaks[f"{rp}yr_rp"]) & (df_peaks["date"] != 2024) + ] fig.add_trace( go.Scatter( x=df_rp_peaks["rp"], From 50c1a2a8110eeb323fe994645cf85c2dde6ae24a Mon Sep 17 00:00:00 2001 From: t-downing Date: Wed, 27 Nov 2024 11:18:04 -0800 Subject: [PATCH 2/2] flake8 run --- app.py | 2 +- layouts/content.py | 12 +++++++----- layouts/modal.py | 3 ++- layouts/navbar.py | 9 +++++++-- pipelines/blob_utils.py | 15 +++++++++++---- populate_database.py | 6 ++++-- utils/data_utils.py | 32 +++++++++++++++++++++----------- 7 files changed, 53 insertions(+), 26 deletions(-) diff --git a/app.py b/app.py index b7311db..7e13e13 100644 --- a/app.py +++ b/app.py @@ -1,10 +1,10 @@ from dash import Dash, dcc +from utils.log_utils import setup_logging from callbacks.callbacks import register_callbacks from layouts.content import content from layouts.modal import disclaimer_modal from layouts.navbar import module_bar, navbar -from utils.log_utils import setup_logging app = Dash(__name__, update_title=None, suppress_callback_exceptions=True) server = app.server diff --git a/layouts/content.py b/layouts/content.py index a0c6037..b7f6490 100644 --- a/layouts/content.py +++ b/layouts/content.py @@ -89,7 +89,7 @@ def info_container(): """ Flood extent data is from [Floodscan](https://www.aer.com/weather-risk-management/floodscan-near-real-time-and-historical-flood-mapping/). Population distributions are from [WorldPop](https://www.worldpop.org/). Administrative boundaries are from [FieldMaps](https://fieldmaps.io/). - """ + """ # noqa ), ], title="Data Sources", @@ -103,7 +103,7 @@ def info_container(): extent is less than 5% to reduce noise. 
The daily exposure rasters are then aggregated to the admin2 level. This is similar to the [method](https://docs.google.com/document/d/16-TrPdCF7dCx5thpdA7dXB8k1MUOJUovWaRVIjEJNUE/edit?tab=t.0#heading=h.rtvq16oq23gp) initially developed for the 2024 Somalia HNRP. Admin0 and admin1 exposure is calculated simply by summing the admin2 exposures. - """ + """ # noqa ), dcc.Markdown( """ @@ -111,7 +111,7 @@ def info_container(): for all admin levels is taken taken as the maximum instantaneous flood exposure for any day in the year (up to the current day of the year). Note that this does not take into account flooding in one part of the area on one day and another part on another day. In this case, the yearly maximum would be the maximum of these values, not the sum. - """ + """ # noqa ), ], title="Methodology", @@ -121,7 +121,7 @@ def info_container(): """ The code used to calculate the daily flood exposure is available on GitHub [here](https://github.com/OCHA-DAP/ds-floodexposure-monitoring). The code used to calculate return period and run this app is available on GitHub [here](https://github.com/OCHA-DAP/ds-floodexposure-monitoring-app). 
- """ + """ # noqa ), title="Resources", ), @@ -192,7 +192,9 @@ def chart_container(): ) severity_tab = html.Div( style={"backgroundColor": "white", "width": "100%", "height": "100%"}, - children=dmc.LoadingOverlay(html.Div(id="rp-chart"), style={"height": "100%"}), + children=dmc.LoadingOverlay( + html.Div(id="rp-chart"), style={"height": "100%"} + ), ) return dbc.Tabs( [ diff --git a/layouts/modal.py b/layouts/modal.py index 86f674f..b85e33c 100644 --- a/layouts/modal.py +++ b/layouts/modal.py @@ -6,7 +6,8 @@ def disclaimer_modal(): return dbc.Modal( [ dbc.ModalHeader( - dbc.ModalTitle("Disclaimer", className="header"), close_button=True + dbc.ModalTitle("Disclaimer", className="header"), + close_button=True, ), dbc.ModalBody( [ diff --git a/layouts/navbar.py b/layouts/navbar.py index 2ed5e70..3b21b19 100644 --- a/layouts/navbar.py +++ b/layouts/navbar.py @@ -13,7 +13,8 @@ def navbar(): [ dbc.Col( html.Img( - src="assets/centre_banner_greenbg.png", height=40 + src="assets/centre_banner_greenbg.png", + height=40, ), ), dbc.Col( @@ -33,7 +34,11 @@ def navbar(): ], fluid=True, ), - style={"height": f"{NAVBAR_HEIGHT}px", "margin": "0px", "padding": "10px"}, + style={ + "height": f"{NAVBAR_HEIGHT}px", + "margin": "0px", + "padding": "10px", + }, color="primary", dark=True, ) diff --git a/pipelines/blob_utils.py b/pipelines/blob_utils.py index b05acb1..6d5228d 100644 --- a/pipelines/blob_utils.py +++ b/pipelines/blob_utils.py @@ -21,7 +21,8 @@ def get_container_client( ): sas = DEV_BLOB_SAS if stage == "dev" else PROD_BLOB_SAS container_url = ( - f"https://imb0chd0{stage}.blob.core.windows.net/" f"{container_name}?{sas}" + f"https://imb0chd0{stage}.blob.core.windows.net/" + f"{container_name}?{sas}" ) return ContainerClient.from_container_url(container_url) @@ -31,7 +32,9 @@ def load_parquet_from_blob( stage: Literal["prod", "dev"] = "dev", container_name: str = "projects", ): - blob_data = load_blob_data(blob_name, stage=stage, container_name=container_name) + 
blob_data = load_blob_data( + blob_name, stage=stage, container_name=container_name + ) return pd.read_parquet(io.BytesIO(blob_data)) @@ -42,7 +45,9 @@ def load_gdf_from_blob( with zipfile.ZipFile(io.BytesIO(blob_data), "r") as zip_ref: zip_ref.extractall("temp") if shapefile is None: - shapefile = [f for f in zip_ref.namelist() if f.endswith(".shp")][0] + shapefile = [f for f in zip_ref.namelist() if f.endswith(".shp")][ + 0 + ] gdf = gpd.read_file(f"temp/{shapefile}") return gdf @@ -52,7 +57,9 @@ def load_blob_data( stage: Literal["prod", "dev"] = "dev", container_name: str = "projects", ): - container_client = get_container_client(stage=stage, container_name=container_name) + container_client = get_container_client( + stage=stage, container_name=container_name + ) blob_client = container_client.get_blob_client(blob_name) data = blob_client.download_blob().readall() return data diff --git a/populate_database.py b/populate_database.py index e862687..f75a767 100644 --- a/populate_database.py +++ b/populate_database.py @@ -48,7 +48,9 @@ def load_data(engine): ) def calculate_rolling(group, window=7): - group[f"roll{window}"] = group["total_exposed"].rolling(window=window).mean() + group[f"roll{window}"] = ( + group["total_exposed"].rolling(window=window).mean() + ) return group window = 7 @@ -94,7 +96,7 @@ def calculate_rolling(group, window=7): if __name__ == "__main__": print("Populating database...") engine = create_engine( - f"postgresql+psycopg2://{AZURE_DB_UID}:{AZURE_DB_PW_DEV}@chd-rasterstats-dev.postgres.database.azure.com/postgres" + f"postgresql+psycopg2://{AZURE_DB_UID}:{AZURE_DB_PW_DEV}@chd-rasterstats-dev.postgres.database.azure.com/postgres" # noqa ) data = load_data(engine) print("Database update completed.") diff --git a/utils/data_utils.py b/utils/data_utils.py index e227706..b64cb71 100644 --- a/utils/data_utils.py +++ b/utils/data_utils.py @@ -2,7 +2,6 @@ import pandas as pd from sqlalchemy import text - from utils.log_utils import get_logger 
logger = get_logger("data") @@ -17,12 +16,16 @@ def fetch_flood_data(engine, pcode, adm_level): logger.info(f"Getting flood exposure data for {pcode}...") start = time.time() with engine.connect() as con: - df_exposure = pd.read_sql_query(query_exposure, con, params={"pcode": pcode}) + df_exposure = pd.read_sql_query( + query_exposure, con, params={"pcode": pcode} + ) df_adm = pd.read_sql_query(query_adm, con) df_adm = df_adm[df_adm[f"adm{adm_level}_pcode"] == pcode] elapsed = time.time() - start - logger.debug(f"Retrieved {len(df_exposure)} rows from database in {elapsed:.2f}s") + logger.debug( + f"Retrieved {len(df_exposure)} rows from database in {elapsed:.2f}s" + ) return df_exposure, df_adm @@ -37,7 +40,9 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7): .mean() .reset_index() ) - df_seasonal["eff_date"] = pd.to_datetime(df_seasonal["dayofyear"], format="%j") + df_seasonal["eff_date"] = pd.to_datetime( + df_seasonal["dayofyear"], format="%j" + ) # Filter data today_dayofyear = df_exposure.iloc[-1]["dayofyear"] @@ -45,9 +50,9 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7): # Calculate peaks df_peaks = ( - df_to_today.groupby([df_to_today["date"].dt.year, "adm1_pcode", "adm2_pcode"])[ - val_col - ] + df_to_today.groupby( + [df_to_today["date"].dt.year, "adm1_pcode", "adm2_pcode"] + )[val_col] .max() .reset_index() ) @@ -68,7 +73,10 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7): .groupby("eff_date")[val_col] .sum() .reset_index(), - p[p["adm1_pcode"] == pcode].groupby("date")[val_col].sum().reset_index(), + p[p["adm1_pcode"] == pcode] + .groupby("date")[val_col] + .sum() + .reset_index(), ), "2": lambda d, s, p: ( d[d["adm2_pcode"] == pcode], @@ -77,10 +85,12 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7): ), } - df_processed, df_seasonal_final, df_peaks_final = aggregation_funcs[adm_level]( - df_exposure, df_seasonal, df_peaks + df_processed, df_seasonal_final, df_peaks_final = 
aggregation_funcs[ + adm_level + ](df_exposure, df_seasonal, df_peaks) + df_processed["eff_date"] = pd.to_datetime( + df_processed["dayofyear"], format="%j" ) - df_processed["eff_date"] = pd.to_datetime(df_processed["dayofyear"], format="%j") return df_processed, df_seasonal_final, df_peaks_final