
Commit 186812b
Merge pull request #30 from OCHA-DAP/fix-timeseries
sort dates on timeseries plot
t-downing authored Nov 27, 2024
2 parents 34f3106 + 50c1a2a commit 186812b
Showing 10 changed files with 94 additions and 37 deletions.
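
Two substantive changes stand out below: the exposure dataframes are now sorted by date before plotting, and the timeseries x-axis is pinned to the dummy year 1900; most of the remaining churn is black/isort reformatting at the 79-character line length set in the new pyproject.toml. Plotly's go.Scatter connects points in row order, so an unsorted date column draws line segments that double back on themselves. A minimal sketch of the failure mode and the fix, using synthetic data rather than the app's real dataframes:

import pandas as pd
import plotly.graph_objects as go

# Stand-in for df_exposure: dates arrive out of order, e.g. because
# the database query has no ORDER BY.
df = pd.DataFrame(
    {
        "date": pd.to_datetime(["2024-03-01", "2024-01-01", "2024-02-01"]),
        "total_exposed": [300, 100, 200],
    }
)

# go.Scatter joins points in row order, so this trace zig-zags
# back and forth along the x-axis.
broken = go.Figure(
    go.Scatter(x=df["date"], y=df["total_exposed"], mode="lines")
)

# Sorting first (the fix applied below in callbacks.py and
# chart_utils.py) gives a monotone x-axis and a clean line.
df = df.sort_values("date")
fixed = go.Figure(
    go.Scatter(x=df["date"], y=df["total_exposed"], mode="lines")
)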
2 changes: 1 addition & 1 deletion app.py
@@ -1,10 +1,10 @@
from dash import Dash, dcc
from utils.log_utils import setup_logging

from callbacks.callbacks import register_callbacks
from layouts.content import content
from layouts.modal import disclaimer_modal
from layouts.navbar import module_bar, navbar
from utils.log_utils import setup_logging

app = Dash(__name__, update_title=None, suppress_callback_exceptions=True)
server = app.server
25 changes: 18 additions & 7 deletions callbacks/callbacks.py
@@ -2,13 +2,17 @@
import dash_mantine_components as dmc
from dash import Input, Output, State, dcc, html, no_update
from dash_extensions.javascript import arrow_function, assign
from utils.chart_utils import create_return_period_plot, create_timeseries_plot
from utils.data_utils import (
calculate_return_periods,
fetch_flood_data,
get_summary,
process_flood_data,
)
from utils.log_utils import get_logger

# TODO: Be more careful with engine?
from constants import ATTRIBUTION, CHD_GREEN, URL, URL_LABELS, engine
from utils.chart_utils import create_return_period_plot, create_timeseries_plot
from utils.data_utils import (calculate_return_periods, fetch_flood_data,
get_summary, process_flood_data)
from utils.log_utils import get_logger

logger = get_logger("callbacks")

@@ -68,7 +72,9 @@ def set_adm_value(adm_level):
id="geojson",
style=style_handle,
hideout=dict(selected=""),
hoverStyle=arrow_function({"fillColor": "#1f77b4", "fillOpacity": 0.8}),
hoverStyle=arrow_function(
{"fillColor": "#1f77b4", "fillOpacity": 0.8}
),
zoomToBounds=True,
)
adm0 = dl.GeoJSON(
@@ -110,13 +116,16 @@ def update_plot(pcode, adm_level):
"",
)
df_exposure, df_adm = fetch_flood_data(engine, pcode, adm_level)
df_exposure = df_exposure.sort_values("date")

if len(df_exposure) == 0:
logger.warning(f"No data available for {pcode}")
return (
[
dmc.Space(h=100),
dmc.Center(html.Div("No data available for selected location")),
dmc.Center(
html.Div("No data available for selected location")
),
],
dmc.Center("No data available"),
"",
@@ -142,7 +151,9 @@ def update_plot(pcode, adm_level):
return exposure_chart, rp_chart, name, exposed_summary

# TODO: Would be better as a clientside callback, but couldn't seem to get it to work...
@app.callback(Output("hover-place-name", "children"), Input("geojson", "hoverData"))
@app.callback(
Output("hover-place-name", "children"), Input("geojson", "hoverData")
)
def info_hover(feature):
if feature:
return feature["properties"]["name"]
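
On the TODO above: a clientside callback for the hover label might look like the sketch below. This is not code from the repo; it assumes the same component ids and that dash-leaflet's hoverData carries the feature's properties, with app being the Dash instance passed to register_callbacks.

from dash import Input, Output

# Hypothetical clientside version of info_hover: the JavaScript body
# runs in the browser, avoiding a server round trip on every hover.
app.clientside_callback(
    """
    function(feature) {
        // hoverData is null when the cursor leaves the layer
        return feature ? feature.properties.name : "";
    }
    """,
    Output("hover-place-name", "children"),
    Input("geojson", "hoverData"),
)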
12 changes: 7 additions & 5 deletions layouts/content.py
@@ -89,7 +89,7 @@ def info_container():
"""
Flood extent data is from [Floodscan](https://www.aer.com/weather-risk-management/floodscan-near-real-time-and-historical-flood-mapping/).
Population distributions are from [WorldPop](https://www.worldpop.org/). Administrative boundaries are from [FieldMaps](https://fieldmaps.io/).
"""
""" # noqa
),
],
title="Data Sources",
@@ -103,15 +103,15 @@ def info_container():
extent is less than 5% to reduce noise. The daily exposure rasters are then aggregated to the admin2 level.
This is similar to the [method](https://docs.google.com/document/d/16-TrPdCF7dCx5thpdA7dXB8k1MUOJUovWaRVIjEJNUE/edit?tab=t.0#heading=h.rtvq16oq23gp)
initially developed for the 2024 Somalia HNRP. Admin0 and admin1 exposure is calculated simply by summing the admin2 exposures.
"""
""" # noqa
),
dcc.Markdown(
"""
Return period is calculated empirically, by ranking each year's flood exposure. The maximum flood exposure to date
for all admin levels is taken as the maximum instantaneous flood exposure on any day of the year
(up to the current day of the year). Note that this does not account for flooding that hits one part of the
area on one day and another part on another day: in that case, the yearly maximum is the larger of the two values, not their sum.
"""
""" # noqa
),
],
title="Methodology",
@@ -121,7 +121,7 @@ def info_container():
"""
The code used to calculate the daily flood exposure is available on GitHub [here](https://github.com/OCHA-DAP/ds-floodexposure-monitoring).
The code used to calculate return period and run this app is available on GitHub [here](https://github.com/OCHA-DAP/ds-floodexposure-monitoring-app).
"""
""" # noqa
),
title="Resources",
),
@@ -192,7 +192,9 @@ def chart_container():
)
severity_tab = html.Div(
style={"backgroundColor": "white", "width": "100%", "height": "100%"},
children=dmc.LoadingOverlay(html.Div(id="rp-chart"), style={"height": "100%"}),
children=dmc.LoadingOverlay(
html.Div(id="rp-chart"), style={"height": "100%"}
),
)
return dbc.Tabs(
[
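
The methodology text above describes an empirical return period based on ranking each year's peak exposure. A minimal sketch of that ranking, assuming the standard (n + 1) / rank plotting position; the repo's actual implementation is calculate_return_periods in utils/data_utils.py, which this diff does not touch, and the column names here are illustrative:

import pandas as pd

def empirical_return_periods(df_peaks: pd.DataFrame) -> pd.DataFrame:
    # Rank years by peak exposure (rank 1 = largest on record) and
    # assign rp = (n + 1) / rank, so the biggest year in a 5-year
    # record gets a 6-year empirical return period.
    df = df_peaks.sort_values("max_exposed", ascending=False).copy()
    n = len(df)
    df["rank"] = range(1, n + 1)
    df["rp"] = (n + 1) / df["rank"]
    return df

peaks = pd.DataFrame(
    {
        "year": [2020, 2021, 2022, 2023, 2024],
        "max_exposed": [120, 300, 80, 500, 250],
    }
)
print(empirical_return_periods(peaks))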
3 changes: 2 additions & 1 deletion layouts/modal.py
@@ -6,7 +6,8 @@ def disclaimer_modal():
return dbc.Modal(
[
dbc.ModalHeader(
dbc.ModalTitle("Disclaimer", className="header"), close_button=True
dbc.ModalTitle("Disclaimer", className="header"),
close_button=True,
),
dbc.ModalBody(
[
9 changes: 7 additions & 2 deletions layouts/navbar.py
@@ -13,7 +13,8 @@ def navbar():
[
dbc.Col(
html.Img(
src="assets/centre_banner_greenbg.png", height=40
src="assets/centre_banner_greenbg.png",
height=40,
),
),
dbc.Col(
@@ -33,7 +34,11 @@
],
fluid=True,
),
style={"height": f"{NAVBAR_HEIGHT}px", "margin": "0px", "padding": "10px"},
style={
"height": f"{NAVBAR_HEIGHT}px",
"margin": "0px",
"padding": "10px",
},
color="primary",
dark=True,
)
15 changes: 11 additions & 4 deletions pipelines/blob_utils.py
@@ -21,7 +21,8 @@ def get_container_client(
):
sas = DEV_BLOB_SAS if stage == "dev" else PROD_BLOB_SAS
container_url = (
f"https://imb0chd0{stage}.blob.core.windows.net/" f"{container_name}?{sas}"
f"https://imb0chd0{stage}.blob.core.windows.net/"
f"{container_name}?{sas}"
)
return ContainerClient.from_container_url(container_url)

@@ -31,7 +32,9 @@ def load_parquet_from_blob(
stage: Literal["prod", "dev"] = "dev",
container_name: str = "projects",
):
blob_data = load_blob_data(blob_name, stage=stage, container_name=container_name)
blob_data = load_blob_data(
blob_name, stage=stage, container_name=container_name
)
return pd.read_parquet(io.BytesIO(blob_data))


@@ -42,7 +45,9 @@ def load_gdf_from_blob(
with zipfile.ZipFile(io.BytesIO(blob_data), "r") as zip_ref:
zip_ref.extractall("temp")
if shapefile is None:
shapefile = [f for f in zip_ref.namelist() if f.endswith(".shp")][0]
shapefile = [f for f in zip_ref.namelist() if f.endswith(".shp")][
0
]
gdf = gpd.read_file(f"temp/{shapefile}")
return gdf

@@ -52,7 +57,9 @@ def load_blob_data(
stage: Literal["prod", "dev"] = "dev",
container_name: str = "projects",
):
container_client = get_container_client(stage=stage, container_name=container_name)
container_client = get_container_client(
stage=stage, container_name=container_name
)
blob_client = container_client.get_blob_client(blob_name)
data = blob_client.download_blob().readall()
return data
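
Only the formatting of these helpers changed, not their behavior. A usage sketch: the blob name below is hypothetical, and DEV_BLOB_SAS must be set for the dev container.

from pipelines.blob_utils import load_parquet_from_blob

# Hypothetical blob path: substitute a real parquet blob from the
# "projects" container.
df = load_parquet_from_blob(
    "ds-floodexposure-monitoring/exposure.parquet",
    stage="dev",
    container_name="projects",
)
print(df.head())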
6 changes: 4 additions & 2 deletions populate_database.py
@@ -48,7 +48,9 @@ def load_data(engine):
)

def calculate_rolling(group, window=7):
group[f"roll{window}"] = group["total_exposed"].rolling(window=window).mean()
group[f"roll{window}"] = (
group["total_exposed"].rolling(window=window).mean()
)
return group

window = 7
@@ -94,7 +96,7 @@ def calculate_rolling(group, window=7):
if __name__ == "__main__":
print("Populating database...")
engine = create_engine(
f"postgresql+psycopg2://{AZURE_DB_UID}:{AZURE_DB_PW_DEV}@chd-rasterstats-dev.postgres.database.azure.com/postgres"
f"postgresql+psycopg2://{AZURE_DB_UID}:{AZURE_DB_PW_DEV}@chd-rasterstats-dev.postgres.database.azure.com/postgres" # noqa
)
data = load_data(engine)
print("Database update completed.")
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -0,0 +1,6 @@
[tool.black]
line-length = 79

[tool.isort]
profile = "black"
line_length = 79
21 changes: 17 additions & 4 deletions utils/chart_utils.py
@@ -3,6 +3,8 @@

def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN):
"""Create timeseries plot using Plotly."""
df_seasonal = df_seasonal.sort_values("eff_date")
df_processed = df_processed.sort_values("date", ascending=False)
fig = go.Figure()

# Add seasonal average
@@ -18,7 +20,13 @@ def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN):

# Add yearly traces
for year in df_processed["date"].dt.year.unique():
color = CHD_GREEN if year == 2024 else "red" if year in peak_years else "grey"
color = (
CHD_GREEN
if year == 2024
else "red"
if year in peak_years
else "grey"
)
linewidth = 3 if year == 2024 else 0.2

df_year = df_processed[df_processed["date"].dt.year == year]
@@ -40,8 +48,11 @@ def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN):
margin={"t": 10, "l": 0, "r": 0, "b": 0},
font=dict(family="Arial, sans-serif"),
)
fig.update_yaxes(rangemode="tozero", title="Population exposed to flooding")
fig.update_xaxes(title="Date")
fig.update_yaxes(
rangemode="tozero", title="Population exposed to flooding"
)
    # eff_date carries dummy year 1900 (from day-of-year parsing), so pin the x-axis to that year
fig.update_xaxes(title="Date", range=["1900-01-01", "1900-12-31"])

return fig

@@ -86,7 +97,9 @@ def create_return_period_plot(df_peaks, CHD_GREEN, rp=3):
)

# Add other significant years
df_rp_peaks = df_peaks[(df_peaks[f"{rp}yr_rp"]) & (df_peaks["date"] != 2024)]
df_rp_peaks = df_peaks[
(df_peaks[f"{rp}yr_rp"]) & (df_peaks["date"] != 2024)
]
fig.add_trace(
go.Scatter(
x=df_rp_peaks["rp"],
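
The new x-axis range makes sense given how eff_date is built in data_utils.py: parsing a day-of-year with format="%j" leaves the year unspecified, and pandas fills it with 1900, so every year's trace is re-based onto the same dummy year and the range pin shows exactly one January-to-December span. A quick demonstration:

import pandas as pd

# format="%j" parses a day-of-year; pandas fills the missing year
# with 1900, which is what lets all years share one x-axis.
days = pd.Series([1, 180, 365]).astype(str)
print(pd.to_datetime(days, format="%j"))
# 0   1900-01-01
# 1   1900-06-29
# 2   1900-12-31
# dtype: datetime64[ns]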
32 changes: 21 additions & 11 deletions utils/data_utils.py
@@ -2,7 +2,6 @@

import pandas as pd
from sqlalchemy import text

from utils.log_utils import get_logger

logger = get_logger("data")
@@ -17,12 +16,16 @@ def fetch_flood_data(engine, pcode, adm_level):
logger.info(f"Getting flood exposure data for {pcode}...")
start = time.time()
with engine.connect() as con:
df_exposure = pd.read_sql_query(query_exposure, con, params={"pcode": pcode})
df_exposure = pd.read_sql_query(
query_exposure, con, params={"pcode": pcode}
)
df_adm = pd.read_sql_query(query_adm, con)
df_adm = df_adm[df_adm[f"adm{adm_level}_pcode"] == pcode]

elapsed = time.time() - start
logger.debug(f"Retrieved {len(df_exposure)} rows from database in {elapsed:.2f}s")
logger.debug(
f"Retrieved {len(df_exposure)} rows from database in {elapsed:.2f}s"
)
return df_exposure, df_adm


@@ -37,17 +40,19 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
.mean()
.reset_index()
)
df_seasonal["eff_date"] = pd.to_datetime(df_seasonal["dayofyear"], format="%j")
df_seasonal["eff_date"] = pd.to_datetime(
df_seasonal["dayofyear"], format="%j"
)

# Filter data
today_dayofyear = df_exposure.iloc[-1]["dayofyear"]
df_to_today = df_exposure[df_exposure["dayofyear"] <= today_dayofyear]

# Calculate peaks
df_peaks = (
df_to_today.groupby([df_to_today["date"].dt.year, "adm1_pcode", "adm2_pcode"])[
val_col
]
df_to_today.groupby(
[df_to_today["date"].dt.year, "adm1_pcode", "adm2_pcode"]
)[val_col]
.max()
.reset_index()
)
@@ -68,7 +73,10 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
.groupby("eff_date")[val_col]
.sum()
.reset_index(),
p[p["adm1_pcode"] == pcode].groupby("date")[val_col].sum().reset_index(),
p[p["adm1_pcode"] == pcode]
.groupby("date")[val_col]
.sum()
.reset_index(),
),
"2": lambda d, s, p: (
d[d["adm2_pcode"] == pcode],
Expand All @@ -77,10 +85,12 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
),
}

df_processed, df_seasonal_final, df_peaks_final = aggregation_funcs[adm_level](
df_exposure, df_seasonal, df_peaks
df_processed, df_seasonal_final, df_peaks_final = aggregation_funcs[
adm_level
](df_exposure, df_seasonal, df_peaks)
df_processed["eff_date"] = pd.to_datetime(
df_processed["dayofyear"], format="%j"
)
df_processed["eff_date"] = pd.to_datetime(df_processed["dayofyear"], format="%j")

return df_processed, df_seasonal_final, df_peaks_final

Expand Down
