
Commit 186812b
Merge pull request #30 from OCHA-DAP/fix-timeseries
sort dates on timeseries plot
t-downing authored Nov 27, 2024
2 parents 34f3106 + 50c1a2a commit 186812b
Showing 10 changed files with 94 additions and 37 deletions.
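
Two substantive changes stand out below: the exposure dataframes are now sorted by date before plotting, and the timeseries x-axis is pinned to the dummy year 1900; most of the remaining churn is black/isort reformatting at the 79-character line length set in the new pyproject.toml. Plotly's go.Scatter connects points in row order, so an unsorted date column draws line segments that double back on themselves. A minimal sketch of the failure mode and the fix, using synthetic data rather than the app's real dataframes:

import pandas as pd
import plotly.graph_objects as go

# Stand-in for df_exposure: dates arrive out of order, e.g. because
# the database query has no ORDER BY.
df = pd.DataFrame(
    {
        "date": pd.to_datetime(["2024-03-01", "2024-01-01", "2024-02-01"]),
        "total_exposed": [300, 100, 200],
    }
)

# go.Scatter joins points in row order, so this trace zig-zags
# back and forth along the x-axis.
broken = go.Figure(
    go.Scatter(x=df["date"], y=df["total_exposed"], mode="lines")
)

# Sorting first (the fix applied below in callbacks.py and
# chart_utils.py) gives a monotone x-axis and a clean line.
df = df.sort_values("date")
fixed = go.Figure(
    go.Scatter(x=df["date"], y=df["total_exposed"], mode="lines")
)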
2 changes: 1 addition & 1 deletion app.py
@@ -1,10 +1,10 @@
from dash import Dash, dcc
from utils.log_utils import setup_logging

from callbacks.callbacks import register_callbacks
from layouts.content import content
from layouts.modal import disclaimer_modal
from layouts.navbar import module_bar, navbar
from utils.log_utils import setup_logging

app = Dash(__name__, update_title=None, suppress_callback_exceptions=True)
server = app.server
25 changes: 18 additions & 7 deletions callbacks/callbacks.py
@@ -2,13 +2,17 @@
import dash_mantine_components as dmc
from dash import Input, Output, State, dcc, html, no_update
from dash_extensions.javascript import arrow_function, assign
from utils.chart_utils import create_return_period_plot, create_timeseries_plot
from utils.data_utils import (
calculate_return_periods,
fetch_flood_data,
get_summary,
process_flood_data,
)
from utils.log_utils import get_logger

# TODO: Be more careful with engine?
from constants import ATTRIBUTION, CHD_GREEN, URL, URL_LABELS, engine
from utils.chart_utils import create_return_period_plot, create_timeseries_plot
from utils.data_utils import (calculate_return_periods, fetch_flood_data,
get_summary, process_flood_data)
from utils.log_utils import get_logger

logger = get_logger("callbacks")

@@ -68,7 +72,9 @@ def set_adm_value(adm_level):
id="geojson",
style=style_handle,
hideout=dict(selected=""),
hoverStyle=arrow_function({"fillColor": "#1f77b4", "fillOpacity": 0.8}),
hoverStyle=arrow_function(
{"fillColor": "#1f77b4", "fillOpacity": 0.8}
),
zoomToBounds=True,
)
adm0 = dl.GeoJSON(
@@ -110,13 +116,16 @@ def update_plot(pcode, adm_level):
"",
)
df_exposure, df_adm = fetch_flood_data(engine, pcode, adm_level)
df_exposure = df_exposure.sort_values("date")

if len(df_exposure) == 0:
logger.warning(f"No data available for {pcode}")
return (
[
dmc.Space(h=100),
dmc.Center(html.Div("No data available for selected location")),
dmc.Center(
html.Div("No data available for selected location")
),
],
dmc.Center("No data available"),
"",
@@ -142,7 +151,9 @@ def update_plot(pcode, adm_level):
return exposure_chart, rp_chart, name, exposed_summary

# TODO: Would be better as a clientside callback, but couldn't seem to get it to work...
@app.callback(Output("hover-place-name", "children"), Input("geojson", "hoverData"))
@app.callback(
Output("hover-place-name", "children"), Input("geojson", "hoverData")
)
def info_hover(feature):
if feature:
return feature["properties"]["name"]
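
On the TODO above: a clientside callback for the hover label might look like the sketch below. This is not code from the repo; it assumes the same component ids and that dash-leaflet's hoverData carries the feature's properties, with app being the Dash instance passed to register_callbacks.

from dash import Input, Output

# Hypothetical clientside version of info_hover: the JavaScript body
# runs in the browser, avoiding a server round trip on every hover.
app.clientside_callback(
    """
    function(feature) {
        // hoverData is null when the cursor leaves the layer
        return feature ? feature.properties.name : "";
    }
    """,
    Output("hover-place-name", "children"),
    Input("geojson", "hoverData"),
)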
12 changes: 7 additions & 5 deletions layouts/content.py
@@ -89,7 +89,7 @@ def info_container():
"""
Flood extent data is from [Floodscan](https://www.aer.com/weather-risk-management/floodscan-near-real-time-and-historical-flood-mapping/).
Population distributions are from [WorldPop](https://www.worldpop.org/). Administrative boundaries are from [FieldMaps](https://fieldmaps.io/).
"""
""" # noqa
),
],
title="Data Sources",
@@ -103,15 +103,15 @@ def info_container():
extent is less than 5% to reduce noise. The daily exposure rasters are then aggregated to the admin2 level.
This is similar to the [method](https://docs.google.com/document/d/16-TrPdCF7dCx5thpdA7dXB8k1MUOJUovWaRVIjEJNUE/edit?tab=t.0#heading=h.rtvq16oq23gp)
initially developed for the 2024 Somalia HNRP. Admin0 and admin1 exposure is calculated simply by summing the admin2 exposures.
"""
""" # noqa
),
dcc.Markdown(
"""
Return period is calculated empirically, by ranking each year's flood exposure. The maximum flood exposure to date
for all admin levels is taken as the maximum instantaneous flood exposure on any day of the year
(up to the current day of the year). Note that this does not account for flooding that hits one part of the
area on one day and another part on another day: in that case, the yearly maximum is the larger of the two values, not their sum.
"""
""" # noqa
),
],
title="Methodology",
@@ -121,7 +121,7 @@ def info_container():
"""
The code used to calculate the daily flood exposure is available on GitHub [here](https://github.com/OCHA-DAP/ds-floodexposure-monitoring).
The code used to calculate return period and run this app is available on GitHub [here](https://github.com/OCHA-DAP/ds-floodexposure-monitoring-app).
"""
""" # noqa
),
title="Resources",
),
@@ -192,7 +192,9 @@ def chart_container():
)
severity_tab = html.Div(
style={"backgroundColor": "white", "width": "100%", "height": "100%"},
children=dmc.LoadingOverlay(html.Div(id="rp-chart"), style={"height": "100%"}),
children=dmc.LoadingOverlay(
html.Div(id="rp-chart"), style={"height": "100%"}
),
)
return dbc.Tabs(
[
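
The methodology text above describes an empirical return period based on ranking each year's peak exposure. A minimal sketch of that ranking, assuming the standard (n + 1) / rank plotting position; the repo's actual implementation is calculate_return_periods in utils/data_utils.py, which this diff does not touch, and the column names here are illustrative:

import pandas as pd

def empirical_return_periods(df_peaks: pd.DataFrame) -> pd.DataFrame:
    # Rank years by peak exposure (rank 1 = largest on record) and
    # assign rp = (n + 1) / rank, so the biggest year in a 5-year
    # record gets a 6-year empirical return period.
    df = df_peaks.sort_values("max_exposed", ascending=False).copy()
    n = len(df)
    df["rank"] = range(1, n + 1)
    df["rp"] = (n + 1) / df["rank"]
    return df

peaks = pd.DataFrame(
    {
        "year": [2020, 2021, 2022, 2023, 2024],
        "max_exposed": [120, 300, 80, 500, 250],
    }
)
print(empirical_return_periods(peaks))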
3 changes: 2 additions & 1 deletion layouts/modal.py
@@ -6,7 +6,8 @@ def disclaimer_modal():
return dbc.Modal(
[
dbc.ModalHeader(
dbc.ModalTitle("Disclaimer", className="header"), close_button=True
dbc.ModalTitle("Disclaimer", className="header"),
close_button=True,
),
dbc.ModalBody(
[
9 changes: 7 additions & 2 deletions layouts/navbar.py
@@ -13,7 +13,8 @@ def navbar():
[
dbc.Col(
html.Img(
src="assets/centre_banner_greenbg.png", height=40
src="assets/centre_banner_greenbg.png",
height=40,
),
),
dbc.Col(
@@ -33,7 +34,11 @@
],
fluid=True,
),
style={"height": f"{NAVBAR_HEIGHT}px", "margin": "0px", "padding": "10px"},
style={
"height": f"{NAVBAR_HEIGHT}px",
"margin": "0px",
"padding": "10px",
},
color="primary",
dark=True,
)
15 changes: 11 additions & 4 deletions pipelines/blob_utils.py
@@ -21,7 +21,8 @@ def get_container_client(
):
sas = DEV_BLOB_SAS if stage == "dev" else PROD_BLOB_SAS
container_url = (
f"https://imb0chd0{stage}.blob.core.windows.net/" f"{container_name}?{sas}"
f"https://imb0chd0{stage}.blob.core.windows.net/"
f"{container_name}?{sas}"
)
return ContainerClient.from_container_url(container_url)

@@ -31,7 +32,9 @@ def load_parquet_from_blob(
stage: Literal["prod", "dev"] = "dev",
container_name: str = "projects",
):
blob_data = load_blob_data(blob_name, stage=stage, container_name=container_name)
blob_data = load_blob_data(
blob_name, stage=stage, container_name=container_name
)
return pd.read_parquet(io.BytesIO(blob_data))


@@ -42,7 +45,9 @@ def load_gdf_from_blob(
with zipfile.ZipFile(io.BytesIO(blob_data), "r") as zip_ref:
zip_ref.extractall("temp")
if shapefile is None:
shapefile = [f for f in zip_ref.namelist() if f.endswith(".shp")][0]
shapefile = [f for f in zip_ref.namelist() if f.endswith(".shp")][
0
]
gdf = gpd.read_file(f"temp/{shapefile}")
return gdf

@@ -52,7 +57,9 @@ def load_blob_data(
stage: Literal["prod", "dev"] = "dev",
container_name: str = "projects",
):
container_client = get_container_client(stage=stage, container_name=container_name)
container_client = get_container_client(
stage=stage, container_name=container_name
)
blob_client = container_client.get_blob_client(blob_name)
data = blob_client.download_blob().readall()
return data
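
Only the formatting of these helpers changed, not their behavior. A usage sketch: the blob name below is hypothetical, and DEV_BLOB_SAS must be set for the dev container.

from pipelines.blob_utils import load_parquet_from_blob

# Hypothetical blob path: substitute a real parquet blob from the
# "projects" container.
df = load_parquet_from_blob(
    "ds-floodexposure-monitoring/exposure.parquet",
    stage="dev",
    container_name="projects",
)
print(df.head())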
6 changes: 4 additions & 2 deletions populate_database.py
@@ -48,7 +48,9 @@ def load_data(engine):
)

def calculate_rolling(group, window=7):
group[f"roll{window}"] = group["total_exposed"].rolling(window=window).mean()
group[f"roll{window}"] = (
group["total_exposed"].rolling(window=window).mean()
)
return group

window = 7
@@ -94,7 +96,7 @@ def calculate_rolling(group, window=7):
if __name__ == "__main__":
print("Populating database...")
engine = create_engine(
f"postgresql+psycopg2://{AZURE_DB_UID}:{AZURE_DB_PW_DEV}@chd-rasterstats-dev.postgres.database.azure.com/postgres"
f"postgresql+psycopg2://{AZURE_DB_UID}:{AZURE_DB_PW_DEV}@chd-rasterstats-dev.postgres.database.azure.com/postgres" # noqa
)
data = load_data(engine)
print("Database update completed.")
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -0,0 +1,6 @@
[tool.black]
line-length = 79

[tool.isort]
profile = "black"
line_length = 79
21 changes: 17 additions & 4 deletions utils/chart_utils.py
@@ -3,6 +3,8 @@

def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN):
"""Create timeseries plot using Plotly."""
df_seasonal = df_seasonal.sort_values("eff_date")
df_processed = df_processed.sort_values("date", ascending=False)
fig = go.Figure()

# Add seasonal average
@@ -18,7 +20,13 @@ def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN):

# Add yearly traces
for year in df_processed["date"].dt.year.unique():
color = CHD_GREEN if year == 2024 else "red" if year in peak_years else "grey"
color = (
CHD_GREEN
if year == 2024
else "red"
if year in peak_years
else "grey"
)
linewidth = 3 if year == 2024 else 0.2

df_year = df_processed[df_processed["date"].dt.year == year]
@@ -40,8 +48,11 @@ def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN):
margin={"t": 10, "l": 0, "r": 0, "b": 0},
font=dict(family="Arial, sans-serif"),
)
fig.update_yaxes(rangemode="tozero", title="Population exposed to flooding")
fig.update_xaxes(title="Date")
fig.update_yaxes(
rangemode="tozero", title="Population exposed to flooding"
)
    # eff_date carries dummy year 1900 (from day-of-year parsing), so pin the x-axis to that year
fig.update_xaxes(title="Date", range=["1900-01-01", "1900-12-31"])

return fig

@@ -86,7 +97,9 @@ def create_return_period_plot(df_peaks, CHD_GREEN, rp=3):
)

# Add other significant years
df_rp_peaks = df_peaks[(df_peaks[f"{rp}yr_rp"]) & (df_peaks["date"] != 2024)]
df_rp_peaks = df_peaks[
(df_peaks[f"{rp}yr_rp"]) & (df_peaks["date"] != 2024)
]
fig.add_trace(
go.Scatter(
x=df_rp_peaks["rp"],
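
The new x-axis range makes sense given how eff_date is built in data_utils.py: parsing a day-of-year with format="%j" leaves the year unspecified, and pandas fills it with 1900, so every year's trace is re-based onto the same dummy year and the range pin shows exactly one January-to-December span. A quick demonstration:

import pandas as pd

# format="%j" parses a day-of-year; pandas fills the missing year
# with 1900, which is what lets all years share one x-axis.
days = pd.Series([1, 180, 365]).astype(str)
print(pd.to_datetime(days, format="%j"))
# 0   1900-01-01
# 1   1900-06-29
# 2   1900-12-31
# dtype: datetime64[ns]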
32 changes: 21 additions & 11 deletions utils/data_utils.py
@@ -2,7 +2,6 @@

import pandas as pd
from sqlalchemy import text

from utils.log_utils import get_logger

logger = get_logger("data")
@@ -17,12 +16,16 @@ def fetch_flood_data(engine, pcode, adm_level):
logger.info(f"Getting flood exposure data for {pcode}...")
start = time.time()
with engine.connect() as con:
df_exposure = pd.read_sql_query(query_exposure, con, params={"pcode": pcode})
df_exposure = pd.read_sql_query(
query_exposure, con, params={"pcode": pcode}
)
df_adm = pd.read_sql_query(query_adm, con)
df_adm = df_adm[df_adm[f"adm{adm_level}_pcode"] == pcode]

elapsed = time.time() - start
logger.debug(f"Retrieved {len(df_exposure)} rows from database in {elapsed:.2f}s")
logger.debug(
f"Retrieved {len(df_exposure)} rows from database in {elapsed:.2f}s"
)
return df_exposure, df_adm


@@ -37,17 +40,19 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
.mean()
.reset_index()
)
df_seasonal["eff_date"] = pd.to_datetime(df_seasonal["dayofyear"], format="%j")
df_seasonal["eff_date"] = pd.to_datetime(
df_seasonal["dayofyear"], format="%j"
)

# Filter data
today_dayofyear = df_exposure.iloc[-1]["dayofyear"]
df_to_today = df_exposure[df_exposure["dayofyear"] <= today_dayofyear]

# Calculate peaks
df_peaks = (
df_to_today.groupby([df_to_today["date"].dt.year, "adm1_pcode", "adm2_pcode"])[
val_col
]
df_to_today.groupby(
[df_to_today["date"].dt.year, "adm1_pcode", "adm2_pcode"]
)[val_col]
.max()
.reset_index()
)
@@ -68,7 +73,10 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
.groupby("eff_date")[val_col]
.sum()
.reset_index(),
p[p["adm1_pcode"] == pcode].groupby("date")[val_col].sum().reset_index(),
p[p["adm1_pcode"] == pcode]
.groupby("date")[val_col]
.sum()
.reset_index(),
),
"2": lambda d, s, p: (
d[d["adm2_pcode"] == pcode],
Expand All @@ -77,10 +85,12 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
),
}

df_processed, df_seasonal_final, df_peaks_final = aggregation_funcs[adm_level](
df_exposure, df_seasonal, df_peaks
df_processed, df_seasonal_final, df_peaks_final = aggregation_funcs[
adm_level
](df_exposure, df_seasonal, df_peaks)
df_processed["eff_date"] = pd.to_datetime(
df_processed["dayofyear"], format="%j"
)
df_processed["eff_date"] = pd.to_datetime(df_processed["dayofyear"], format="%j")

return df_processed, df_seasonal_final, df_peaks_final

Expand Down
