From d37c427dac3b2dcd5c737ab614dc116fa22ea576 Mon Sep 17 00:00:00 2001
From: t-downing <downing.tristan@gmail.com>
Date: Wed, 27 Nov 2024 10:55:45 -0800
Subject: [PATCH 1/2] sort dates

---
 callbacks/callbacks.py | 25 ++++++++++++++++++-------
 pyproject.toml         |  6 ++++++
 utils/chart_utils.py   | 21 +++++++++++++++++----
 3 files changed, 41 insertions(+), 11 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/callbacks/callbacks.py b/callbacks/callbacks.py
index de70864..a48fc94 100644
--- a/callbacks/callbacks.py
+++ b/callbacks/callbacks.py
@@ -2,13 +2,17 @@
 import dash_mantine_components as dmc
 from dash import Input, Output, State, dcc, html, no_update
 from dash_extensions.javascript import arrow_function, assign
+from utils.chart_utils import create_return_period_plot, create_timeseries_plot
+from utils.data_utils import (
+    calculate_return_periods,
+    fetch_flood_data,
+    get_summary,
+    process_flood_data,
+)
+from utils.log_utils import get_logger
 
 # TODO: Be more careful with engine?
 from constants import ATTRIBUTION, CHD_GREEN, URL, URL_LABELS, engine
-from utils.chart_utils import create_return_period_plot, create_timeseries_plot
-from utils.data_utils import (calculate_return_periods, fetch_flood_data,
-                              get_summary, process_flood_data)
-from utils.log_utils import get_logger
 
 logger = get_logger("callbacks")
 
@@ -68,7 +72,9 @@ def set_adm_value(adm_level):
             id="geojson",
             style=style_handle,
             hideout=dict(selected=""),
-            hoverStyle=arrow_function({"fillColor": "#1f77b4", "fillOpacity": 0.8}),
+            hoverStyle=arrow_function(
+                {"fillColor": "#1f77b4", "fillOpacity": 0.8}
+            ),
             zoomToBounds=True,
         )
         adm0 = dl.GeoJSON(
@@ -110,13 +116,16 @@ def update_plot(pcode, adm_level):
                 "",
             )
         df_exposure, df_adm = fetch_flood_data(engine, pcode, adm_level)
+        df_exposure = df_exposure.sort_values("date")
 
         if len(df_exposure) == 0:
             logger.warning(f"No data available for {pcode}")
             return (
                 [
                     dmc.Space(h=100),
-                    dmc.Center(html.Div("No data available for selected location")),
+                    dmc.Center(
+                        html.Div("No data available for selected location")
+                    ),
                 ],
                 dmc.Center("No data available"),
                 "",
@@ -142,7 +151,9 @@ def update_plot(pcode, adm_level):
         return exposure_chart, rp_chart, name, exposed_summary
 
     # TODO: Would be better as a clientside callback, but couldn't seem to get it to work...
-    @app.callback(Output("hover-place-name", "children"), Input("geojson", "hoverData"))
+    @app.callback(
+        Output("hover-place-name", "children"), Input("geojson", "hoverData")
+    )
     def info_hover(feature):
         if feature:
             return feature["properties"]["name"]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..d84cc51
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,6 @@
+[tool.black]
+line-length = 79
+
+[tool.isort]
+profile = "black"
+line_length = 79
diff --git a/utils/chart_utils.py b/utils/chart_utils.py
index e550742..62fa67d 100644
--- a/utils/chart_utils.py
+++ b/utils/chart_utils.py
@@ -3,6 +3,8 @@
 
 def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN):
     """Create timeseries plot using Plotly."""
+    df_seasonal = df_seasonal.sort_values("eff_date")
+    df_processed = df_processed.sort_values("date", ascending=False)
     fig = go.Figure()
 
     # Add seasonal average
@@ -18,7 +20,13 @@ def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN):
 
     # Add yearly traces
     for year in df_processed["date"].dt.year.unique():
-        color = CHD_GREEN if year == 2024 else "red" if year in peak_years else "grey"
+        color = (
+            CHD_GREEN
+            if year == 2024
+            else "red"
+            if year in peak_years
+            else "grey"
+        )
         linewidth = 3 if year == 2024 else 0.2
 
         df_year = df_processed[df_processed["date"].dt.year == year]
@@ -40,8 +48,11 @@ def create_timeseries_plot(df_seasonal, df_processed, peak_years, CHD_GREEN):
         margin={"t": 10, "l": 0, "r": 0, "b": 0},
         font=dict(family="Arial, sans-serif"),
     )
-    fig.update_yaxes(rangemode="tozero", title="Population exposed to flooding")
-    fig.update_xaxes(title="Date")
+    fig.update_yaxes(
+        rangemode="tozero", title="Population exposed to flooding"
+    )
+    # limit the x axis to the year 1900, the year %j-parsed dates default to
+    fig.update_xaxes(title="Date", range=["1900-01-01", "1900-12-31"])
 
     return fig
 
@@ -86,7 +97,9 @@ def create_return_period_plot(df_peaks, CHD_GREEN, rp=3):
     )
 
     # Add other significant years
-    df_rp_peaks = df_peaks[(df_peaks[f"{rp}yr_rp"]) & (df_peaks["date"] != 2024)]
+    df_rp_peaks = df_peaks[
+        (df_peaks[f"{rp}yr_rp"]) & (df_peaks["date"] != 2024)
+    ]
     fig.add_trace(
         go.Scatter(
             x=df_rp_peaks["rp"],

From 50c1a2a8110eeb323fe994645cf85c2dde6ae24a Mon Sep 17 00:00:00 2001
From: t-downing <downing.tristan@gmail.com>
Date: Wed, 27 Nov 2024 11:18:04 -0800
Subject: [PATCH 2/2] flake8 run

---
 app.py                  |  2 +-
 layouts/content.py      | 12 +++++++-----
 layouts/modal.py        |  3 ++-
 layouts/navbar.py       |  9 +++++++--
 pipelines/blob_utils.py | 15 +++++++++++----
 populate_database.py    |  6 ++++--
 utils/data_utils.py     | 32 +++++++++++++++++++++-----------
 7 files changed, 53 insertions(+), 26 deletions(-)

diff --git a/app.py b/app.py
index b7311db..7e13e13 100644
--- a/app.py
+++ b/app.py
@@ -1,10 +1,10 @@
 from dash import Dash, dcc
+from utils.log_utils import setup_logging
 
 from callbacks.callbacks import register_callbacks
 from layouts.content import content
 from layouts.modal import disclaimer_modal
 from layouts.navbar import module_bar, navbar
-from utils.log_utils import setup_logging
 
 app = Dash(__name__, update_title=None, suppress_callback_exceptions=True)
 server = app.server
diff --git a/layouts/content.py b/layouts/content.py
index a0c6037..b7f6490 100644
--- a/layouts/content.py
+++ b/layouts/content.py
@@ -89,7 +89,7 @@ def info_container():
                                 """
                                 Flood extent data is from [Floodscan](https://www.aer.com/weather-risk-management/floodscan-near-real-time-and-historical-flood-mapping/).
                                 Population distributions are from [WorldPop](https://www.worldpop.org/). Administrative boundaries are from [FieldMaps](https://fieldmaps.io/).
-                                """
+                                """  # noqa
                             ),
                         ],
                         title="Data Sources",
@@ -103,7 +103,7 @@ def info_container():
                     extent is  less than 5% to reduce noise. The daily exposure rasters are then  aggregated to the admin2 level.
                     This is similar to the [method](https://docs.google.com/document/d/16-TrPdCF7dCx5thpdA7dXB8k1MUOJUovWaRVIjEJNUE/edit?tab=t.0#heading=h.rtvq16oq23gp)
                     initially developed for the 2024 Somalia HNRP. Admin0 and admin1 exposure is calculated simply by summing the admin2 exposures.
-                    """
+                    """  # noqa
                             ),
                             dcc.Markdown(
                                 """
@@ -111,7 +111,7 @@ def info_container():
                     for all admin levels is  taken taken as the maximum instantaneous flood exposure for any day in  the year
                     (up to the current day of the year). Note that this does not  take into account flooding in one part of the
                     area on one day and  another part on another day. In this case, the yearly maximum would be  the maximum of these values, not the sum.
-                    """
+                    """  # noqa
                             ),
                         ],
                         title="Methodology",
@@ -121,7 +121,7 @@ def info_container():
                             """
                         The code used to calculate the daily flood exposure is available on GitHub [here](https://github.com/OCHA-DAP/ds-floodexposure-monitoring).
                         The code used to calculate return period and run this app is available on GitHub [here](https://github.com/OCHA-DAP/ds-floodexposure-monitoring-app).
-                        """
+                        """  # noqa
                         ),
                         title="Resources",
                     ),
@@ -192,7 +192,9 @@ def chart_container():
     )
     severity_tab = html.Div(
         style={"backgroundColor": "white", "width": "100%", "height": "100%"},
-        children=dmc.LoadingOverlay(html.Div(id="rp-chart"), style={"height": "100%"}),
+        children=dmc.LoadingOverlay(
+            html.Div(id="rp-chart"), style={"height": "100%"}
+        ),
     )
     return dbc.Tabs(
         [
diff --git a/layouts/modal.py b/layouts/modal.py
index 86f674f..b85e33c 100644
--- a/layouts/modal.py
+++ b/layouts/modal.py
@@ -6,7 +6,8 @@ def disclaimer_modal():
     return dbc.Modal(
         [
             dbc.ModalHeader(
-                dbc.ModalTitle("Disclaimer", className="header"), close_button=True
+                dbc.ModalTitle("Disclaimer", className="header"),
+                close_button=True,
             ),
             dbc.ModalBody(
                 [
diff --git a/layouts/navbar.py b/layouts/navbar.py
index 2ed5e70..3b21b19 100644
--- a/layouts/navbar.py
+++ b/layouts/navbar.py
@@ -13,7 +13,8 @@ def navbar():
                         [
                             dbc.Col(
                                 html.Img(
-                                    src="assets/centre_banner_greenbg.png", height=40
+                                    src="assets/centre_banner_greenbg.png",
+                                    height=40,
                                 ),
                             ),
                             dbc.Col(
@@ -33,7 +34,11 @@ def navbar():
             ],
             fluid=True,
         ),
-        style={"height": f"{NAVBAR_HEIGHT}px", "margin": "0px", "padding": "10px"},
+        style={
+            "height": f"{NAVBAR_HEIGHT}px",
+            "margin": "0px",
+            "padding": "10px",
+        },
         color="primary",
         dark=True,
     )
diff --git a/pipelines/blob_utils.py b/pipelines/blob_utils.py
index b05acb1..6d5228d 100644
--- a/pipelines/blob_utils.py
+++ b/pipelines/blob_utils.py
@@ -21,7 +21,8 @@ def get_container_client(
 ):
     sas = DEV_BLOB_SAS if stage == "dev" else PROD_BLOB_SAS
     container_url = (
-        f"https://imb0chd0{stage}.blob.core.windows.net/" f"{container_name}?{sas}"
+        f"https://imb0chd0{stage}.blob.core.windows.net/"
+        f"{container_name}?{sas}"
     )
     return ContainerClient.from_container_url(container_url)
 
@@ -31,7 +32,9 @@ def load_parquet_from_blob(
     stage: Literal["prod", "dev"] = "dev",
     container_name: str = "projects",
 ):
-    blob_data = load_blob_data(blob_name, stage=stage, container_name=container_name)
+    blob_data = load_blob_data(
+        blob_name, stage=stage, container_name=container_name
+    )
     return pd.read_parquet(io.BytesIO(blob_data))
 
 
@@ -42,7 +45,9 @@ def load_gdf_from_blob(
     with zipfile.ZipFile(io.BytesIO(blob_data), "r") as zip_ref:
         zip_ref.extractall("temp")
         if shapefile is None:
-            shapefile = [f for f in zip_ref.namelist() if f.endswith(".shp")][0]
+            shapefile = [f for f in zip_ref.namelist() if f.endswith(".shp")][
+                0
+            ]
         gdf = gpd.read_file(f"temp/{shapefile}")
     return gdf
 
@@ -52,7 +57,9 @@ def load_blob_data(
     stage: Literal["prod", "dev"] = "dev",
     container_name: str = "projects",
 ):
-    container_client = get_container_client(stage=stage, container_name=container_name)
+    container_client = get_container_client(
+        stage=stage, container_name=container_name
+    )
     blob_client = container_client.get_blob_client(blob_name)
     data = blob_client.download_blob().readall()
     return data
diff --git a/populate_database.py b/populate_database.py
index e862687..f75a767 100644
--- a/populate_database.py
+++ b/populate_database.py
@@ -48,7 +48,9 @@ def load_data(engine):
     )
 
     def calculate_rolling(group, window=7):
-        group[f"roll{window}"] = group["total_exposed"].rolling(window=window).mean()
+        group[f"roll{window}"] = (
+            group["total_exposed"].rolling(window=window).mean()
+        )
         return group
 
     window = 7
@@ -94,7 +96,7 @@ def calculate_rolling(group, window=7):
 if __name__ == "__main__":
     print("Populating database...")
     engine = create_engine(
-        f"postgresql+psycopg2://{AZURE_DB_UID}:{AZURE_DB_PW_DEV}@chd-rasterstats-dev.postgres.database.azure.com/postgres"
+        f"postgresql+psycopg2://{AZURE_DB_UID}:{AZURE_DB_PW_DEV}@chd-rasterstats-dev.postgres.database.azure.com/postgres"  # noqa
     )
     data = load_data(engine)
     print("Database update completed.")
diff --git a/utils/data_utils.py b/utils/data_utils.py
index e227706..b64cb71 100644
--- a/utils/data_utils.py
+++ b/utils/data_utils.py
@@ -2,7 +2,6 @@
 
 import pandas as pd
 from sqlalchemy import text
-
 from utils.log_utils import get_logger
 
 logger = get_logger("data")
@@ -17,12 +16,16 @@ def fetch_flood_data(engine, pcode, adm_level):
     logger.info(f"Getting flood exposure data for {pcode}...")
     start = time.time()
     with engine.connect() as con:
-        df_exposure = pd.read_sql_query(query_exposure, con, params={"pcode": pcode})
+        df_exposure = pd.read_sql_query(
+            query_exposure, con, params={"pcode": pcode}
+        )
         df_adm = pd.read_sql_query(query_adm, con)
         df_adm = df_adm[df_adm[f"adm{adm_level}_pcode"] == pcode]
 
     elapsed = time.time() - start
-    logger.debug(f"Retrieved {len(df_exposure)} rows from database in {elapsed:.2f}s")
+    logger.debug(
+        f"Retrieved {len(df_exposure)} rows from database in {elapsed:.2f}s"
+    )
     return df_exposure, df_adm
 
 
@@ -37,7 +40,9 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
         .mean()
         .reset_index()
     )
-    df_seasonal["eff_date"] = pd.to_datetime(df_seasonal["dayofyear"], format="%j")
+    df_seasonal["eff_date"] = pd.to_datetime(
+        df_seasonal["dayofyear"], format="%j"
+    )
 
     # Filter data
     today_dayofyear = df_exposure.iloc[-1]["dayofyear"]
@@ -45,9 +50,9 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
 
     # Calculate peaks
     df_peaks = (
-        df_to_today.groupby([df_to_today["date"].dt.year, "adm1_pcode", "adm2_pcode"])[
-            val_col
-        ]
+        df_to_today.groupby(
+            [df_to_today["date"].dt.year, "adm1_pcode", "adm2_pcode"]
+        )[val_col]
         .max()
         .reset_index()
     )
@@ -68,7 +73,10 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
             .groupby("eff_date")[val_col]
             .sum()
             .reset_index(),
-            p[p["adm1_pcode"] == pcode].groupby("date")[val_col].sum().reset_index(),
+            p[p["adm1_pcode"] == pcode]
+            .groupby("date")[val_col]
+            .sum()
+            .reset_index(),
         ),
         "2": lambda d, s, p: (
             d[d["adm2_pcode"] == pcode],
@@ -77,10 +85,12 @@ def process_flood_data(df_exposure, pcode, adm_level, window=7):
         ),
     }
 
-    df_processed, df_seasonal_final, df_peaks_final = aggregation_funcs[adm_level](
-        df_exposure, df_seasonal, df_peaks
+    df_processed, df_seasonal_final, df_peaks_final = aggregation_funcs[
+        adm_level
+    ](df_exposure, df_seasonal, df_peaks)
+    df_processed["eff_date"] = pd.to_datetime(
+        df_processed["dayofyear"], format="%j"
     )
-    df_processed["eff_date"] = pd.to_datetime(df_processed["dayofyear"], format="%j")
 
     return df_processed, df_seasonal_final, df_peaks_final