From f25c98ac8e4e24ade57e4af7aaada39b72de82a8 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Fri, 5 Jul 2024 15:39:47 +0100
Subject: [PATCH 01/16] cpc data request

---
 code/x_cpc_msoa_zoning_request.R | 282 +++++++++++++++++++++++++++++++
 1 file changed, 282 insertions(+)
 create mode 100644 code/x_cpc_msoa_zoning_request.R

diff --git a/code/x_cpc_msoa_zoning_request.R b/code/x_cpc_msoa_zoning_request.R
new file mode 100644
index 0000000..35a4c74
--- /dev/null
+++ b/code/x_cpc_msoa_zoning_request.R
@@ -0,0 +1,282 @@
+library(tidyverse)
+library(sf)
+library(tmap)
+
+
+# ---------- Read in data
+
+# --- 1. CPC data template - 2011 MSOA layer
+cpc_zones <- read_csv("data/external/cpc/ZoningTemplate.csv")
+
+# --- 2. 2011 MSOA boundaries (geo)
+msoa_shp_2011 <- st_read("data/external/msoa_england_2011/infuse_msoa_lyr_2011_clipped.shp")
+
+# --- 3. Commuting matrices (2021)
+msoa_commute_2021 <- read_csv("data/raw/travel_demand/od_census_2021/ODWP15EW_MSOA_v1.zip")
+
+# simplify
+msoa_commute_2021 <- msoa_commute_2021 %>%
+  rename(msoa_origin = "Middle layer Super Output Areas code", msoa_destination = "MSOA of workplace code") %>%
+  # aggregate: we don't care about gender and age breakdown
+  group_by(msoa_origin, msoa_destination) %>%
+  summarise(commuters = sum(Count)) %>%
+  ungroup()
+
+# add 2011 msoa names
+msoa_2011_2021 <- read_csv("data/external/cpc/MSOA_2011_2021.csv") %>%
+  select(MSOA11CD, MSOA21CD, LAD22CD)
+
+msoa_commute_2021_2011 <- msoa_commute_2021 %>%
+  # add 2011 msoa codes (origin)
+  left_join(msoa_2011_2021 %>%
+              rename_with(~ paste0(.x, "_origin")),
+            by = c("msoa_origin" = "MSOA21CD_origin")) %>%
+  # add 2011 msoa codes (destination)
+  left_join(msoa_2011_2021 %>%
+              rename_with(~ paste0(.x, "_destination")),
+            by = c("msoa_destination" = "MSOA21CD_destination"))
+
+
+
+
+# ---------- Join the geo data (2) onto (1)
+cpc_zones_geo <- cpc_zones %>%
+  left_join(msoa_shp_2011  %>%
+              select(geo_code),
+            by = c("msoa" = "geo_code")) %>%
+  st_as_sf()
+
+
+cps_zones_yh_region <- cpc_zones_geo %>%
+  filter(region_name == "Yorkshire and The Humber")
+
+cps_zones_wy_county <- cpc_zones_geo %>%
+  filter(county_name == "West Yorkshire")
+
+
+
+# ---------- Commuting matrices
+
+# which OD pairs have an origin or destination inside our study area
+msoa_commute_2021_2011_region = msoa_commute_2021_2011 %>%
+  filter(MSOA11CD_origin %in% cps_zones_wy_county$msoa |
+           MSOA11CD_destination %in% cps_zones_wy_county$msoa)
+
+
+# ----------
+
+msoas_of_interest <- cpc_zones_geo %>%
+  filter(msoa %in% msoa_commute_2021_2011_region$MSOA11CD_origin |
+           msoa %in% msoa_commute_2021_2011_region$MSOA11CD_destination)
+
+
+
+# label msoas in west yorkshire
+
+msoas_of_interest %>%
+  group_by(lad) %>%
+  mutate(zone_id = n()) %>%
+  mutate(zone_id = as.character(zone_id)) %>%
+  ungroup() -> x
+
+
+tm_shape(x)+
+  tm_fill(col = "zone_id",
+          palette = "Dark2")
+
+
+# ------------------------- APPROACH (msoa -> lad -> county)
+
+# ----- study area
+study_area_county = "E11000006"
+study_area_region = "E12000003"
+
+# 1.LEVEL = MSOA
+
+# --- west yorkshire
+cpc_zones_wy = cpc_zones_geo %>%
+  filter(county == study_area_county) %>%
+  mutate(id_col = row_number(),
+         zone_type = 1) # internal to the model
+
+
+# 2.LEVEL = LAD
+
+# --- yorkshire humber region
+cpc_zones_yh_region = cpc_zones_geo %>%
+  filter(region == study_area_region) %>%
+  filter(! msoa %in% cpc_zones_wy$msoa) %>%
+  group_by(lad) %>%
+  mutate(id_col = cur_group_id() + max(cpc_zones_wy$id_col)) %>%
+  ungroup() %>%
+  mutate(zone_type = 0) # external to the model
+
+
+# --- greater manchester and lancashire and derbyshire
+cpc_zones_other_neighbors =  cpc_zones_geo %>%
+  filter(county_name %in% c("Greater Manchester", "Derbyshire", "Lancashire")) %>%
+  #filter(! msoa %in% cpc_zones_wy$msoa) %>%
+  group_by(lad) %>%
+  mutate(id_col = cur_group_id() + max(cpc_zones_yh_region$id_col)) %>%
+  ungroup() %>%
+  mutate(zone_type = 0) # external to the model
+
+
+# Combine (LEVEL 1) and (LEVEL 2)
+
+cpc_zones_study_plus = cpc_zones_wy %>%
+  bind_rows(cpc_zones_yh_region) %>%
+  bind_rows(cpc_zones_other_neighbors)
+
+# 3.LEVEL = COUNTY (OR REGION - check which one I used)
+
+# ----- Other areas
+
+cpc_zones_other = cpc_zones_geo %>%
+  filter(country_name == "England") %>%
+  filter(! msoa %in% cpc_zones_study_plus$msoa) %>%
+  group_by(region) %>%
+  mutate(id_col = cur_group_id() + max(cpc_zones_study_plus$id_col)) %>%
+  ungroup() %>%
+  mutate(zone_type = 0) # external to the model
+
+
+# 4.LEVEL = COUNTRY
+
+# ----- Scotland and Wales
+
+cpc_zones_countries = cpc_zones_geo %>%
+  filter(country_name != "England") %>%
+  group_by(country_name) %>%
+  mutate(id_col = cur_group_id() + max(cpc_zones_other$id_col)) %>%
+  ungroup() %>%
+  mutate(zone_type = 0) # external to the model
+
+
+# bring all together
+cpc_zones_all = cpc_zones_study_plus %>%
+  bind_rows(cpc_zones_other) %>%
+  bind_rows(cpc_zones_countries)
+
+
+cpc_zones_all <- cpc_zones_all %>%
+  mutate(zone_id = id_col)
+
+
+cpc_zones_all_df = cpc_zones_all %>%
+  select(msoa:msoa_name) %>%
+  st_drop_geometry()
+
+# Save
+write_csv(cpc_zones_all_df, "data/external/cpc/ZoningTemplateFilled_lad_region.csv")
+
+
+cpc_zones_all %>% filter(zone_id %in% c(347, 348)) -> x
+
+tm_shape(x)+
+  tm_fill(col = "zone_id",
+          palette = "Dark2",
+          legend.show = FALSE)
+
+
+# ------------------------- APPROACH (msoa -> lad) + scot wales as countries (could not use them as LADs due to an error on cpc website) ---------- #
+# ------- THIS IS THE LAYER I SUBMITTED ------- #
+
+# ----- study area
+study_area_county = "E11000006"
+
+# 1.LEVEL = MSOA
+
+# --- west yorkshire
+cpc_zones_wy = cpc_zones_geo %>%
+  filter(county == study_area_county) %>%
+  mutate(id_col = row_number(),
+         zone_type = 1) # internal to the model
+
+
+# 2.LEVEL = LAD
+
+# --- yorkshire humber region
+cpc_zones_other = cpc_zones_geo %>%
+  filter(country_name == "England") %>%
+  filter(! msoa %in% cpc_zones_wy$msoa) %>%
+  group_by(lad) %>%
+  mutate(id_col = cur_group_id() + max(cpc_zones_wy$id_col)) %>%
+  ungroup() %>%
+  mutate(zone_type = 0) # external to the model
+
+
+# ----- Scotland and Wales
+
+cpc_zones_scot_wales = cpc_zones_geo %>%
+  filter(country_name != "England") %>%
+  group_by(country_name) %>%
+  mutate(id_col = cur_group_id() + max(cpc_zones_other$id_col)) %>%
+  ungroup() %>%
+  mutate(zone_type = 0) # external to the model
+
+
+
+# Combine
+
+cpc_zones_all = cpc_zones_wy %>%
+  bind_rows(cpc_zones_other) %>%
+  bind_rows(cpc_zones_scot_wales) %>%
+  mutate(zone_id = id_col)
+
+cpc_zones_all_df = cpc_zones_all %>%
+  select(msoa:msoa_name) %>%
+  st_drop_geometry()
+
+# Save
+write_csv(cpc_zones_all_df, "data/external/cpc/ZoningTemplateFilled_lad_scot_wales.csv")
+
+
+cpc_zones_all %>% filter(zone_id %in% c(629, 643, 652)) -> x
+
+tm_shape(x)+
+  tm_fill(col = "lad",
+          palette = "Dark2")
+
+
+
+
+
+
+
+
+
+
+
+
+tm_shape(cpc_zones_all %>%
+           filter(zone_id == 375))+
+  tm_fill(col = "zone_id",
+          palette = "Dark2",
+          legend.show = FALSE)
+
+cpc_zones_all %>%
+  filter(zone_id == 375) -> x
+# PLOTS
+
+tm_shape(cpc_zones_all)+
+  tm_fill(col = "zone_id",
+          palette = "Dark2",
+          legend.show = FALSE)
+
+
+tm_shape(cpc_zones_all %>%
+           #filter(county_name == "West Yorkshire") %>%
+           filter(region_name == "Yorkshire and The Humber") %>%
+           mutate(id_col = as.character(id_col)))+
+  tm_fill(col = "lad",
+          palette = "Dark2")
+
+
+tm_shape(cpc_zones_all %>%
+           filter(county_name == "Inner London" | county_name == "Outer London")) +
+  tm_fill(col = "lad",
+          palette = "Dark2")
+
+
+unique(cpc_zones_all2$zone_id)

From f55879ba39b990b22811d46083dff96352eb9795 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Mon, 22 Jul 2024 16:09:45 +0100
Subject: [PATCH 02/16] cpc data plots

---
 code/x_cpc_exploration.R | 246 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 246 insertions(+)
 create mode 100644 code/x_cpc_exploration.R

diff --git a/code/x_cpc_exploration.R b/code/x_cpc_exploration.R
new file mode 100644
index 0000000..2adf051
--- /dev/null
+++ b/code/x_cpc_exploration.R
@@ -0,0 +1,246 @@
+library(tidyverse)
+library(sf)
+library(od)
+library(tmap)
+
+source("R/study_area_geographies.R")
+source("R/filter_od_matrix.R")
+
+plots_path <- "data/processed/plots/eda/cpc/"
+
+# ----------------------- READ IN THE DATA ----------------------- #
+
+# ------- Zoning layer (2011 MSOAs)
+
+zones <- read_csv("data/external/cpc/ZoningTemplateFilled_lad_scot_wales.csv")
+# study area zones
+zones_internal = zones %>%
+  filter(zone_type == 1)
+
+# ------- CPC data
+
+# Define the path to the ZIP file
+zip_file_path <- "data/external/cpc/cpc_west_yorkshire_weekdays_march_may.zip"
+# Create a temporary directory to extract the files
+temp_dir <- tempdir()
+# Extract the ZIP file to the temporary directory
+unzip(zip_file_path, exdir = temp_dir)
+# Get a list of all CSV files in the temporary directory
+csv_files <- list.files(temp_dir, pattern = "\\.csv$", full.names = TRUE)
+
+# Read each CSV file into a list of data frames
+cpc_matrices <- purrr::map(csv_files, read.csv)
+
+# Extract the part of the filename after the underscore and before the .csv extension
+file_names <- basename(csv_files)
+short_names <- stringr::str_extract(file_names, "(?<=_).*(?=\\.csv)")
+
+# Name each data frame in the list by its original file name
+names(cpc_matrices) <- short_names
+
+# --- Combine the dfs
+
+# Add a new column to each data frame with the name of the data frame
+cpc_matrices <- purrr::map2(cpc_matrices, short_names, ~mutate(.x, source = .y))
+
+# Combine into one df
+cpc_matrices_all <- bind_rows(cpc_matrices)
+
+# Add column to sum all flow types
+cpc_matrices_all <- cpc_matrices_all %>%
+  mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE))
+# ------- Geo boundaries
+
+# England
+msoa_shp_2011 <- st_read("data/external/msoa_england_2011/infuse_msoa_lyr_2011_clipped.shp")
+
+# West Yorkshire (All zones with zone_type == 1)
+
+study_area_large <- msoa_shp_2011 %>%
+  filter(geo_code %in% zones_internal$msoa) %>%
+  rename(MSOA11CD = geo_code)
+
+# Leeds
+
+# --- administrative boundaries
+study_area <- st_read("data/interim/study_area_boundary.geojson")
+# convert to desired resolution
+geography = "MSOA"
+study_area = study_area_geographies(study_area = study_area,
+                                    geography = geography)
+
+# ADD 2011 MSOA codes to study area
+msoa_2011_2021_lookup <- read_csv("data/external/cpc/MSOA_2011_2021.csv")
+
+study_area <- study_area %>%
+  left_join(msoa_2011_2021_lookup %>%
+              select(c("MSOA11CD", "MSOA21CD")),
+            by = "MSOA21CD")
+
+
+
+# ----------------------- ADD GEOGRAPHIC COORDINATES TO ZONES ----------------------- #
+
+#  ----- filter zones to study area
+
+# filter matrices to keep study area zones only
+cpc_matrices_all_internal <- cpc_matrices_all %>%
+  filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id)
+
+
+# ----- Add desire lines
+
+# prepare data for od::od_to_sf()
+
+# ----- add msoa 2011 codes
+cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
+  # join origin data
+  left_join(zones_internal %>%
+              select(msoa, zone_id, msoa_name) %>%
+              rename(from_msoa = msoa, from_msoa_name = msoa_name),
+            by = c("from_zone" = "zone_id")) %>%
+  # join destination data
+  left_join(zones_internal %>%
+              select(msoa, zone_id, msoa_name) %>%
+              rename(to_msoa = msoa, to_msoa_name = msoa_name),
+            by = c("to_zone" = "zone_id")) %>%
+  # move msoa codes to first two columns (for od::od_to_sf())
+  relocate("from_msoa", .before = everything()) %>%
+  relocate("to_msoa", .after = "from_msoa")
+
+# od::od_to_sf
+
+# cpc_matrices_all_internal_sf <- od::od_to_sf(x = cpc_matrices_all_internal,
+#                                              z = study_area_cpc_internal,
+#                                              silent = FALSE)
+
+# remove short ods
+cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
+  rename(Origin = from_msoa, Destination = to_msoa)
+
+# Filter matrix by distance also adds desire lines
+cpc_matrices_all_internal_sf <- filter_matrix_by_distance(zones = study_area_large, od_matrix = cpc_matrices_all_internal, dist_threshold = 1000)
+
+
+
+
+
+
+
+
+
+map_cpc_flows = function(od_matrix,
+                         area,
+                         column_to_map,
+                         cols,
+                         min_flows){
+
+  # --- study_area
+  # all of WY
+  if (area == "large"){
+    study_area = study_area_large
+  # Leeds only
+  } else if (area == "small") {
+    study_area = study_area
+  }
+
+  # ----- We filter the od_matrix to only include flows that start and end in the study_Area
+  od_matrix = od_matrix %>%
+    filter(Origin %in% study_area$MSOA11CD &
+           Destination %in% study_area$MSOA11CD)
+  # arrange so that highest flows are plotting on top
+  od_matrix = od_matrix %>% arrange(!!sym(column_to_map))
+
+  # ----- Plot
+
+  # Use the dynamic column name in the filter and tm_lines functions
+  tm_shape(study_area) +
+    tm_borders(col = "grey80", alpha = 0.7) +
+    tm_shape(od_matrix %>%
+               filter((!!sym(column_to_map)) > min_flows)) +
+    tm_lines(col = column_to_map,
+             title.col = "Number of people",
+             lwd = column_to_map,
+             legend.lwd.show = FALSE,
+             palette = "YlGnBu", # YlOrRd # PuBuGn
+             scale = 7) +
+    tm_facets(by = "source",
+              free.coords = FALSE,
+              ncol = cols) +
+    # START AND ENDPOINTS
+    tm_shape(od_matrix %>%
+               filter((!!sym(column_to_map)) > min_flows) %>%
+               mutate(geometry = lwgeom::st_startpoint(.))) +
+    tm_dots(col = "darkgreen",
+            alpha = 0.4,
+            #jitter = 0.1,
+            size = column_to_map,
+            scale = 0.5,
+            legend.size.show = FALSE) +
+    tm_facets(by = "source",
+              free.coords = FALSE,
+              ncol = cols,
+              showNA = FALSE) +
+    tm_shape(od_matrix %>%
+               filter((!!sym(column_to_map)) > min_flows) %>%
+               mutate(geometry = lwgeom::st_endpoint(.))) +
+    tm_dots(col = "red",
+            alpha = 0.4,
+            #jitter = 0.1,
+            size = column_to_map,
+            scale = 0.5,
+            legend.size.show = FALSE) +
+    tm_facets(by = "source",
+              free.coords = FALSE,
+              ncol = cols,
+              showNA = FALSE) +
+    tm_layout(fontfamily = 'Georgia',
+              main.title = paste0("Flows: Purpose = ", column_to_map),
+              main.title.size = 1.1,
+              main.title.color = "azure4",
+              main.title.position = "left",
+              panel.label.size = 1,
+              panel.label.bg.color = NA,
+              # legend.outside = TRUE,
+              # legend.outside.position = "bottom",
+              # legend.stack = "horizontal",
+              frame = FALSE) +
+    tm_add_legend(type = "symbol", labels = 'OD Start', col = 'darkgreen') +
+    tm_add_legend(type = "symbol", labels = 'OD End', col = 'red') -> res
+
+  res
+
+  tmap_save(tm = res, filename = paste0(plots_path, "cpc_facet_", column_to_map, "_scope_", area, ".png"), width = 12, dpi = 1080, asp = 0)
+
+
+
+
+
+
+
+  }
+
+# # Apply the function for a specific column
+# map_cpc_flows(od_matrix = cpc_matrices_all_internal_sf,
+#               area = "small", # c(small, large)
+#               column_to_map = "hbo_outbound",
+#               cols = 5,
+#               min_flows = 20)
+
+
+# Apply to all columns
+columns = c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb", "total_flow")
+
+# SMALL (Leeds Boundary)
+purrr::map(.x = columns, ~map_cpc_flows(od_matrix = cpc_matrices_all_internal_sf,
+                                        area = "small",
+                                        column_to_map = .x,
+                                        cols = 5,
+                                        min_flow = 10))
+
+# LARGE (West Yorskshire)
+purrr::map(.x = columns, ~map_cpc_flows(od_matrix = cpc_matrices_all_internal_sf,
+                                        area = "large",
+                                        column_to_map = .x,
+                                        cols = 5,
+                                        min_flow = 10))

From 0df20125eec2ffc571a55d58461c29da443537ee Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Sun, 27 Oct 2024 09:50:07 +0000
Subject: [PATCH 03/16] save tt and demand msoa level

---
 code/travel_demand_census.R | 2 +-
 code/x_cpc_exploration.R    | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/code/travel_demand_census.R b/code/travel_demand_census.R
index 22f2b9e..f1eb09c 100644
--- a/code/travel_demand_census.R
+++ b/code/travel_demand_census.R
@@ -191,6 +191,6 @@ census_work_msoa_tt <- census_work_msoa %>%
 
 # save
 #write_csv(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.csv")
-arrow::write_parquet(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.parquet")
+arrow::write_parquet(census_work_msoa_tt, )
 
 
diff --git a/code/x_cpc_exploration.R b/code/x_cpc_exploration.R
index 2adf051..9d345b8 100644
--- a/code/x_cpc_exploration.R
+++ b/code/x_cpc_exploration.R
@@ -49,6 +49,7 @@ cpc_matrices_all <- bind_rows(cpc_matrices)
 # Add column to sum all flow types
 cpc_matrices_all <- cpc_matrices_all %>%
   mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE))
+
 # ------- Geo boundaries
 
 # England

From af81246279974621d29fe54812ff9de6e1098308 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Wed, 30 Oct 2024 08:41:19 +0000
Subject: [PATCH 04/16] begin temporal data prep

---
 code/travel_demand_temporal.R | 206 ++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100644 code/travel_demand_temporal.R

diff --git a/code/travel_demand_temporal.R b/code/travel_demand_temporal.R
new file mode 100644
index 0000000..c4991f5
--- /dev/null
+++ b/code/travel_demand_temporal.R
@@ -0,0 +1,206 @@
+### the purpose of this script is to work with travel demand data from cpc  ###
+### the data is only available at MSOA level. It is cleaned and joined onto ###
+### the travel time results
+
+library(tidyverse)
+library(sf)
+
+
+source("R/study_area_geographies.R")
+source("R/filter_od_matrix.R")
+
+# --------------- 1. READ IN THE DEMaND DATA
+
+# ------- Zoning layer (2011 MSOAs)
+
+zones <- read_csv("data/external/cpc/ZoningTemplateFilled_lad_scot_wales.csv")
+# study area zones
+zones_internal = zones %>%
+  filter(zone_type == 1)
+
+# ------- CPC data
+
+# Define the path to the ZIP file
+zip_file_path <- "data/external/cpc/cpc_west_yorkshire_weekdays_march_may.zip"
+# Create a temporary directory to extract the files
+temp_dir <- tempdir()
+# Extract the ZIP file to the temporary directory
+unzip(zip_file_path, exdir = temp_dir)
+# Get a list of all CSV files in the temporary directory
+csv_files <- list.files(temp_dir, pattern = "\\.csv$", full.names = TRUE)
+
+# Read each CSV file into a list of data frames
+cpc_matrices <- purrr::map(csv_files, read.csv)
+
+# Extract the part of the filename after the underscore and before the .csv extension
+file_names <- basename(csv_files)
+short_names <- stringr::str_extract(file_names, "(?<=_).*(?=\\.csv)")
+
+# Name each data frame in the list by its original file name
+names(cpc_matrices) <- short_names
+
+# --- Combine the dfs
+
+# Add a new column to each data frame with the name of the data frame
+cpc_matrices <- purrr::map2(cpc_matrices, short_names, ~mutate(.x, source = .y))
+
+# Combine into one df
+cpc_matrices_all <- bind_rows(cpc_matrices)
+
+# replace NA values with 0
+cols <- c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb")
+
+cpc_matrices_all <- cpc_matrices_all %>%
+  mutate(across(all_of(cols), ~replace_na(., 0)))
+
+# Add column to sum all flow types
+cpc_matrices_all <- cpc_matrices_all %>%
+  mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE))
+
+# ------- Geo boundaries
+
+# England
+msoa_shp_2011 <- st_read("data/external/msoa_england_2011/infuse_msoa_lyr_2011_clipped.shp")
+
+# West Yorkshire (All zones with zone_type == 1)
+
+study_area_large <- msoa_shp_2011 %>%
+  filter(geo_code %in% zones_internal$msoa) %>%
+  rename(MSOA11CD = geo_code)
+
+# Leeds
+
+# --- administrative boundaries
+study_area <- st_read("data/interim/study_area_boundary.geojson")
+# convert to desired resolution
+geography = "MSOA"
+study_area = study_area_geographies(study_area = study_area,
+                                    geography = geography)
+
+# ADD 2011 MSOA codes to study area
+msoa_2011_2021_lookup <- read_csv("data/external/cpc/MSOA_2011_2021.csv")
+
+study_area <- study_area %>%
+  left_join(msoa_2011_2021_lookup %>%
+              select(c("MSOA11CD", "MSOA21CD")),
+            by = "MSOA21CD")
+
+
+# ----------------------- 2. ADD GEOGRAPHIC COORDINATES TO ZONES ----------------------- #
+
+#  ----- filter zones to study area
+
+# filter matrices to keep study area zones only (i.e intrazonal flows only)
+cpc_matrices_all_internal <- cpc_matrices_all %>%
+  filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id)
+
+
+# ----- Add desire lines
+
+# prepare data for od::od_to_sf()
+
+# ----- add msoa 2011 codes
+cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
+  # join origin data
+  left_join(zones_internal %>%
+              select(msoa, zone_id, msoa_name) %>%
+              rename(from_msoa = msoa, from_msoa_name = msoa_name),
+            by = c("from_zone" = "zone_id")) %>%
+  # join destination data
+  left_join(zones_internal %>%
+              select(msoa, zone_id, msoa_name) %>%
+              rename(to_msoa = msoa, to_msoa_name = msoa_name),
+            by = c("to_zone" = "zone_id")) %>%
+  # move msoa codes to first two columns (for od::od_to_sf())
+  relocate("from_msoa", .before = everything()) %>%
+  relocate("to_msoa", .after = "from_msoa")
+
+# od::od_to_sf
+
+# cpc_matrices_all_internal_sf <- od::od_to_sf(x = cpc_matrices_all_internal,
+#                                              z = study_area_cpc_internal,
+#                                              silent = FALSE)
+
+# Filter to Leeds only
+cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
+  filter(from_msoa %in% study_area$MSOA11CD & to_msoa %in% study_area$MSOA11CD)
+
+# Rename columns
+cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
+  rename(Origin = from_msoa, Destination = to_msoa)
+
+
+# Filter matrix by distance also adds desire lines
+cpc_matrices_all_internal_sf <- filter_matrix_by_distance(zones = study_area_large,
+                                                          od_matrix = cpc_matrices_all_internal,
+                                                          dist_threshold = 1000)
+
+
+
+
+# -------------- 3.  JOIN TRAVEL TIME DATA
+
+# ----------  MSOA level
+
+# read in the data
+tt_matrix_msoa <- arrow::read_parquet("data/processed/travel_times/MSOA/travel_time_matrix_expanded.parquet")
+
+# some OD pairs don't have travel time data (for some combinations). Expand grid so that we explitly mention these OD pairs
+tt_matrix_msoa_exp <- tidyr::crossing(from_id = tt_matrix_msoa$from_id, to_id = tt_matrix_msoa$to_id, combination = tt_matrix_msoa$combination)
+
+tt_matrix_msoa <- tt_matrix_msoa_exp %>%
+  left_join(tt_matrix_msoa, by = c("from_id", "to_id", "combination"))
+
+
+
+# add metadata (MSOA for each home and workplace zone)
+tt_matrix_msoa <- tt_matrix_msoa %>%
+  left_join(study_area %>%
+              select(MSOA21CD, MSOA11CD, OBJECTID) %>%
+              st_drop_geometry() %>%
+              rename_with(~paste0(., "_home")) %>%
+              mutate(across(everything(), ~as.character(.))),
+            by = c("from_id" = "OBJECTID_home")) %>%
+  left_join(study_area %>%
+              select(MSOA21CD, MSOA11CD, OBJECTID) %>%
+              st_drop_geometry() %>%
+              rename_with(~paste0(., "_work")) %>%
+              mutate(across(everything(), ~as.character(.))),
+            by = c("to_id" = "OBJECTID_work"))
+
+
+# ----- Prepare time_of_day column in tt_matrix to match demand matrix
+
+# keep only pt travel times
+tt_matrix_msoa <- tt_matrix_msoa %>%
+  filter(str_detect(combination, "pt_"))
+
+# add combination column to cpc data
+cpc_matrices_all_internal_sf <- cpc_matrices_all_internal_sf %>%
+  mutate(combination = case_when(
+    source %in% c("05-06", "06-07", "07-08", "08-09", "09-10", "10-11", "11-12") ~ "pt_wkday_morning",
+    source %in% c("12-13", "13-14", "14-15", "15-16", "16-17") ~ "pt_wkday_afternoon",
+    source %in% c("17-18", "18-19", "19-20") ~ "pt_wkday_evening")
+    )
+
+# group by combination column
+cols_to_sum = c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb", "total_flow")
+
+cpc_matrices_all_internal_sf_grouped <- cpc_matrices_all_internal_sf %>%
+  #st_drop_geometry() %>%
+  group_by(Origin, Destination, distance_m, combination) %>%
+  summarise(across(cols_to_sum, sum, na.rm = TRUE))
+
+
+# join travel time data and census commute data
+#TODO: join on time of day also!!!
+
+census_work_msoa_tt <- cpc_matrices_all_internal_sf_long %>%
+  left_join(tt_matrix_msoa, by = c("Origin" = "MSOA11CD_home", "Destination" = "MSOA11CD_work", "combination"))
+
+
+# save
+#write_csv(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.csv")
+arrow::write_parquet(census_work_msoa_tt, )
+
+

From fa52a059ce28ac18674adb624f4c1a014cb7c1fd Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Fri, 1 Nov 2024 17:44:20 +0000
Subject: [PATCH 05/16] progress on temporal clustering

---
 code/demand_cluster_flows.R      |   2 +-
 code/demand_cluster_flows_prep.R |  39 ++----
 code/demand_on_buses.R           |  98 ++++++++++-----
 code/performance_od.R            | 208 ++++++++-----------------------
 code/travel_demand_temporal.R    |  55 ++++----
 code/x_cpc_exploration.R         |  14 ++-
 6 files changed, 160 insertions(+), 256 deletions(-)

diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R
index 5400e80..54dbf87 100644
--- a/code/demand_cluster_flows.R
+++ b/code/demand_cluster_flows.R
@@ -16,8 +16,8 @@ source("R/dbscan_sensitivity.R")
 ########## ----------------------- Read in the data ----------------------- ##########
 geography <- "MSOA"
 
+od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson"))
 # od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios.geojson"))
-od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_mode.geojson"))
 
 # path tp save plots
 plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/")
diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R
index fe8ac10..0ef5388 100644
--- a/code/demand_cluster_flows_prep.R
+++ b/code/demand_cluster_flows_prep.R
@@ -46,20 +46,12 @@ study_area <- study_area %>%
 
 # Demand (census) + supply (travel time) data
 
-if(mode == FALSE){
-  # data with "commute_all" only
-  #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
-  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
-} else{
-  # data with modes
-  #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
-  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))
-}
-
-# filter to specific combination
-# TODO: get seperate flows for car and pt, and keep two combinations
-od_demand <- od_demand %>%
-  filter(combination == "pt_wkday_morning")
+od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
+
+# # filter to specific combination
+# # TODO: get seperate flows for car and pt, and keep two combinations
+# od_demand <- od_demand %>%
+#   filter(combination == "pt_wkday_morning")
 
 od_demand <- od_demand %>%
   select(-distance_m)
@@ -125,7 +117,7 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE)
 dir(tempdir())
 
 od_demand_for_jittering <- od_demand_filtered %>%
-  select(Origin, Destination, starts_with("commute_"))
+  select(Origin, Destination, total_flow)
 
 # arguments are here: https://github.com/dabreegster/odjitter?tab=readme-ov-file#details
 
@@ -138,7 +130,7 @@ od_demand_jittered = odjitter::jitter(
   origin_key = "Origin",
   destination_key = "Destination",
   # column with the flows (to be disaggregated)
-  disaggregation_key = "commute_all",
+  disaggregation_key = "total_flow",
   # What's the maximum number of trips per output OD row that's allowed?
   disaggregation_threshold = 95,
 
@@ -167,9 +159,12 @@ od_demand_jittered = odjitter::jitter(
   deduplicate_pairs = TRUE
 )
 
+# # TODO: remove when jittering is fixed
+# od_demand_jittered <- od_demand_filtered
+
 # jittered returns fractions. Round them
 od_demand_jittered <- od_demand_jittered %>%
-  mutate(across(starts_with("commute_"), round))
+  mutate(across(starts_with("total_flow"), round))
 
 
 
@@ -227,15 +222,7 @@ od_demand_jittered_scenarios <- od_demand_jittered %>%
 
 ### Save the sfs for each scenario
 
-
-if(mode == FALSE){
-  # data with "commute_all" only
-  st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios.geojson"), delete_dsn = TRUE)
-
-} else{
-  # data with modes
-  st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_mode.geojson"), delete_dsn = TRUE)
-  }
+st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson"), delete_dsn = TRUE)
 
 
 
diff --git a/code/demand_on_buses.R b/code/demand_on_buses.R
index 12a1466..01b8806 100644
--- a/code/demand_on_buses.R
+++ b/code/demand_on_buses.R
@@ -23,7 +23,7 @@ source("R/filter_od_matrix.R")
 ########## ----------------------- Read in the data ----------------------- ##########
 
 # is the demand data disaggregated by mode?
-mode = TRUE
+mode = FALSE
 
 # ----------- 1. Study area
 
@@ -54,25 +54,18 @@ gtfs_paths = paste0(gtfs_dir, gtfs_names)
 gtfs_bus <- gtfstools::read_gtfs(gtfs_paths[grepl("bus", gtfs_paths)])
 #gtfs_rail <- gtfstools::read_gtfs(gtfs_paths[grepl("rail", gtfs_paths)])
 
-# --- filter the feeds to a specific point in time
-gtfs_trip_ids <- gtfs_bus$frequencies %>%
-  filter(start_time == "07:30:00")
-
-gtfs_bus <- gtfs_bus %>%
-  gtfstools::filter_by_trip_id(gtfs_trip_ids$trip_id)
+# # --- filter the feeds to a specific point in time
+# gtfs_trip_ids <- gtfs_bus$frequencies %>%
+#   filter(start_time == "07:30:00")
+#
+# gtfs_bus <- gtfs_bus %>%
+#   gtfstools::filter_by_trip_id(gtfs_trip_ids$trip_id)
 
 # ----------- 3.  Census OD data
 
 # Demand (census) + supply (travel time) data
 
-if(mode == TRUE){
-  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet"))
-
-} else{
-  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet"))
-}
-
-# od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
+od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed.parquet"))
 
 # # columns to reference (they differ based on geography)
 # from_id_col = paste0(geography, "21CD_home")
@@ -107,8 +100,8 @@ od_supply <- od_supply %>%
 
 
 # save the output
-arrow::write_parquet(od_supply, paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage.parquet"))
-#od_supply <- arrow::read_parquet( paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage.parquet"))
+arrow::write_parquet(od_supply, paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage_temporal.parquet"))
+#od_supply <- arrow::read_parquet( paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage_temporal.parquet"))
 
 # --- remove od pairs with very short distance
 
@@ -135,13 +128,12 @@ od_supply_filtered = filter_matrix_by_distance(zones = study_area,
 #   filter(combination == "pt_wkday_morning")
 
 od_sd <- od_demand %>%
-  filter(combination == "pt_wkday_morning") %>%
   left_join(od_supply_filtered %>%
               select(-distance_m) %>%
               st_drop_geometry(),
   by = c("Origin", #  = from_id_col,
-                    "Destination", #  = to_id_col,
-                    "departure_time" = "start_time"))
+         "Destination", #  = to_id_col,
+         "departure_time" = "start_time"))
 
 
 # # save output
@@ -153,7 +145,7 @@ od_sd <- od_demand %>%
 # Method 1: all_to_all
 trips_sd_1 <- od_sd %>%
   group_by(trip_id, departure_time, combination) %>%
-  summarise(potential_demand_all_to_all = sum(commute_all, na.rm = TRUE)) %>%
+  summarise(potential_demand_all_to_all = sum(total_flow, na.rm = TRUE)) %>%
   ungroup()
 
 # Method 2: frequency-based
@@ -162,7 +154,7 @@ trips_sd_2 <- od_sd %>%
   # get number of passengers on each route for each OD pair
   mutate(group_id = cur_group_id(),
          frequency_min = 3600/headway_secs,
-         commute_route = round((commute_all * frequency_min) / sum(frequency_min))) %>%
+         commute_route = round((total_flow * frequency_min) / sum(frequency_min))) %>%
   ungroup() %>%
   # sum over the route
   group_by(trip_id, departure_time, combination) %>%
@@ -173,7 +165,7 @@ trips_sd_2 <- od_sd %>%
 trips_sd_3 <- od_sd %>%
   group_by(departure_time, combination, Origin, Destination) %>%
   # get number of passengers on each route for each OD pair
-  mutate(commute_route = round((commute_all / n()))) %>%
+  mutate(commute_route = round((total_flow / n()))) %>%
   ungroup() %>%
   # sum over the route
   group_by(trip_id, departure_time, combination) %>%
@@ -204,12 +196,8 @@ od_trips_sd <- od_trips_sd %>%
   dplyr::top_n(1, potential_demand_equal_split) %>%
   ungroup()
 
-if(mode == TRUE){
-  arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))
 
-} else{
-  arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
-}
+arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
 
 # ----------- 4. Add geometry to plot results
 
@@ -229,7 +217,7 @@ trips_sd_sf <- trips_sd %>%
 
 
 # ----------- 5. Save the output
-st_write(trips_sd_sf, paste0("data/processed/travel_demand/trips_potential_demand_census_", geography, ".geojson"), delete_dsn = TRUE)
+st_write(trips_sd_sf, paste0("data/processed/travel_demand/trips_potential_demand_temporal_", geography, ".geojson"), delete_dsn = TRUE)
 
 
 
@@ -245,18 +233,24 @@ trips_sd_sf_shape_sum <- trips_sd_sf %>%
 # ###########  ---------------------------  5. Plots  --------------------------- ##########
 
 
+
 # ----------- Potential demand histograms ----------- #
 
-plots_path <- "data/processed/plots/eda/speed_demand_cutoffs/"
+plots_path <- "data/processed/plots/eda/speed_demand_cutoffs/temporal/"
 
 # TRIP LEVEL
 
+trips_sd = trips_sd %>%
+  mutate(combination = factor(combination, levels = c("pt_wkday_morning", "pt_wkday_afternoon", "pt_wkday_evening")))
+
+
 # equal split
 
 trips_sd %>%
   #mutate(potential_demand_equal_split = replace_na(potential_demand_equal_split, 0)) %>%
   ggplot(aes(x = potential_demand_equal_split)) +
   geom_histogram(bins = 50, alpha = 0.8) +
+  facet_grid(combination ~.) +
   labs(title = "Potential demand on PT routes",
        subtitle = "Equal split of demand from each OD\nonto routes that serve it",
        x = "Potential demand (no. of passengers)",
@@ -276,7 +270,7 @@ trips_sd %>%
   mutate(potential_demand = round(potential_demand / 1000, 2)) %>%
   ggplot(aes(x = potential_demand)) +
   geom_histogram(binwidth = 0.5, alpha = 0.8) +
-  facet_wrap(~ distribution_type) +
+  facet_grid(combination ~ distribution_type) +
   labs(title = "Potential demand on PT routes",
        subtitle = "Different approaches to distributing OD demand",
        x = "Potential demand (no. of passengers using route) - Thousands",
@@ -304,7 +298,7 @@ trips_sd %>%
   mutate(potential_demand = round(potential_demand / 1000, 2)) %>%
   ggplot(aes(x = potential_demand)) +
   geom_histogram(binwidth = 0.1, alpha = 0.8) +
-  facet_wrap(~ distribution_type) +
+  facet_grid(combination ~ distribution_type) +
   labs(title = "Potential demand on PT routes",
        subtitle = "Different approaches to distributing OD demand",
        x = "Potential demand (no. of passengers using route) - Thousands",
@@ -319,6 +313,9 @@ ggsave(filename = paste0(plots_path, "plot_hist_potential_demand_routes_two_meth
 
 #  ----- OD LEVEL
 
+od_trips_sd = od_trips_sd %>%
+  mutate(combination = factor(combination, levels = c("pt_wkday_morning", "pt_wkday_afternoon", "pt_wkday_evening")))
+
 # equal split
 
 od_trips_sd %>%
@@ -335,6 +332,7 @@ od_trips_sd %>%
   mutate(potential_demand = round(potential_demand / 1000, 2)) %>%
   ggplot(aes(x = potential_demand)) +
   geom_histogram(binwidth = 0.5, alpha = 0.8) +
+  facet_grid(.~combination) +
   #geom_density()+
   #scale_y_log10() +
   #geom_vline(data = percentiles_df, aes(xintercept = potential_demand, color = "blue")) +
@@ -370,7 +368,7 @@ od_trips_sd %>%
   ggplot(aes(x = potential_demand)) +
   geom_histogram(binwidth = 0.5, alpha = 0.8) +
   #scale_y_log10() +
-  facet_wrap(~ distribution_type) +
+  facet_grid(combination ~ distribution_type) +
   labs(title = "Potential demand on PT routes",
        subtitle = "Different approaches to distributing OD demand",
        x = "Potential demand on busiest route serving OD pair ('000)",
@@ -400,7 +398,7 @@ od_trips_sd %>%
   ggplot(aes(x = potential_demand)) +
   geom_histogram(binwidth = 0.5, alpha = 0.8) +
   #scale_y_log10() +
-  facet_wrap(~ distribution_type) +
+  facet_grid(combination ~ distribution_type) +
   labs(title = "Potential demand on PT routes",
        subtitle = "Different approaches to distributing OD demand",
        x = "Potential demand on busiest route serving OD pair ('000)",
@@ -447,6 +445,25 @@ trips_sd %>%
 
 ggsave(filename = paste0(plots_path, "plot_dens_potential_demand_trips_equal_split.png"))
 
+trips_sd %>%
+  #mutate(potential_demand_equal_split = replace_na(potential_demand_equal_split, 0)) %>%
+  ggplot(aes(x = potential_demand_equal_split)) +
+  geom_density()+
+  #scale_y_log10() +
+  geom_vline(data = percentiles_trip, aes(xintercept = potential_demand, color = labels))+
+  facet_grid(.~combination) +
+  labs(title = "Potential demand on PT routes",
+       subtitle = "Equal split of demand from each OD\nonto routes that serve it",
+       x = "Potential demand (no. of passengers)",
+       color = "Percentile") +
+  theme(
+    axis.text.x = element_text(angle = 45, hjust = 1),
+    axis.title.x = element_text(size = 8),
+    axis.title.y = element_text(size = 8),
+    legend.position = "bottom"
+  )
+
+ggsave(filename = paste0(plots_path, "plot_dens_potential_demand_trips_equal_split_facet.png"))
 
 
 # --- OD level
@@ -473,6 +490,7 @@ od_trips_sd %>%
   mutate(potential_demand = round(potential_demand / 1000, 2)) %>%
   ggplot(aes(x = potential_demand)) +
   geom_density()+
+  facet_grid(.~combination) +
   #scale_y_log10() +
   geom_vline(data = percentiles_df, aes(xintercept = potential_demand, color = labels))+
   scale_color_brewer(type = "qual", palette = "Set1") +
@@ -515,6 +533,16 @@ percentiles_speed <- data.frame(
   labels = c("10", "25", "50", "75", "90")
 )
 
+percentiles_speed_combination <- od_trips_sd %>%
+  group_by(combination) %>%
+  summarise(
+    percentiles = list(c(0.1, 0.25, 0.5, 0.75, 0.9)),
+    speed_kph = list(quantile(speed_kph, c(0.1, 0.25, 0.5, 0.75, 0.9))),
+    labels = list(c("10", "25", "50", "75", "90"))
+  ) %>%
+  unnest(cols = c(percentiles, speed_kph, labels))
+
+
 
 od_trips_sd %>%
   ggplot(aes(x = speed_kph)) +
@@ -536,6 +564,8 @@ od_trips_sd %>%
 ggsave(filename = paste0(plots_path, "plot_dens_speeds_od_equal_split.png"))
 
 
+
+
 #
 #
 # # ----- a)  potential ridership on all different trips
diff --git a/code/performance_od.R b/code/performance_od.R
index 1ca4dbc..d6d806b 100644
--- a/code/performance_od.R
+++ b/code/performance_od.R
@@ -15,9 +15,10 @@ source("R/filter_od_matrix.R")
 geography = "MSOA"
 # are we processing the data that is disaggregated by mode?
 mode = FALSE
+source = "cpc"
 
 # --- where do we want to save the plots?
-plots_path <- paste0("data/processed/plots/eda/od_performance/", geography, "/")
+plots_path <- paste0("data/processed/plots/eda/od_performance/", geography, "/", source, "/")
 
 
 
@@ -40,12 +41,7 @@ study_area <- study_area %>%
 
 # ----- Travel time and demand matrix
 
-if(mode == TRUE){
-  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
-
-} else{
-  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
-}
+od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), ".parquet"))
 
 
 
@@ -60,7 +56,8 @@ to_id_col = paste0(geography, "21CD_work")
 # rename columns for function below
 od_demand <- od_demand %>%
   rename(Origin = all_of(from_id_col),
-         Destination = all_of(to_id_col))
+         Destination = all_of(to_id_col)) %>%
+  relocate(Origin, Destination)
 
 
 
@@ -98,7 +95,7 @@ od_demand_sf_rank <- od_demand_sf %>%
          speed_percentile_fct = cut(speed_percentile,
                                     breaks = seq(0, 1, by = 0.25),
                                     include.lowest = TRUE),
-         demand_percentile = percent_rank(commute_all),
+         demand_percentile = percent_rank(total_flow),
          demand_percentile_fct = cut(demand_percentile,
                                      breaks = seq(0, 1, by = 0.25),
                                      include.lowest = TRUE),
@@ -108,12 +105,7 @@ od_demand_sf_rank <- od_demand_sf %>%
 
 # ---------- 5. Save the output
 
-if(mode == TRUE){
-  arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet"))
-
-} else{
-  arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet"))
-}
+arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed.parquet"))
 
 
 
@@ -134,7 +126,7 @@ tm_shape(study_area) +
              filter(combination == "pt_wkday_morning") %>%
              mutate(n_rides = as.factor(round(n_rides)))) +
   tm_lines(col = "n_rides",
-           lwd = "commute_all",
+           lwd = "total_flow",
            #legend.lwd.show = FALSE,
            alpha = 0.5,
            title.col = "No. of rides required",
@@ -175,8 +167,8 @@ tm_shape(study_area) +
              alpha = 0.5) +
   tm_shape(od_demand_sf_rank %>%
              filter(combination == "pt_wkday_morning", n_rides == 1)) +
-  tm_lines(col = "commute_all",
-           lwd = "commute_all",
+  tm_lines(col = "total_flow",
+           lwd = "total_flow",
            legend.lwd.show = FALSE,
            alpha = 0.8,
            title.col = "Demand between OD pair",
@@ -216,8 +208,8 @@ tm_shape(study_area) +
           alpha = 0.5) +
   tm_shape(od_demand_sf_rank %>%
              filter(combination == "pt_wkday_morning", speed_percentile <= 0.25)) +
-  tm_lines(col = "commute_all",
-           lwd = "commute_all",
+  tm_lines(col = "total_flow",
+           lwd = "total_flow",
            scale = 10,
            palette = "RdYlGn",
            legend.lwd.show = FALSE,
@@ -257,8 +249,8 @@ tm_shape(study_area) +
   tm_shape(od_demand_sf_rank %>%
              mutate(n_rides = round(n_rides)) %>%
              filter(speed_percentile <= 0.25, combination == "pt_wkday_morning")) +
-  tm_lines(col = "commute_all",
-           lwd = "commute_all",
+  tm_lines(col = "total_flow",
+           lwd = "total_flow",
            scale = 10,
            #palette = "Blues",
            #style = "quantile",
@@ -310,7 +302,7 @@ tm_shape(study_area) +
              filter(combination == "pt_wkday_morning", !is.na(quadrant)) %>%
              mutate(n_rides = as.factor(round(n_rides)))) +
   tm_lines(col = "n_rides",
-           lwd = "commute_all",
+           lwd = "total_flow",
            #legend.lwd.show = FALSE,
            alpha = 0.5,
            title.col = "No. of rides required",
@@ -354,7 +346,7 @@ tm_shape(study_area) +
              filter(combination == "pt_wkday_morning", quadrant == "low speed high demand") %>%
              mutate(n_rides = as.factor(round(n_rides)))) +
   tm_lines(col = "n_rides",
-           lwd = "commute_all",
+           lwd = "total_flow",
            #legend.lwd.show = FALSE,
            alpha = 0.5,
            title.col = "No. of rides required",
@@ -394,8 +386,8 @@ tm_shape(study_area) +
   tm_shape(od_demand_sf_rank %>%
              filter(combination == "pt_wkday_morning", quadrant == "low speed high demand", n_rides != 0) %>%
              mutate(n_rides = as.factor(round(n_rides)))) +
-  tm_lines(col = "commute_all",
-           lwd = "commute_all",
+  tm_lines(col = "total_flow",
+           lwd = "total_flow",
            #legend.lwd.show = FALSE,
            alpha = 0.5,
            title.col = "Demand between OD pair",
@@ -443,7 +435,7 @@ tm_shape(study_area) +
              filter(combination == "pt_wkday_morning", quadrant == "low speed high demand") %>%
              mutate(n_rides = as.factor(round(n_rides)))) +
   tm_lines(col = "n_rides",
-           lwd = "commute_all",
+           lwd = "total_flow",
            #legend.lwd.show = FALSE,
            alpha = 0.5,
            title.col = "No. of rides required",
@@ -487,7 +479,7 @@ tm_shape(study_area) +
              filter(combination == "pt_wkday_morning") %>%
              mutate(n_rides = as.factor(round(n_rides)))) +
   tm_lines(col = "n_rides",
-           lwd = "commute_all",
+           lwd = "total_flow",
            #legend.lwd.show = FALSE,
            alpha = 0.5,
            title.col = "No. of rides required",
@@ -514,133 +506,37 @@ tm_shape(study_area) +
 
 
 
+# ----- New plots after getting CPC temporal data
 
+tm_shape(study_area) +
+  tm_fill(col = "grey95") +
+  tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(od_demand_sf_rank %>%
+             filter(!is.na(quadrant)) %>%
+             mutate(n_rides = as.factor(round(n_rides)))) +
+  tm_lines(col = "n_rides",
+           lwd = "total_flow",
+           #legend.lwd.show = FALSE,
+           alpha = 0.5,
+           title.col = "No. of rides required",
+           title.lwd = "Demand between OD pair",
+           legend.col.is.portrait = FALSE,
+           palette = "Dark2", #-RdYlGn
+           style = "quantile",
+           scale = 15) +
+  tm_facets_grid("quadrant", "combination",
+                 type = "stack",
+                 free.coords = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = "Demand between OD pairs\nfaceted by combination of speed and demand (percentiles)\nbetween OD pairs",
+            main.title.size = 1.3,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            legend.outside = TRUE,
+            legend.outside.position = "bottom",
+            legend.stack = "horizontal",
+            legend.title.size = 0.9,
+            frame = FALSE)
 
-# ARCHIVE FOR NOW
-
-#
-#
-# ggplot(od_demand_sf_rank %>%
-#          filter(ride_time > 5),
-#        aes(x = distance_m/ride_time, y = speed_kph, color = combination)) +
-#   geom_point()
-#
-#
-# ggplot(od_demand_sf_rank %>%
-#          filter(ride_time > 5, combination == "pt_wkday_morning"),
-#        aes(x = distance_m/ride_time, y = speed_kph)) +
-#   geom_point()
-#
-# ggplot(od_demand_sf_rank %>%
-#          filter(ride_time > 5, combination != "car"),
-#        aes(x = ride_time / total_time)) +
-#   geom_histogram() +
-#   facet_wrap(~combination, ncol = 3)
-#
-#
-#
-# # ----- exploratory plots
-# ggplot(od_demand_sf_rank, aes(x = speed_kph, y = speed_percentile, color = combination)) +
-#   geom_point()
-#
-# # speed
-# od_demand_sf_rank %>%
-#   filter(combination != "car") %>%
-#   ggplot(aes(x = speed_kph, y = speed_percentile, color = combination)) +
-#   geom_point()
-#
-# # demand
-# od_demand_sf_rank %>%
-#   filter(combination != "car") %>%
-#   ggplot(aes(x = commute_all, y = demand_percentile, color = combination)) +
-#   geom_point()
-#
-# od_demand_sf_rank %>%
-#   filter(combination != "car") %>%
-#   ggplot(aes(x = demand_percentile, fill = combination)) +
-#   geom_histogram() +
-#   facet_wrap(~combination, ncol = 3)
-#
-# od_demand_sf_rank %>%
-#   filter(combination != "car") %>%
-#   ggplot(aes(x = commute_all, fill = combination)) +
-#   geom_histogram() +
-#   facet_wrap(~combination, ncol = 3)
-#
-#
-# od_demand_sf_rank_f <- od_demand_sf_rank %>%
-#   filter(combination == "pt_wkday_morning")
-#
-#
-# od_demand_sf_rank %>%
-#   filter(combination == "pt_wkday_morning") %>%
-#   # as.factor to avoid "continuous value to discrete scale" error
-#   ggplot(aes(x = speed_percentile, y = demand_percentile, color = distance_m)) +
-#   geom_point() +
-#   scale_color_distiller(palette="Reds", direction = 1)
-#
-#
-# od_demand_sf_rank %>%
-#   filter(combination != "car") %>%
-#   # as.factor to avoid "contiuous value to discrete scale
-#   ggplot(aes(x = speed_percentile, y = demand_percentile, color = distance_m)) +
-#   geom_point() +
-#   scale_color_distiller(palette="Reds", direction = 1) +
-#   facet_wrap(~combination, ncol = 3)
-#
-#
-#
-#
-#
-# # round n_rides to whole number
-# # TODO: move this further upstream (previous script)
-# od_demand_sf_rank <- od_demand_sf_rank %>%
-#   mutate(n_rides = round(n_rides))
-#
-#
-# # ---------- 5. Create communities based on demand (or travel time?)
-#    # different community for PT and PVT
-#
-# # ----------- 5. Plots
-#
-# # ----------- prep the layer
-# # TODO: remove this when we have multiple start times (currently the od_supply df only has 7:30 - WHY? )
-# od_demand_sf_rank <- od_demand_sf_rank %>%
-#   filter(combination == "pt_wkday_morning")
-#
-#
-# # ----------- 6. Maps
-#
-#
-#
-#
-#
-#
-#
-#
-#
-# sfnetworks::as_sfnetwork(od_demand_sf_rank, directed = FALSE) -> x
-#
-# x %>% activate(nodes) %>%
-#   mutate(group = tidygraph::group_louvain(weights = `commute_all`)) -> x2
-#
-# # OR
-# # x2 = x %>%
-# #   morph(to_linegraph) %>%
-# #   mutate(group = tidygraph::group_louvain(weights = "commute_all")) %>%
-# #   unmorph()
-#
-#
-# plot(st_geometry(x, "edges"), col = "grey", lwd = 0.5)
-# plot(st_geometry(study_area))
-#
-# x2 %>%
-#   activate("nodes") %>%
-#   st_as_sf() %>%
-#   transmute(group = as.factor(group)) %>%
-#   plot(lwd = 4, add = TRUE)
-#
-#
-#
-#
-#
diff --git a/code/travel_demand_temporal.R b/code/travel_demand_temporal.R
index c4991f5..97d6857 100644
--- a/code/travel_demand_temporal.R
+++ b/code/travel_demand_temporal.R
@@ -9,7 +9,7 @@ library(sf)
 source("R/study_area_geographies.R")
 source("R/filter_od_matrix.R")
 
-# --------------- 1. READ IN THE DEMaND DATA
+# --------------- 1. READ IN THE DEMAND DATA
 
 # ------- Zoning layer (2011 MSOAs)
 
@@ -86,7 +86,7 @@ study_area <- study_area %>%
             by = "MSOA21CD")
 
 
-# ----------------------- 2. ADD GEOGRAPHIC COORDINATES TO ZONES ----------------------- #
+# ----------------------- 2. ADD MSOA CODES AND FILTER TO INTERNAL FLOWS ----------------------- #
 
 #  ----- filter zones to study area
 
@@ -94,9 +94,6 @@ study_area <- study_area %>%
 cpc_matrices_all_internal <- cpc_matrices_all %>%
   filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id)
 
-
-# ----- Add desire lines
-
 # prepare data for od::od_to_sf()
 
 # ----- add msoa 2011 codes
@@ -115,30 +112,12 @@ cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
   relocate("from_msoa", .before = everything()) %>%
   relocate("to_msoa", .after = "from_msoa")
 
-# od::od_to_sf
-
-# cpc_matrices_all_internal_sf <- od::od_to_sf(x = cpc_matrices_all_internal,
-#                                              z = study_area_cpc_internal,
-#                                              silent = FALSE)
 
 # Filter to Leeds only
 cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
   filter(from_msoa %in% study_area$MSOA11CD & to_msoa %in% study_area$MSOA11CD)
 
-# Rename columns
-cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
-  rename(Origin = from_msoa, Destination = to_msoa)
-
-
-# Filter matrix by distance also adds desire lines
-cpc_matrices_all_internal_sf <- filter_matrix_by_distance(zones = study_area_large,
-                                                          od_matrix = cpc_matrices_all_internal,
-                                                          dist_threshold = 1000)
-
-
-
-
-# -------------- 3.  JOIN TRAVEL TIME DATA
+# -------------- 3.  JOIN TRAVEL TIME DATA ---------- #
 
 # ----------  MSOA level
 
@@ -176,7 +155,7 @@ tt_matrix_msoa <- tt_matrix_msoa %>%
   filter(str_detect(combination, "pt_"))
 
 # add combination column to cpc data
-cpc_matrices_all_internal_sf <- cpc_matrices_all_internal_sf %>%
+cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
   mutate(combination = case_when(
     source %in% c("05-06", "06-07", "07-08", "08-09", "09-10", "10-11", "11-12") ~ "pt_wkday_morning",
     source %in% c("12-13", "13-14", "14-15", "15-16", "16-17") ~ "pt_wkday_afternoon",
@@ -186,21 +165,31 @@ cpc_matrices_all_internal_sf <- cpc_matrices_all_internal_sf %>%
 # group by combination column
 cols_to_sum = c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb", "total_flow")
 
-cpc_matrices_all_internal_sf_grouped <- cpc_matrices_all_internal_sf %>%
-  #st_drop_geometry() %>%
-  group_by(Origin, Destination, distance_m, combination) %>%
-  summarise(across(cols_to_sum, sum, na.rm = TRUE))
+cpc_matrices_all_internal_grouped <- cpc_matrices_all_internal %>%
+  group_by(from_msoa, to_msoa, combination) %>%
+  summarise(across(cols_to_sum, sum, na.rm = TRUE)) %>%
+  ungroup()
 
 
 # join travel time data and census commute data
 #TODO: join on time of day also!!!
 
-census_work_msoa_tt <- cpc_matrices_all_internal_sf_long %>%
-  left_join(tt_matrix_msoa, by = c("Origin" = "MSOA11CD_home", "Destination" = "MSOA11CD_work", "combination"))
+cpc_matrices_all_internal_tt <- cpc_matrices_all_internal_grouped %>%
+  left_join(tt_matrix_msoa, by = c("from_msoa" = "MSOA11CD_home", "to_msoa" = "MSOA11CD_work", "combination"))
+
+
+# # ---------- 4. ADD DESIRE LINES ---------- #
+#
+# # # Filter matrix by distance also adds desire lines
+# cpc_matrices_all_internal_tt_sf <- filter_matrix_by_distance(zones = study_area_large,
+#                                                           od_matrix = cpc_matrices_all_internal_tt,
+#                                                           dist_threshold = 500)
+
 
 
 # save
-#write_csv(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.csv")
-arrow::write_parquet(census_work_msoa_tt, )
+#write_csv(cpc_matrices_all_internal_tt, "data/raw/travel_demand/cpc_matrices_2019/demand_study_area_msoa.csv")
+arrow::write_parquet(cpc_matrices_all_internal_tt, "data/raw/travel_demand/cpc_matrices_2019/demand_study_area_msoa.parquet")
+
 
 
diff --git a/code/x_cpc_exploration.R b/code/x_cpc_exploration.R
index 9d345b8..85ea814 100644
--- a/code/x_cpc_exploration.R
+++ b/code/x_cpc_exploration.R
@@ -46,6 +46,12 @@ cpc_matrices <- purrr::map2(cpc_matrices, short_names, ~mutate(.x, source = .y))
 # Combine into one df
 cpc_matrices_all <- bind_rows(cpc_matrices)
 
+# replace NA values with 0
+cols <- c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb")
+
+cpc_matrices_all <- cpc_matrices_all %>%
+  mutate(across(all_of(cols), ~replace_na(., 0)))
+
 # Add column to sum all flow types
 cpc_matrices_all <- cpc_matrices_all %>%
   mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE))
@@ -84,7 +90,7 @@ study_area <- study_area %>%
 
 #  ----- filter zones to study area
 
-# filter matrices to keep study area zones only
+# filter matrices to keep study area zones only (i.e intrazonal flows only)
 cpc_matrices_all_internal <- cpc_matrices_all %>%
   filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id)
 
@@ -119,17 +125,13 @@ cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
 cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
   rename(Origin = from_msoa, Destination = to_msoa)
 
+
 # Filter matrix by distance also adds desire lines
 cpc_matrices_all_internal_sf <- filter_matrix_by_distance(zones = study_area_large, od_matrix = cpc_matrices_all_internal, dist_threshold = 1000)
 
 
 
 
-
-
-
-
-
 map_cpc_flows = function(od_matrix,
                          area,
                          column_to_map,

From 845fdc4220e3bb65cb322c95eecd349c8286e657 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Mon, 4 Nov 2024 18:31:44 +0000
Subject: [PATCH 06/16] split points for odjitter

---
 code/demand_cluster_flows_prep.R | 127 +++++++++++++++++++++++++++----
 1 file changed, 114 insertions(+), 13 deletions(-)

diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R
index 0ef5388..0120d10 100644
--- a/code/demand_cluster_flows_prep.R
+++ b/code/demand_cluster_flows_prep.R
@@ -1,6 +1,6 @@
 # the purpose of this script is to prepare the flow data for spatial clustering #
 # This is done by jittering the flows so that it is not concentrated at zone centroids
-
+#
 library(tidyverse)
 library(sf)
 #library(lwgeom)
@@ -18,8 +18,7 @@ source("R/filter_od_matrix.R")
 ########## ----------------------- Read in the data ----------------------- ##########
 
 # is the data disaggregated by mode?
-mode <- TRUE
-#mode <- FALSE
+mode <- FALSE
 
 
 # ----------- 1. Study area
@@ -107,7 +106,111 @@ sub_zones <- sub_zones %>%
   rename(population = gbr_pd_2020_1km_UNadj)
 
 
-
+# --- Create more points (if we don't have enough points, then odjitter may not find a feasible solution)
+
+
+#' Split Points into Multiple Offset Points with CRS Transformation
+#'
+#' This function takes an `sf` object containing spatial points and splits each
+#' point into multiple new points that are offset from the original points.
+#' Each new point represents an equal share of a specified numeric column value,
+#' and is randomly distributed within a given distance range from the original
+#' point. The function transforms the CRS to a specified metric CRS for
+#' processing and then converts it back to the original CRS.
+#'
+#' @param sf_points An `sf` object representing multiple spatial points with
+#'   geometry and a numeric column to be split.
+#' @param col_to_split A string indicating the name of the numeric column to
+#'   be split across the new points.
+#' @param splits An integer indicating the number of new points to generate
+#'   from each original point.
+#' @param offset_dist_min A numeric value specifying the minimum distance
+#'   (in meters) for the offsets of the new points from the original points.
+#' @param offset_dist_max A numeric value specifying the maximum distance
+#'   (in meters) for the offsets of the new points from the original points.
+#' @param target_crs A numeric EPSG code or a PROJ string for the target
+#'   metric CRS to be used during processing.
+#'
+#' @return An `sf` object containing the new points, with the specified
+#'   numeric column value equally distributed among the generated points.
+#'
+#' @examples
+#' # Example usage
+#' library(sf)
+#' sf_points <- st_as_sf(data.frame(id = 1,
+#'                                   population = 100,
+#'                                   geometry = st_sfc(st_point(c(1, 1)),
+#'                                                     crs = 4326)))
+#' result <- split_points(sf_points, col_to_split = "population",
+#'                        splits = 5, offset_dist_min = 10,
+#'                        offset_dist_max = 20, target_crs = 32630)
+#' plot(st_geometry(result))
+split_points <- function(sf_points, col_to_split, splits, offset_dist_min,
+                         offset_dist_max, target_crs) {
+
+  # Ensure that the specified column exists in the sf object
+  if (!col_to_split %in% names(sf_points)) {
+    stop(paste("Column", col_to_split, "does not exist in the sf object."))
+  }
+
+  # Store the original CRS
+  original_crs <- st_crs(sf_points)
+
+  # Transform to the target CRS
+  sf_points_metric <- st_transform(sf_points, target_crs)
+
+  # Create an empty list to store new points
+  new_points_list <- vector("list", nrow(sf_points_metric))
+
+  # Loop through each point in the sf object
+  for (i in 1:nrow(sf_points_metric)) {
+    point <- sf_points_metric[i, ]
+    population <- point[[col_to_split]]  # Get the value of the specified column
+
+    # Calculate new population per point
+    new_col_to_split <- population / splits
+
+    # Generate new points with offsets
+    offsets <- lapply(1:splits, function(index) {
+      angle <- runif(1, 0, 2 * pi)  # Random angle in radians
+      offset_dist <- runif(1, min = offset_dist_min, max = offset_dist_max)
+      offset_x <- offset_dist * cos(angle)
+      offset_y <- offset_dist * sin(angle)
+      st_point(c(st_coordinates(point)[1] + offset_x,
+                 st_coordinates(point)[2] + offset_y))
+    })
+
+    # Create sf object for new points
+    new_points_sf <- st_sf(
+      # TODO: take column name from funciton argument instead of hardcoding
+      population = rep(new_col_to_split, splits),  # Dynamically assign the column name
+      geometry = st_sfc(offsets, crs = st_crs(point))
+    )
+
+    # Append the new points sf object to the list
+    new_points_list[[i]] <- new_points_sf
+  }
+
+  # Combine all new points into a single sf object
+  result_sf <- do.call(rbind, new_points_list)
+
+  # Transform back to the original CRS
+  result_sf <- st_transform(result_sf, original_crs)
+
+  return(result_sf)
+}
+
+# apply function
+sub_zones_2 <- split_points(sub_zones,
+                            col_to_split = "population",
+                            splits = 5,
+                            offset_dist_min = 10,
+                            offset_dist_max = 20,
+                            target_crs = 3857)
+
+
+
+#######
 
 ##### ----- STEP 2: Jittering
 
@@ -116,8 +219,11 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE)
 # confirm it's empty
 dir(tempdir())
 
+selected_combination = "pt_wkday_morning"
+
 od_demand_for_jittering <- od_demand_filtered %>%
-  select(Origin, Destination, total_flow)
+  #filter(combination == selected_combination) %>%
+  select(Origin, Destination, total_flow, combination)
 
 # arguments are here: https://github.com/dabreegster/odjitter?tab=readme-ov-file#details
 
@@ -132,7 +238,7 @@ od_demand_jittered = odjitter::jitter(
   # column with the flows (to be disaggregated)
   disaggregation_key = "total_flow",
   # What's the maximum number of trips per output OD row that's allowed?
-  disaggregation_threshold = 95,
+  disaggregation_threshold = 50,
 
   # ----- arguments for ZONES ----- #
 
@@ -142,7 +248,7 @@ od_demand_jittered = odjitter::jitter(
   # ----- Arguments for SUBPOINTS ----- #
 
   # subpoints to jitter origins and destinations to
-  subpoints = sub_zones,
+  subpoints = sub_zones_2,
   # # alternatively, define different points for origins and destinations
   # subpoints_origins = points_home,
   # subpoints_destinations = points_work,
@@ -159,7 +265,7 @@ od_demand_jittered = odjitter::jitter(
   deduplicate_pairs = TRUE
 )
 
-# # TODO: remove when jittering is fixed
+# TODO: remove when jittering is fixed
 # od_demand_jittered <- od_demand_filtered
 
 # jittered returns fractions. Round them
@@ -350,8 +456,3 @@ st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geo
 #
 # # density plot: facet by demand percentile: Keep potential_demand_equal_split = NA (replace with 0)
 #
-#
-#
-#
-#
-#

From b6496a8bf5093450faf96b1509922d53f9133728 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Tue, 5 Nov 2024 12:52:04 +0000
Subject: [PATCH 07/16] ensure flow_ID is unique

---
 code/demand_cluster_flows.R      | 20 +++++++++++++++-----
 code/demand_cluster_flows_prep.R |  2 +-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R
index 54dbf87..e4492be 100644
--- a/code/demand_cluster_flows.R
+++ b/code/demand_cluster_flows.R
@@ -22,6 +22,9 @@ od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "
 # path tp save plots
 plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/")
 
+# what combination are we clustering
+day_time = "pt_wkday_evening"
+
 # ----------- 1. Study area
 
 # --- administrative boundaries
@@ -42,6 +45,13 @@ geoid_col = paste0(geography, "21CD")
 study_area <- study_area %>%
   relocate(all_of(geoid_col), .before = everything())
 
+# add distance_m column
+od_demand_jittered = filter_matrix_by_distance(zones = study_area,
+                                               od_matrix = od_demand_jittered %>% st_drop_geometry(),
+                                               dist_threshold = 500)
+
+od_demand_jittered = od_demand_jittered %>%
+  filter(combination == day_time)
 ########## ----------------------- Different parameter combinations  ----------------------- ##########
 
 # # 1) All flows + equal weight to origins and destinations (for flow distance)
@@ -138,10 +148,10 @@ od_demand_xyuv <- od_demand_jittered %>%
 # assign unique id to each point
 od_demand_xyuv <- od_demand_xyuv %>%
   group_by(Origin, x, y) %>%
-  mutate(O_ID = cur_group_id()) %>%
+  mutate(O_ID = paste0(cur_group_id(), "_", row_number())) %>%
   ungroup() %>%
   group_by(Destination, u, v) %>%
-  mutate(D_ID = cur_group_id()) %>%
+  mutate(D_ID = paste0(cur_group_id(), "_", row_number())) %>%
   ungroup()
 
 # assign unique id to each OD pair
@@ -261,11 +271,11 @@ w <- dist_mat %>%
   select(flow_ID) %>%
   # add commuting data to
   inner_join(od_demand_jittered %>%
-               select(flow_ID, commute_all),
+               select(flow_ID, total_flow),
              by = "flow_ID")
 
 # weight vector
-w_vec <- as.vector(w$commute_all)
+w_vec <- as.vector(w$total_flow)
 
 
 
@@ -408,7 +418,7 @@ cluster_dbscan_res <- od_demand_jittered %>%
 
 
 # save
-st_write(cluster_dbscan_res, paste0("data/processed/clustering/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, ".geojson"), delete_dsn = TRUE)
+st_write(cluster_dbscan_res, paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson"), delete_dsn = TRUE)
 
 
 
diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R
index 0120d10..e8f840f 100644
--- a/code/demand_cluster_flows_prep.R
+++ b/code/demand_cluster_flows_prep.R
@@ -238,7 +238,7 @@ od_demand_jittered = odjitter::jitter(
   # column with the flows (to be disaggregated)
   disaggregation_key = "total_flow",
   # What's the maximum number of trips per output OD row that's allowed?
-  disaggregation_threshold = 50,
+  disaggregation_threshold = 95,
 
   # ----- arguments for ZONES ----- #
 

From b2e201b4921eb4e4d10d24417bf4098632d45134 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Tue, 5 Nov 2024 15:10:35 +0000
Subject: [PATCH 08/16] specify demand column explicitly

---
 R/dbscan_sensitivity.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/dbscan_sensitivity.R b/R/dbscan_sensitivity.R
index 1f813fa..0d5866d 100644
--- a/R/dbscan_sensitivity.R
+++ b/R/dbscan_sensitivity.R
@@ -5,13 +5,13 @@
 #' @param options_minpts a vector of options for the minPts paramter
 #' @param weights a vector where each value represents the number of people going between an OD pair (to pass to the weights argument of dbscan)
 #' @param flows the original od matrix with a column that represents flows between od pairs
-#'
+#' @param flow_column the column that represents the number of people travelling between an OD pair
 #' @return a df with columns {id} (to identify the eps and minpts used), {cluster}, {size} (no of desire lines in cluster), commuters_sum (total no. of commuters in cluster)
 #'
 #' @examples
 #'
 #' @export
-dbscan_sensitivity = function(distance_matrix, options_epsilon, options_minpts, weights, flows){
+dbscan_sensitivity = function(distance_matrix, options_epsilon, options_minpts, weights, flows, flow_column){
 
   # dataframe with all combinations of eps and minpts
   options_parameters <- tidyr::expand_grid(eps = options_epsilon, minpts = options_minpts)
@@ -53,7 +53,7 @@ dbscan_sensitivity = function(distance_matrix, options_epsilon, options_minpts,
     cluster_res_i <- cluster_dbscan_res_i %>%
       st_drop_geometry() %>%
       group_by(cluster) %>%
-      summarise(size = n(), commuters_sum = sum(commute_all)) %>%
+      summarise(size = n(), commuters_sum = sum(.data[[flow_column]])) %>%
       arrange(desc(size)) %>%
       ungroup()
 

From 55975e3d93988003e8bbcba9a2ed17c6f460eff1 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Tue, 5 Nov 2024 19:16:13 +0000
Subject: [PATCH 09/16] maps edited for temporal data

---
 code/demand_cluster_flows.R               |  103 +-
 code/demand_cluster_flows_maps.R          |   23 +-
 code/demand_cluster_flows_maps_temporal.R | 2060 +++++++++++++++++++++
 code/demand_cluster_flows_prep.R          |    6 +-
 4 files changed, 2126 insertions(+), 66 deletions(-)
 create mode 100644 code/demand_cluster_flows_maps_temporal.R

diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R
index e4492be..c8d7cb0 100644
--- a/code/demand_cluster_flows.R
+++ b/code/demand_cluster_flows.R
@@ -317,56 +317,57 @@ hist(distances$fds, breaks = 100)
 #
 # # ----- Sensitivity analysis(for different epsilon and minpts combinaitons)
 #
-# # function to get clustering results for many combinations
-# dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat,
-#                                              options_epsilon <- c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 5, 6, 7, 7.5, 8, 9),
-#                                              options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 700, 1000, 1500, 3000, 5000, 10000),
-#                                              weights = w_vec,
-#                                              flows = st_drop_geometry(od_demand_jittered)
-#                                              )
-#
-#
-# arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
-# # dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
-#
-#
-# # All results plotted together
-# dbscan_sensitivity_res %>%
-#   filter(cluster != 0) %>%
-#   ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
-#   geom_col() +
-#   scale_y_continuous(trans='log10') +
-#   facet_wrap(~id, scales = "fixed") +
-#   labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
-#        subtitle = "Parameter combinations that returned more than 1 cluster",
-#        x = "Cluster no.",
-#        y = "No. of od pairs in cluster",
-#        fill= "No. of commuters") +
-#  theme_minimal()
-#
-# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_all.png", width = 14, height = 10)
-#
-# dbscan_sensitivity_res %>%
-#   filter(cluster != 0) %>%
-#   group_by(id) %>%
-#   #mutate(clusters = n()) %>%
-#   # How many clusters have more than 5 od pairs in them?
-#   mutate(clusters = sum(size > 5)) %>%
-#   ungroup() %>%
-#   filter(clusters > 5) %>%
-#   ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
-#   geom_col() +
-#   scale_y_continuous(trans='log10') +
-#   facet_wrap(~id, scales = "fixed") +
-#   labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
-#        subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
-#        x = "Cluster no.",
-#        y = "No. of od pairs in cluster",
-#        fill= "No. of commuters") +
-#   theme_minimal()
-#
-# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_filtered.png", width = 14, height = 10)
-#
+# function to get clustering results for many combinations
+dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat,
+                                             options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9),
+                                             options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000),
+                                             weights = w_vec,
+                                             flows = st_drop_geometry(od_demand_jittered),
+                                             flow_column = "total_flow"
+                                             )
+
+
+arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_", day_time, ".parquet"))
+# dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
+
+
+# All results plotted together
+dbscan_sensitivity_res %>%
+  filter(cluster != 0) %>%
+  ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
+  geom_col() +
+  scale_y_continuous(trans='log10') +
+  facet_wrap(~id, scales = "fixed") +
+  labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
+       subtitle = "Parameter combinations that returned more than 1 cluster",
+       x = "Cluster no.",
+       y = "No. of od pairs in cluster",
+       fill= "No. of commuters") +
+ theme_bw()
+
+ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_all_", day_time, ".png"), width = 14, height = 10)
+
+dbscan_sensitivity_res %>%
+  filter(cluster != 0) %>%
+  group_by(id) %>%
+  #mutate(clusters = n()) %>%
+  # How many clusters have more than 5 od pairs in them?
+  mutate(clusters = sum(size > 5)) %>%
+  ungroup() %>%
+  filter(clusters > 5) %>%
+  ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
+  geom_col() +
+  scale_y_continuous(trans='log10') +
+  facet_wrap(~id, scales = "fixed") +
+  labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
+       subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
+       x = "Cluster no.",
+       y = "No. of od pairs in cluster",
+       fill= "No. of commuters") +
+  theme_bw()
+
+ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_", day_time, ".png"), width = 14, height = 10)
+
 
 
 ###  --------------------  STEP 3: Cluster  -------------------- ###
@@ -374,7 +375,7 @@ hist(distances$fds, breaks = 100)
 
 # cluster option 1: border points assigned to cluster
 cluster_dbscan = dbscan::dbscan(dist_mat,
-                                minPts = 70, # 125
+                                minPts = 50, # 125
                                 eps = 8, # 9.5
                                 #borderPoints = FALSE,
                                 weights = w_vec)
diff --git a/code/demand_cluster_flows_maps.R b/code/demand_cluster_flows_maps.R
index 5aafdd1..38edb05 100644
--- a/code/demand_cluster_flows_maps.R
+++ b/code/demand_cluster_flows_maps.R
@@ -377,7 +377,7 @@ tm_shape(study_area) +
             #panel.show = FALSE,
             panel.label.size = 1,
             panel.label.bg.color = NA,
-           # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)),
+            # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)),
             frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly
 
 map_cluster_results_bus_frac_grouped_gtfs_poly
@@ -742,7 +742,7 @@ tm_shape(study_area) +
            breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
            palette = "RdYlGn", #Accent
            alpha = 0.4,
-          # title.col = "Fraction of Bus to Car users",
+           # title.col = "Fraction of Bus to Car users",
            #title.lwd = "No. of commuters",
            legend.col.show = FALSE,
            legend.lwd.show = FALSE,
@@ -1143,7 +1143,7 @@ tm_shape(basemap_urban_rural) +
   ) +
   # ---- clusters
   # poly border
-tm_shape(clusters_vis_mode_poly %>%
+  tm_shape(clusters_vis_mode_poly %>%
              filter(cluster %in% clusters_vis_mode_poly_filt$cluster)) +
   tm_borders(col = "black",
              lwd = 3,
@@ -1154,9 +1154,9 @@ tm_shape(clusters_vis_mode_poly %>%
             nrow = rows,
             showNA = FALSE) +
   # poly fill
-# tm_shape(cluster_dbscan_res_mode_poly_filt_max %>%
-#              filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>%
-tm_shape(clusters_vis_mode_poly_filt %>%
+  # tm_shape(cluster_dbscan_res_mode_poly_filt_max %>%
+  #              filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>%
+  tm_shape(clusters_vis_mode_poly_filt %>%
              st_buffer(1000)) +
   tm_borders(col = "darkgreen",
              lwd = 2) +
@@ -1221,9 +1221,9 @@ tm_shape(basemap_urban_rural) +
   tm_shape(st_union(clusters_vis_mode_poly_filt %>%
                       mutate(area = st_area(.)) %>%
                       filter(area > 0.2 * mean(area)))) +
-    tm_borders(col = "darkgreen",
-               lwd = 3.5,
-               lty = "dashed") +
+  tm_borders(col = "darkgreen",
+             lwd = 3.5,
+             lty = "dashed") +
   tm_layout(fontfamily = 'Georgia',
             main.title = paste0("Potential DRT Service Areas"),
             main.title.size = 1.1,
@@ -1487,8 +1487,8 @@ ggplot(clusters_ur_poly_combined) +
                     #direction = -1,
                     labels = function(x) str_wrap(x, width = 25)) +
   scale_color_manual(values = colors_urban_rural,
-                    #direction = -1,
-                    labels = function(x) str_wrap(x, width = 25)) +
+                     #direction = -1,
+                     labels = function(x) str_wrap(x, width = 25)) +
   labs(x = "",
        y = "Area covered by cluster (km2)",
        fill = "Rural / Urban Classification",
@@ -1507,4 +1507,3 @@ ggplot(clusters_ur_poly_combined) +
   facet_wrap(facets = vars(cluster))
 
 ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
-
diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R
new file mode 100644
index 0000000..467db3c
--- /dev/null
+++ b/code/demand_cluster_flows_maps_temporal.R
@@ -0,0 +1,2060 @@
+##########################################################################################
+### The purpose of this script is to visualize the results from demand_cluster_flows.R ###
+##########################################################################################
+
+library(tidyverse)
+library(sf)
+library(lwgeom)
+library(geos)
+# library(spatstat)
+library(tmap)
+
+source("R/study_area_geographies.R")
+source("R/filter_od_matrix.R")
+
+
+
+# ------------------------- Define the scenario ------------------------- #
+
+scenario <- 3 # 3, 2
+clustering <- "equal"
+distance_threshold <- 50000   # 10000
+
+# save plots?
+save = TRUE
+
+#  ------------------------ Load in the data  -------------------------- #
+
+# ----- Clustering results
+day_time = "pt_wkday_evening"
+cluster_dbscan_res <- st_read(paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson"))
+
+
+# ----- Study area
+
+# ----------- 1. Study area
+
+# --- administrative boundaries
+study_area <- st_read("data/interim/study_area_boundary.geojson")
+
+# convert to desired resolution
+geography = "MSOA"
+study_area = study_area_geographies(study_area = study_area,
+                                    geography = geography)
+
+study_area <- study_area %>%
+  st_cast("MULTIPOLYGON")
+
+# move the geographic ID to the first column. od::points_to_od() only keeps the first column as ID
+
+geoid_col = paste0(geography, "21CD")
+
+study_area <- study_area %>%
+  relocate(all_of(geoid_col), .before = everything())
+
+
+plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/")
+
+
+# ------------------------- VISUALISE RESULTS ------------------------- #
+
+cluster_dbscan_res = cluster_dbscan_res %>%
+  rename(commute_all = total_flow)
+
+
+# check size per cluster and total commuters per cluster
+cluster_dbscan_res %>%
+  st_drop_geometry() %>%
+  group_by(cluster) %>%
+  summarise(size = n(), commuters_sum = sum(commute_all)) %>%
+  arrange(desc(size))
+
+# add size and total commuters columns
+cluster_dbscan_res <- cluster_dbscan_res %>%
+  group_by(cluster) %>%
+  mutate(size = n(),
+         commuters_sum = sum(commute_all)) %>%
+  ungroup()
+
+# plot
+plot(cluster_dbscan_res["cluster"])
+
+# ----- assign number of rows dynamically for facet plot
+
+# get clusters to map
+cluster_dbscan_res %>%
+  filter(size > 20, size < 5000) %>%
+  filter(commuters_sum > 200) %>%
+  filter(cluster != 0) -> clusters_vis
+
+
+# add alternative cluster labels
+clusters_vis <- clusters_vis %>%
+  rename(cluster_orig = cluster) %>%
+  arrange(commuters_sum) %>%
+  group_by(cluster_orig) %>%
+  mutate(cluster = cur_group_id()) %>%
+  ungroup()
+
+# we want maximum 4 maps per row
+rows <- round(length(unique(clusters_vis$cluster)) / 3)
+
+
+##### ---------- MAPS ---------- #####
+
+### ---------- Plot 1: Top n Clusters (Facet Plot) ---------- ###
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  tm_shape(clusters_vis) +
+  tm_lines(lwd = "commute_all",
+           col = "cluster",
+           #col = "darkgreen",
+           scale = 10,
+           palette = "Accent", #YlGn
+           #style = "pretty",
+           alpha = 1,
+           title.col = "Cluster",
+           title.lwd = "No. of commuters",
+           legend.col.show = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            legend.outside = TRUE,
+            legend.outside.position = "bottom",
+            legend.stack = "horizontal",
+            # remove panel headers
+            panel.show = FALSE,
+            frame = FALSE) -> map_cluster_results
+
+map_cluster_results
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_", day_time, "_.png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+### ---------- Plot 2: Compare mode composition of each cluster (desire line level) ---------- ###
+
+clusters_vis_mode <- clusters_vis
+
+### ---------- Plot 3: Compare mode composition of each cluster (cluster level) ---------- ###
+
+
+# Get mode composition of entire cluster
+
+clusters_vis_mode_summary <- clusters_vis_mode %>%
+  st_drop_geometry() %>%
+  group_by(cluster, size) %>%
+  summarise(across(starts_with("commute_"), sum)) %>%
+  ungroup()
+
+# add commuting fraction
+clusters_vis_mode <- clusters_vis_mode %>%
+  inner_join(clusters_vis_mode_summary %>% select(cluster), by = "cluster")
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  tm_shape(clusters_vis_mode) +
+  tm_lines(lwd = "commute_all",
+           col = "commute_all",
+           scale = 10,
+           palette = "RdYlGn", #Accent
+           #style = "pretty",
+           alpha = 1,
+           title.col = "Travel demand",
+           title.lwd = "No. of commuters",
+           #legend.col.is.portrait = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            # legend.outside = TRUE,
+            # legend.outside.position = "bottom",
+            # legend.stack = "horizontal",
+            # remove panel headers
+            panel.show = FALSE,
+            frame = FALSE) -> map_cluster_results_bus_frac_grouped
+
+map_cluster_results_bus_frac_grouped
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+# --- Map with clusters ovelayed onto bus routes
+
+
+
+gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf.geojson")
+
+# gtfs_bus <- gtfs_bus %>%
+#   st_filter(st_union(study_area), .predicate = st_within)
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey75",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # clusters
+  tm_shape(clusters_vis_mode) +
+  tm_lines(lwd = "commute_all",
+           col = "commute_all",
+           scale = 10,
+           palette = "RdYlGn", #Accent
+           #style = "pretty",
+           alpha = 1,
+           title.col = "Travel demand",
+           title.lwd = "No. of commuters",
+           #legend.col.is.portrait = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            panel.show = FALSE,
+            frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs
+
+map_cluster_results_bus_frac_grouped_gtfs
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+
+# --- Map with clusters as polygons (convex_hull())
+
+# turn clusters into polygons using convex hull
+
+clusters_vis_mode_poly <- clusters_vis_mode %>%
+  filter(size > 7, size < 5000) %>%
+  filter(commuters_sum > 200) %>%
+  filter(cluster != 0) %>%
+  mutate(cluster = as.factor(cluster)) %>%
+  #head(1000) %>%
+  group_by(cluster) %>%
+  summarize(commuters_sum = first(commuters_sum),
+            geometry = st_convex_hull(st_union(geometry))) %>%
+  ungroup()
+
+
+# plot
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # clusters
+  tm_shape(clusters_vis_mode_poly) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              #style = "pretty",
+              alpha = 0.3,
+              title = "Travel demand",
+              #legend.col.is.portrait = FALSE,
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            #panel.show = FALSE,
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)),
+            frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly
+
+map_cluster_results_bus_frac_grouped_gtfs_poly
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+
+# --- Map with clusters as polygons (convex_hull()) + lines in background
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # lines
+  tm_shape(clusters_vis_mode) +
+  tm_lines(lwd = "commute_all",
+           col = "commute_all",
+           scale = 5,
+           # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+           palette = "RdYlGn", #Accent
+           alpha = 0.4,
+           title.col = "Travel demand",
+           #title.lwd = "No. of commuters",
+           #legend.col.show = FALSE,
+           legend.lwd.show = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly
+tm_shape(clusters_vis_mode_poly) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+              #style = "pretty",
+              alpha = 0.2,
+              title = "Fraction of Bus \nto Car users",
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            #panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)),
+            #panel.show = FALSE,
+            frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_lines
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+# ------ Map with clusters as polygons (convex_hull()) + lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS
+
+
+# GTFS: Approach 1 - st_union()
+# get high frequency buses
+gtfs_bus_freq <- gtfs_bus  %>%
+  filter(scenario == day_time) %>%
+  mutate(headway_inv = (1/headway_secs) * 3600) %>%
+  filter(headway_secs < 3600) %>%
+  # this is necessary so that geos::geos_concave_hull gives useful results later
+  st_filter(st_union(study_area), .predicate = st_within) %>%
+  st_make_valid()
+
+# convert to one geom in order to intersect by
+gtfs_bus_freq1 <- gtfs_bus_freq %>%
+  st_transform(3857) %>%
+  st_buffer(1000) %>%
+  st_union()
+
+clusters_vis_mode_poly_filt <- st_difference(clusters_vis_mode_poly, gtfs_bus_freq1)
+
+
+
+# convert from MULTIPOLYGON to POLYGON and retain largest geom only for each multipolygon
+clusters_vis_mode_poly_filt <- clusters_vis_mode_poly_filt %>%
+  st_cast("MULTIPOLYGON") %>%
+  st_cast("POLYGON") %>%
+  st_make_valid()
+
+# retain largest poly in each cluster
+clusters_vis_mode_poly_filt_max <- clusters_vis_mode_poly_filt %>%
+  mutate(area = st_area(.)) %>%
+  group_by(cluster) %>%
+  filter(area == max(area))
+
+# convex hull for aesthetic
+clusters_vis_mode_poly_filt_max <- st_convex_hull(clusters_vis_mode_poly_filt_max)
+
+
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # lines
+  tm_shape(clusters_vis_mode %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt_max$cluster)) +
+  tm_lines(lwd = "commute_all",
+           col = "commute_all",
+           scale = 5,
+           # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+           palette = "RdYlGn", #Accent
+           alpha = 0.4,
+           title.col = "Travel demand",
+           #title.lwd = "No. of commuters",
+           #legend.col.show = FALSE,
+           legend.lwd.show = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly border
+  tm_shape(clusters_vis_mode_poly %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt_max$cluster)) +
+  tm_borders(col = "black",
+             lwd = 2,
+             lty = "dashed") +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly fill
+  tm_shape(clusters_vis_mode_poly_filt_max %>%
+             st_buffer(1000)) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+              #style = "pretty",
+              alpha = 0.2,
+              title = "Travel demand \n(in potential \nDRT zone)",
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            # panel.show = FALSE,
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt_max$cluster)),
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed")-> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+# --- Map with clusters as polygons (convex_hull()) + WITHOUT lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # poly border
+  tm_shape(clusters_vis_mode_poly %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt_max$cluster)) +
+  tm_borders(col = "black",
+             lwd = 2,
+             lty = "dashed") +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly fill
+  tm_shape(clusters_vis_mode_poly_filt_max %>%
+             st_buffer(1000)) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+              #style = "pretty",
+              alpha = 0.2,
+              title = "Travel demand \n(in potential \nDRT zone)",
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            # panel.show = FALSE,
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt_max$cluster)),
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed")  -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+
+# --- Map with clusters as polygons (convex_hull()) + lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS
+
+
+# GTFS: APPROACH 2a: concave_hull() AFTER st_union()
+gtfs_bus_freq2 <- gtfs_bus_freq  %>%
+  st_transform(3857) %>%
+  #st_union() %>%
+  geos::geos_concave_hull(ratio = 0.5, allow_holes = FALSE) %>%
+  st_as_sf() %>%
+  st_union() %>%
+  st_set_crs(3857) %>%
+  st_buffer(250)
+
+
+# st_difference to get non overlapping geoms
+clusters_vis_mode_poly_filt2 <- st_difference(clusters_vis_mode_poly, gtfs_bus_freq2)
+
+
+
+# convert from MULTIPOLYGON to POLYGON and retain largest geom only for each multipolygon
+clusters_vis_mode_poly_filt2 <- clusters_vis_mode_poly_filt2 %>%
+  st_cast("MULTIPOLYGON") %>%
+  st_cast("POLYGON") %>%
+  st_make_valid()
+
+# union geoms per cluster
+clusters_vis_mode_poly_filt2 <- clusters_vis_mode_poly_filt2 %>%
+  group_by(cluster, commuters_sum) %>%
+  summarise(geometry = st_union(geometry)) %>%
+  ungroup()
+
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # lines
+  tm_shape(clusters_vis_mode %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt2$cluster) %>%
+             mutate(cluster = as.factor(cluster)) %>%
+             arrange(commute_all)) +
+  tm_lines(lwd = "commute_all",
+           col = "commute_all",
+           scale = 5,
+           # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+           palette = "RdYlGn", #Accent
+           alpha = 0.4,
+           title.col = "Travel demand",
+           #title.lwd = "No. of commuters",
+           #legend.col.show = FALSE,
+           legend.lwd.show = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly border
+  tm_shape(clusters_vis_mode_poly  %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt2$cluster)) +
+  tm_borders(col = "black",
+             lwd = 2,
+             lty = "dashed") +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly fill
+  tm_shape(clusters_vis_mode_poly_filt2 %>%
+             st_buffer(1000)) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+              #style = "pretty",
+              alpha = 0.2,
+              title = "Travel demand \n(in potential \nDRT zone)",
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            # panel.show = FALSE,
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt2$cluster)),
+            frame = FALSE) +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+
+
+
+# --- Map with clusters as polygons (convex_hull()) + WITHOUT lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS  ----- CONCAVE HULL
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # poly border
+  tm_shape(clusters_vis_mode_poly  %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt2$cluster)) +
+  tm_borders(col = "black",
+             lwd = 2,
+             lty = "dashed") +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly fill
+  tm_shape(clusters_vis_mode_poly_filt2 %>%
+             st_buffer(1000)) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+              #style = "pretty",
+              alpha = 0.2,
+              title = "Travel demand \n(in potential \nDRT zone)",
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            # panel.show = FALSE,
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt2$cluster)),
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+# GTFS: APPROACH 2b: concave_hull() AFTER st_union()
+gtfs_bus_freq3 <- gtfs_bus_freq  %>%
+  st_transform(3857) %>%
+  st_buffer(100) %>%
+  st_union() %>%
+  geos::geos_concave_hull(ratio = 0.25, allow_holes = TRUE) %>%
+  st_as_sf() %>%
+  st_set_crs(3857) %>%
+  st_make_valid()
+
+
+# st_difference to get non overlapping geoms
+clusters_vis_mode_poly_filt3 <- st_difference(clusters_vis_mode_poly, gtfs_bus_freq3)
+
+
+
+# convert from MULTIPOLYGON to POLYGON and retain largest geom only for each multipolygon
+clusters_vis_mode_poly_filt3 <- clusters_vis_mode_poly_filt3 %>%
+  st_cast("MULTIPOLYGON") %>%
+  st_cast("POLYGON") %>%
+  st_make_valid()
+
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus_freq %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # lines
+  tm_shape(clusters_vis_mode %>%
+             filter(size > 7, size < 5000) %>%
+             filter(commuters_sum > 200) %>%
+             filter(cluster != 0) %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
+             mutate(cluster = as.factor(cluster)) %>%
+             arrange(commuters_sum)) +
+  tm_lines(lwd = "commute_all",
+           col = "commute_all",
+           scale = 5,
+           # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+           palette = "RdYlGn", #Accent
+           alpha = 0.4,
+           title.col = "Travel demand",
+           #title.lwd = "No. of commuters",
+           legend.col.show = FALSE,
+           legend.lwd.show = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly border
+  tm_shape(clusters_vis_mode_poly %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) +
+  tm_borders(col = "black",
+             lwd = 2,
+             lty = "dashed") +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly fill
+  tm_shape(clusters_vis_mode_poly_filt3 %>%
+             st_buffer(1000)) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+              #style = "pretty",
+              alpha = 0.2,
+              title = "Travel demand \n(in potential \nDRT zone)",
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            # panel.show = FALSE,
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)),
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+
+# --- Same as above but with startpoint and endpoint labelled
+
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus_freq %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # lines
+  tm_shape(clusters_vis_mode %>%
+             filter(size > 7, size < 5000) %>%
+             filter(commuters_sum > 200) %>%
+             filter(cluster != 0) %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
+             mutate(cluster = as.factor(cluster)) %>%
+             arrange(commuters_sum)) +
+  tm_lines(lwd = "commute_all",
+           col = "commute_all",
+           scale = 5,
+           # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+           palette = "RdYlGn", #Accent
+           alpha = 0.4,
+           title.col = "Travel demand",
+           #title.lwd = "No. of commuters",
+           #legend.col.show = FALSE,
+           legend.lwd.show = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # START AND ENDPOINTS
+  tm_shape(clusters_vis_mode %>%
+             filter(size > 7, size < 5000) %>%
+             filter(commuters_sum > 200) %>%
+             filter(cluster != 0) %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
+             mutate(cluster = as.factor(cluster)) %>%
+             mutate(geometry = st_startpoint(.)) %>%
+             arrange(commuters_sum)) +
+  tm_dots(col = "darkgreen",
+          alpha = 0.6,
+          jitter = 0.01,
+          size = "commute_all",
+          scale = 0.8,
+          legend.size.show = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_shape(clusters_vis_mode %>%
+             filter(size > 7, size < 5000) %>%
+             filter(commuters_sum > 200) %>%
+             filter(cluster != 0) %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
+             mutate(cluster = as.factor(cluster)) %>%
+             mutate(geometry = st_endpoint(.)) %>%
+             arrange(commuters_sum)) +
+  tm_dots(col = "darkred",
+          alpha = 0.6,
+          jitter = 0.01,
+          size = "commute_all",
+          scale = 0.8,
+          legend.size.show = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly border
+  tm_shape(clusters_vis_mode_poly %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) +
+  tm_borders(col = "black",
+             lwd = 2,
+             lty = "dashed") +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly fill
+  tm_shape(clusters_vis_mode_poly_filt3 %>%
+             st_buffer(1000)) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+              #style = "pretty",
+              alpha = 0.2,
+              title = "Travel demand \n(in potential \nDRT zone)",
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") +
+  tm_add_legend(type = "symbol", labels = 'OD Start', col = 'darkgreen') +
+  tm_add_legend(type = "symbol", labels = 'OD End', col = 'darkred') -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+# Presentation Style - 2 Rows Only
+
+rows_ppt = 3
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # lines
+  tm_shape(clusters_vis_mode %>%
+             filter(size > 7, size < 5000) %>%
+             filter(commuters_sum > 200) %>%
+             filter(cluster != 0) %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
+             mutate(cluster = as.factor(cluster)) %>%
+             arrange(commuters_sum)) +
+  tm_lines(lwd = "commute_all",
+           col = "commute_all",
+           scale = 5,
+           # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+           palette = "RdYlGn", #Accent
+           alpha = 0.4,
+           title.col = "Travel demand",
+           #title.lwd = "No. of commuters",
+           legend.col.show = FALSE,
+           legend.lwd.show = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows_ppt,
+            showNA = FALSE) +
+  # START AND ENDPOINTS
+  tm_shape(clusters_vis_mode %>%
+             filter(size > 7, size < 5000) %>%
+             filter(commuters_sum > 200) %>%
+             filter(cluster != 0) %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
+             mutate(cluster = as.factor(cluster)) %>%
+             mutate(geometry = st_startpoint(.)) %>%
+             arrange(commuters_sum)) +
+  tm_dots(col = "darkgreen",
+          alpha = 0.6,
+          jitter = 0.01,
+          size = "commute_all",
+          scale = 1,
+          legend.size.show = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows_ppt,
+            showNA = FALSE) +
+  tm_shape(clusters_vis_mode %>%
+             filter(size > 7, size < 5000) %>%
+             filter(commuters_sum > 200) %>%
+             filter(cluster != 0) %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
+             mutate(cluster = as.factor(cluster)) %>%
+             mutate(geometry = st_endpoint(.)) %>%
+             arrange(commuters_sum)) +
+  tm_dots(col = "darkred",
+          alpha = 0.6,
+          jitter = 0.01,
+          size = "commute_all",
+          scale = 1,
+          legend.size.show = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows_ppt,
+            showNA = FALSE) +
+  # poly border
+  tm_shape(clusters_vis_mode_poly %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) +
+  tm_borders(col = "black",
+             lwd = 2,
+             lty = "dashed") +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows_ppt,
+            showNA = FALSE) +
+  # poly fill
+  tm_shape(clusters_vis_mode_poly_filt3 %>%
+             st_buffer(1000)) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+              #style = "pretty",
+              alpha = 0.2,
+              title = "Travel demand \n(in potential \nDRT zone)",
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows_ppt,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") +
+  tm_add_legend(type = "symbol", labels = 'OD Start', col = 'darkgreen') +
+  tm_add_legend(type = "symbol", labels = 'OD End', col = 'darkred') -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+# --- Map with clusters as polygons (convex_hull()) + WITHOUT lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS  ----- CONCAVE HULL
+
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  # bus layer
+  tm_shape(gtfs_bus_freq %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "grey70",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # poly border
+  tm_shape(clusters_vis_mode_poly %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) +
+  tm_borders(col = "black",
+             lwd = 2,
+             lty = "dashed") +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly fill
+  tm_shape(clusters_vis_mode_poly_filt3 %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
+             st_buffer(1000)) +
+  tm_polygons(col = "commuters_sum",
+              palette = "RdYlGn", #Accent
+              # # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
+              #style = "pretty",
+              alpha = 0.2,
+              title = "Travel demand \n(in potential \nDRT zone)",
+              # remove "missing from legend
+              showNA = FALSE) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            # panel.show = FALSE,
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)),
+            frame = FALSE) +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave2_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+
+########## - ADD RURAL URBAN AS BASE MAP
+
+# --- 2011 map and urban rural classification
+basemap <- st_read("data/external/oa_england_2011/OA_2011_EW_BGC_V2.shp") %>%
+  st_transform(st_crs(study_area)) %>%
+  st_make_valid()
+
+urban_rural_csv <- read_csv("data/external/census_2011_rural_urban.csv")
+
+
+# add numeric columns for coloring
+urban_rural_csv <- urban_rural_csv %>%
+  mutate(RUC11CD_NM = case_when(RUC11 == "Urban major conurbation" ~ 10,
+                                RUC11 == "Urban minor conurbation" ~ 9,
+                                RUC11 == "Urban city and town" ~ 8,
+                                RUC11 == "Urban city and town in a sparse setting" ~ 7,
+                                RUC11 == "Rural town and fringe"  ~ 6,
+                                RUC11 == "Rural town and fringe in a sparse setting" ~ 5,
+                                RUC11 == "Rural village" ~ 4,
+                                RUC11 == "Rural village in a sparse setting" ~ 3,
+                                RUC11 == "Rural hamlets and isolated dwellings" ~ 2,
+                                RUC11 == "Rural hamlets and isolated dwellings in a sparse setting" ~ 1)) %>%
+  arrange(RUC11CD_NM) %>%
+  # turn column to factor
+  mutate(RUC11CD_NM_FCT = as.factor(RUC11CD_NM))
+# join
+basemap_urban_rural <- basemap %>%
+  left_join(urban_rural_csv, by = "OA11CD")
+
+
+# --- filter to study area
+basemap_urban_rural <- basemap_urban_rural %>%
+  st_filter(st_union(study_area) %>%
+              st_make_valid(),
+            .predicate = st_intersects)
+
+
+
+
+
+
+tm_shape(basemap_urban_rural) +
+  tm_fill(col = "RUC11CD_NM",
+          palette = "Greys",
+          alpha = 0.5,
+          title = "Level of \nUrbanisation") +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "darkred",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 0.1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # poly border
+  tm_shape(clusters_vis_mode_poly %>%
+             filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) +
+  tm_borders(col = "black",
+             lwd = 3,
+             lty = "dashed") +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  # poly fill
+  # tm_shape(cluster_dbscan_res_mode_poly_filt_max %>%
+  #              filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>%
+  tm_shape(clusters_vis_mode_poly_filt3 %>%
+             st_buffer(1000)) +
+  tm_borders(col = "darkgreen",
+             lwd = 2) +
+  tm_facets(by = "cluster",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            # panel.show = FALSE,
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)),
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") +
+  tm_add_legend(type = "line", labels = 'Potential DRT\nservice area', col = 'darkgreen', lwd = 2) -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+
+# ---------------------- ONE BIG MAP
+
+
+tm_shape(basemap_urban_rural) +
+  tm_fill(col = "RUC11CD_NM",
+          palette = "Greys",
+          alpha = 0.5,
+          title = "Level of \nUrbanisation") +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(scenario == day_time) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "darkred",
+           lwd = "headway_inv",
+           scale = 5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 0.1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # poly fill
+  #tm_shape(st_union(clusters_vis_mode_poly_filt3)) +
+  tm_shape(st_union(clusters_vis_mode_poly_filt3 %>%
+                      mutate(area = st_area(.)) %>%
+                      filter(area > 0.2 * mean(area)))) +
+  tm_borders(col = "darkgreen",
+             lwd = 3.5,
+             lty = "dashed") +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Potential DRT Service Areas"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2.5, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+
+# ----- ONE BIG MAP (gtfs routes overlined)
+
+gtfs_bus_overline <- gtfs_bus %>%
+  filter(scenario == day_time) %>%
+  mutate(headway_inv = (1/headway_secs) * 3600) %>%
+  filter(headway_secs < 7200)
+
+gtfs_bus_overline = stplanr::overline(gtfs_bus_overline, attrib = "headway_inv", fun = sum)
+
+gtfs_bus_overline  = gtfs_bus_overline %>%
+  mutate(headway_inv = round(headway_inv))
+
+
+tm_shape(basemap_urban_rural) +
+  tm_fill(col = "RUC11CD_NM",
+          palette = "Greys",
+          alpha = 0.5,
+          title = "Level of \nUrbanisation") +
+  # bus layer
+  tm_shape(gtfs_bus_overline) +
+  tm_lines(col = "darkred",
+           lwd = "headway_inv",
+           scale = 20,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 0.7,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # poly fill
+  #tm_shape(st_union(clusters_vis_mode_poly_filt3)) +
+  tm_shape(st_union(clusters_vis_mode_poly_filt3 %>%
+                      mutate(area = st_area(.)) %>%
+                      filter(area > 0.2 * mean(area)))) +
+  tm_borders(col = "darkgreen",
+             lwd = 3.5,
+             lty = "dashed") +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Potential DRT Service Areas"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2.5, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_overline_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+# ----- ONE BIG MAP (pop density as background)
+
+
+# --- pop density 1km grid
+oa_pop_density <- stars::read_stars("data/external/population_density_grid_uk/gbr_pd_2020_1km_UNadj.tif")
+# crop to study area
+oa_pop_density_crop <- st_crop(oa_pop_density, study_area)
+
+tm_shape(st_union(study_area)) +
+  tm_borders(lwd =2,
+             col = "grey15") +
+  tm_shape(oa_pop_density_crop) +
+  tm_raster(title = "People / Km2",
+            palette = "Blues",
+            alpha = 0.5,
+            style = "log10_pretty") + #log10_pretty #jenks
+  # bus layer
+  tm_shape(gtfs_bus_overline) +
+  tm_lines(col = "darkred",
+           lwd = "headway_inv",
+           scale = 20,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 0.6,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  # ---- clusters
+  # poly fill
+  #tm_shape(st_union(clusters_vis_mode_poly_filt3)) +
+  tm_shape(st_union(clusters_vis_mode_poly_filt3 %>%
+                      mutate(area = st_area(.)) %>%
+                      filter(area > 0.2 * mean(area)))) +
+  tm_borders(col = "darkgreen",
+             lwd = 4,
+             lty = "dashed") +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Potential DRT Service Areas"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2.5, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline
+
+map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline
+
+if(save == TRUE){
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_one_map_overline_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+##### ---------- FIGURES ---------- #####
+
+### ---------- 1. Get unjittered OD data (code from code/demand_cluster_flows_prep.R) - travel times haven't been affected by jittering
+
+# is the data disaggregated by mode?
+mode <- TRUE
+#mode <- FALSE
+
+# Demand (census) + supply (travel time) data
+
+if(mode == FALSE){
+  # data with "commute_all" only
+  #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
+  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
+} else{
+  # data with modes
+  #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
+  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))
+}
+
+# filter to specific combination
+# TODO: get seperate flows for car and pt, and keep two combinations
+od_demand <- od_demand %>%
+  filter(combination == "pt_wkday_morning")
+
+od_demand <- od_demand %>%
+  select(-distance_m)
+
+# --- create desire lines and remove od pairs with very short distance
+
+# TODO: edit this to avoid clusters of very short flows
+# "Density-based clustering for bivariate-flow data" (section 5.2): preprocessing step to avoid
+# clusters of very short flows. this involves splitting the data into 3 chunks
+# based on length (
+od_demand_filtered = filter_matrix_by_distance(zones = study_area,
+                                               od_matrix = od_demand,
+                                               dist_threshold = 1000)
+
+# add unique id for each row
+od_demand_filtered <- od_demand_filtered %>%
+  mutate(od_id = paste0(Origin, "-", Destination, "-", combination))
+
+
+
+
+# ----- add the clustering results to the unjittered demand
+od_demand_figures <- cluster_dbscan_res %>%
+  select(Origin, Destination, starts_with("commute_"), od_id, flow_ID, cluster, size, commute_all) %>%
+  left_join(od_demand_filtered %>%
+              st_drop_geometry() %>%
+              distinct(od_id, .keep_all = TRUE) %>%
+              # travel times before being ruined by od_jitter()
+              select(od_id, ends_with("_time"), n_rides, starts_with("speed_"), starts_with("ride_time_")),
+            by = "od_id")
+
+# match existing clusters
+# od_demand_figures_filt <- od_demand_figures %>%
+#   filter(size > 7, size < 5000) %>%
+#   filter(commute_all > 200) %>%
+#   filter(cluster != 0)
+
+od_demand_figures_filt <- od_demand_figures %>%
+  rename(cluster_orig = cluster) %>%
+  inner_join(clusters_vis %>%
+               st_drop_geometry() %>%
+               select(flow_ID, cluster),
+             by = "flow_ID") %>%
+  # keep only the clusters after intersection with gtfs
+  filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)
+
+
+
+
+# ----- plots
+
+# scatter plots with x: commute_all, y: fraction of bus commuters
+ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = commute_bus)) +
+  geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") +
+  geom_point() +
+  ylim(0, 5) +
+  scale_color_distiller(palette= "RdYlGn", direction = 1) +
+  labs(x = "total no. of commuters",
+       y = "Bus / Car commuters (fraction)",
+       title = "Composition of clusters",
+       subtitle = "No. of commuters",
+       color = "Commuters (bus)") +
+  facet_wrap(facets = vars(cluster)) +
+  theme_bw() +
+  theme(legend.position = "bottom")
+
+ggsave(paste0(plots_path, "figure_scatter_commuters_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+
+# paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0)
+
+
+ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) +
+  geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") +
+  geom_point() +
+  ylim(0, 5) +
+  scale_color_distiller(palette= "RdYlGn", direction = 1) +
+  labs(x = "total no. of commuters",
+       y = "Bus / Car commuters (fraction)",
+       color = "Average speed of \nbus commute (kph)",
+       title = "Composition of clusters: \nNo. of commuters in each OD pair") +
+  facet_wrap(facets = vars(cluster)) +
+  theme_bw() +
+  theme(legend.position = "bottom")
+
+ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+
+# same but replacing NA speed for min speed
+ggplot(od_demand_figures_filt %>%
+         st_drop_geometry() %>%
+         mutate(speed_kph = replace_na(speed_kph, min(speed_kph, na.rm = TRUE))),
+       aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) +
+  geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") +
+  geom_point() +
+  ylim(0, 5) +
+  scale_color_distiller(palette= "RdYlGn", direction = 1) +
+  labs(x = "total no. of commuters",
+       y = "Bus / Car commuters (fraction)",
+       color = "Average speed of \nbus commute (kph)",
+       title = "Composition of clusters: \nNo. of commuters in each OD pair") +
+  facet_wrap(facets = vars(cluster)) +
+  theme_bw() +
+  theme(legend.position = "bottom")
+
+ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, "_no_NA.png"), height = 8, width = 6)
+
+# ----- Get the spatial coverage of each cluster (what portion is in urban / rural etc?)
+
+# get intersection
+clusters_vis_mode_poly %>%
+  st_intersection(basemap_urban_rural %>%
+                    st_transform(st_crs(clusters_vis_mode_poly))) %>%
+  # get area
+  mutate(area_km = as.numeric(st_area(.) / 1000000))  %>%
+  # area by cluster
+  st_drop_geometry() %>%
+  group_by(cluster, RUC11, RUC11CD_NM_FCT) %>%
+  summarise(area_km = sum(area_km)) %>%
+  # keep only the clusters after intersection with gtfs
+  filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly
+
+# define custom color paletter
+colors_urban_rural <- (c("#01665E", "#35978F", "#80CDC1", "#DFC27D", "#8C510A"))
+# order column based on the RUC11CD_NM_FCT column (degree of urbanisation)
+clusters_ur_poly$RUC11 <- factor(clusters_ur_poly$RUC11, levels = unique(clusters_ur_poly$RUC11)[order(clusters_ur_poly$RUC11CD_NM_FCT)])
+
+ggplot(clusters_ur_poly, aes(x = RUC11, y = area_km, fill = RUC11)) +
+  geom_col(color = "grey60") +
+  scale_fill_manual(values = colors_urban_rural,
+                    labels = function(x) str_wrap(x, width = 25)) +
+  labs(x = "",
+       y = "Area covered by cluster (km2)",
+       color = "Rural / Urban Classification",
+       title = "Composition of clusters: Rural / Urban") +
+  theme_bw() +
+  theme(axis.text.x = element_blank(),
+        axis.ticks.x=element_blank(),
+        legend.position = "bottom",
+        legend.title = element_blank()) +
+  guides(fill = guide_legend(nrow = 3)) +
+  facet_wrap(facets = vars(cluster))
+
+ggsave(paste0(plots_path, "figure_bar_urban_rural_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+
+
+# Same but for filtered polygon
+
+clusters_vis_mode_poly_filt3 %>%
+  st_intersection(basemap_urban_rural %>%
+                    st_transform(st_crs(clusters_vis_mode_poly))) %>%
+  # get area
+  mutate(area_km = as.numeric(st_area(.) / 1000000))  %>%
+  # area by cluster
+  st_drop_geometry() %>%
+  group_by(cluster, RUC11, RUC11CD_NM_FCT) %>%
+  summarise(area_km = sum(area_km))  %>%
+  # keep only the clusters after intersection with gtfs
+  filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly_filt
+
+clusters_ur_poly_filt$RUC11 <- factor(clusters_ur_poly_filt$RUC11, levels = unique(clusters_ur_poly_filt$RUC11)[order(clusters_ur_poly_filt$RUC11CD_NM_FCT)])
+
+
+ggplot(clusters_ur_poly_filt, aes(x = RUC11, y = area_km, fill = RUC11)) +
+  geom_col(color = "grey60") +
+  scale_fill_manual(values = colors_urban_rural,
+                    #direction = -1,
+                    labels = function(x) str_wrap(x, width = 25)) +
+  labs(x = "",
+       y = "Area covered by cluster (km2)",
+       color = "Rural / Urban Classification",
+       title = "Composition of clusters: Rural / Urban") +
+  theme_bw() +
+  theme(axis.text.x = element_blank(),
+        axis.ticks.x=element_blank(),
+        legend.position = "bottom",
+        legend.title = element_blank()) +
+  guides(fill = guide_legend(nrow = 3)) +
+  facet_wrap(facets = vars(cluster))
+
+ggsave(paste0(plots_path, "figure_bar_urban_rural_filtered_by_gtfs_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+
+
+
+
+### ---- join the data to plot together
+clusters_ur_poly %>%
+  left_join(clusters_ur_poly_filt %>%
+              rename(area_km_filt = area_km),
+            by = c("cluster", "RUC11")) -> clusters_ur_poly_combined
+
+
+
+# --- plot both together
+ggplot(clusters_ur_poly_combined) +
+  geom_col(aes(x = RUC11, y = area_km, color = RUC11, alpha = 0.01)) +
+  geom_col(aes(x = RUC11, y = area_km_filt, fill = RUC11)) +
+  scale_fill_manual(values = colors_urban_rural,
+                    #direction = -1,
+                    labels = function(x) str_wrap(x, width = 25)) +
+  scale_color_manual(values = colors_urban_rural,
+                     #direction = -1,
+                     labels = function(x) str_wrap(x, width = 25)) +
+  labs(x = "",
+       y = "Area covered by cluster (km2)",
+       fill = "Rural / Urban Classification",
+       title = "Composition of clusters: Rural / Urban",
+       subtitle = "Before and after intersecting with bus network",
+       caption = "border: area covered by entire cluster \nfill: area covered by cluster after intersection with bus network ") +
+  guides(color = "none", alpha = "none") +
+  scale_alpha_identity() +  # Maintain alpha value
+  theme_bw() +
+  theme(axis.text.x = element_blank(),
+        axis.ticks.x=element_blank(),
+        legend.position = "bottom",
+        legend.title = element_blank(),
+        plot.caption = element_text(hjust = 0)) +
+  guides(fill = guide_legend(nrow = 3)) +
+  facet_wrap(facets = vars(cluster))
+
+ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+
+
+
+
+
+# ---------- PLots of line bearings in each cluster
+
+# --- calculate bearings
+
+od_demand_figures_bearings <- od_demand_figures_filt %>%
+  mutate(bearing = stplanr::line_bearing(.),
+         bearing_adjusted = case_when(bearing < 0 ~ bearing + 360,
+                                      .default =  bearing),
+         bearing_180 = case_when(bearing < 0 ~ bearing * -1,
+                                 .default =  bearing))
+
+
+
+# add length column
+od_demand_figures_bearings <- od_demand_figures_bearings %>%
+  mutate(distance_m =  units::drop_units(sf::st_length(.)))
+
+# --- get columns for distance and bearing "groups" - for facet plots cut distance angle groups for plots
+
+
+# Define the breaks for the buckets
+#breaks_angle <- seq(-10, 370, by = 20)
+breaks_angle <- seq(0, 360, by = 30)
+#breaks_angle <- seq(0, 180, by = 15)
+breaks_distance <- seq(0, 50, by = 10)
+
+od_demand_figures_bearings <- od_demand_figures_bearings %>%
+  mutate(bucket = cut(bearing_adjusted, breaks = breaks_angle, right = FALSE, include.lowest = TRUE),
+         # mutate(bucket = cut(bearing_180, breaks = breaks_angle, right = FALSE, include.lowest = TRUE),
+         bucket_distance = cut(distance_m / 1000, breaks = breaks_distance, right = FALSE, include.lowest = TRUE)) %>%
+  mutate(bucket_distance = fct_rev(bucket_distance))
+
+# Ensure the bucket factor levels are ordered correctly
+#bearings_cat$bucket <- factor(bearings_cat$bucket, levels = unique(bearings_cat$bucket), ordered = TRUE)
+
+
+# --- Plot with no of lines only (geom_bar())
+
+
+# bar
+ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) +
+  geom_bar() +
+  facet_wrap(facets = "cluster") +
+  labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") +
+  scale_x_discrete(labels = breaks_angle) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1),
+        legend.position = "bottom")
+
+ggsave(paste0(plots_path, "figure_bar_bearing_y_ods_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8)
+
+
+# circle
+ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) +
+  geom_bar() +
+  coord_polar(start = -0.1) +
+  facet_wrap(facets = "cluster") +
+  labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") +
+  #scale_x_discrete(labels = breaks_angle) +
+  scale_x_discrete(labels = breaks_angle) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+
+
+
+# --- Plot with total commuters geom_col()
+
+
+# bar
+ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) +
+  geom_col() +
+  #coord_polar() +
+  facet_wrap(facets = "cluster") +
+  labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") +
+  scale_x_discrete(labels = breaks_angle) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1),
+        legend.position = "bottom")
+
+ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8)
+
+# circle
+ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) +
+  geom_col() +
+  coord_radial(rotate.angle = TRUE) +
+  facet_wrap(facets = "cluster") +
+  labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") +
+  #scale_x_discrete(labels = breaks_angle) +
+  scale_x_discrete(labels = breaks_angle) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+
+
+
+
+# TESTING FACET GRID
+
+# Select labels to print (one in every three)
+labels_to_print <- levels(od_demand_figures_bearings$bucket)[seq(1, length(levels(od_demand_figures_bearings$bucket)), by = 3)]
+
+
+ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) +
+  geom_col() +
+  #coord_polar() +
+  facet_grid(bucket_distance ~ cluster,
+             drop = TRUE) +
+  labs(x = "Bearing", y = "Total Commuters", fill = "OD Pair Length") +
+  # scale_x_discrete(labels = breaks_angle) +
+  scale_x_discrete(labels = function(x) ifelse(x %in% labels_to_print, x, "")) + # Print one in every three labels # theme_minimal() +
+  theme_bw() +
+  theme(axis.text.x = element_text(angle = 60, hjust = 1),
+        legend.position = "bottom")
+
+
+ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_grid_cluster_distance_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 10)
+
+
+
+
+
+
+
+
+
+
+
+
+
+clusters_vis_sp = clusters_vis %>%
+  mutate(geometry = st_startpoint(.))
+
+clusters_vis_ep = clusters_vis %>%
+  mutate(geometry = st_endpoint(.))
+
+tm_shape(study_area) +
+  tm_borders(col = "grey60",
+             alpha = 0.5) +
+  tm_shape(study_area) +
+  tm_fill(col = "grey95",
+          alpha = 0.5) +
+  tm_shape(clusters_vis) +
+  tm_lines(lwd = "commute_all",
+           col = "cluster",
+           #col = "darkgreen",
+           scale = 10,
+           palette = "Accent", #YlGn
+           #style = "pretty",
+           alpha = 1,
+           title.col = "Cluster",
+           title.lwd = "No. of commuters",
+           legend.col.show = FALSE,
+           # remove "missing from legend
+           showNA = FALSE) +
+  tm_facets(by = "cluster",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_shape(clusters_vis_sp) +
+  tm_dots(col = "black",
+          #col = "darkgreen",
+          shape = 0,
+          scale = 2,
+          palette = "Accent", #YlGn
+          #style = "pretty",
+          alpha = 1,
+          # remove "missing from legend
+          showNA = FALSE) +
+  tm_facets(by = "cluster",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+  tm_shape(clusters_vis_ep) +
+  tm_dots(col = "darkred",
+          #col = "darkgreen",
+          shape = 6,
+          scale = 2,
+          palette = "Accent", #YlGn
+          #style = "pretty",
+          alpha = 1,
+          # remove "missing from legend
+          showNA = FALSE) +
+  tm_facets(by = "cluster",
+            free.coords = FALSE,
+            nrow = rows,
+            showNA = FALSE) +
+
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            legend.outside = TRUE,
+            legend.outside.position = "bottom",
+            legend.stack = "horizontal",
+            # remove panel headers
+            panel.show = FALSE,
+            frame = FALSE) -> map_cluster_results
+
+map_cluster_results
diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R
index e8f840f..c7029bc 100644
--- a/code/demand_cluster_flows_prep.R
+++ b/code/demand_cluster_flows_prep.R
@@ -204,8 +204,8 @@ split_points <- function(sf_points, col_to_split, splits, offset_dist_min,
 sub_zones_2 <- split_points(sub_zones,
                             col_to_split = "population",
                             splits = 5,
-                            offset_dist_min = 10,
-                            offset_dist_max = 20,
+                            offset_dist_min = 100,
+                            offset_dist_max = 500,
                             target_crs = 3857)
 
 
@@ -219,7 +219,7 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE)
 # confirm it's empty
 dir(tempdir())
 
-selected_combination = "pt_wkday_morning"
+selected_combination = "pt_wkday_evening"
 
 od_demand_for_jittering <- od_demand_filtered %>%
   #filter(combination == selected_combination) %>%

From bc92569d39bc3ab03a792985a60b9928a12fe092 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Tue, 5 Nov 2024 20:39:51 +0000
Subject: [PATCH 10/16] updates

---
 code/demand_cluster_flows.R      | 103 ++++++++++++++++---------------
 code/demand_cluster_flows_prep.R |   4 +-
 2 files changed, 56 insertions(+), 51 deletions(-)

diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R
index c8d7cb0..762b013 100644
--- a/code/demand_cluster_flows.R
+++ b/code/demand_cluster_flows.R
@@ -24,6 +24,8 @@ plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/")
 
 # what combination are we clustering
 day_time = "pt_wkday_evening"
+# sensitivity analysis?
+sensitivity = FALSE
 
 # ----------- 1. Study area
 
@@ -318,55 +320,58 @@ hist(distances$fds, breaks = 100)
 # # ----- Sensitivity analysis(for different epsilon and minpts combinaitons)
 #
 # function to get clustering results for many combinations
-dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat,
-                                             options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9),
-                                             options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000),
-                                             weights = w_vec,
-                                             flows = st_drop_geometry(od_demand_jittered),
-                                             flow_column = "total_flow"
-                                             )
-
-
-arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_", day_time, ".parquet"))
-# dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
-
-
-# All results plotted together
-dbscan_sensitivity_res %>%
-  filter(cluster != 0) %>%
-  ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
-  geom_col() +
-  scale_y_continuous(trans='log10') +
-  facet_wrap(~id, scales = "fixed") +
-  labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
-       subtitle = "Parameter combinations that returned more than 1 cluster",
-       x = "Cluster no.",
-       y = "No. of od pairs in cluster",
-       fill= "No. of commuters") +
- theme_bw()
-
-ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_all_", day_time, ".png"), width = 14, height = 10)
-
-dbscan_sensitivity_res %>%
-  filter(cluster != 0) %>%
-  group_by(id) %>%
-  #mutate(clusters = n()) %>%
-  # How many clusters have more than 5 od pairs in them?
-  mutate(clusters = sum(size > 5)) %>%
-  ungroup() %>%
-  filter(clusters > 5) %>%
-  ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
-  geom_col() +
-  scale_y_continuous(trans='log10') +
-  facet_wrap(~id, scales = "fixed") +
-  labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
-       subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
-       x = "Cluster no.",
-       y = "No. of od pairs in cluster",
-       fill= "No. of commuters") +
-  theme_bw()
+if(sensitivity == TRUE){
+  dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat,
+                                               options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9),
+                                               options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000),
+                                               weights = w_vec,
+                                               flows = st_drop_geometry(od_demand_jittered),
+                                               flow_column = "total_flow"
+  )
+
+
+  arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_", day_time, ".parquet"))
+  # dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
+
+
+  # All results plotted together
+  dbscan_sensitivity_res %>%
+    filter(cluster != 0) %>%
+    ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
+    geom_col() +
+    scale_y_continuous(trans='log10') +
+    facet_wrap(~id, scales = "fixed") +
+    labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
+         subtitle = "Parameter combinations that returned more than 1 cluster",
+         x = "Cluster no.",
+         y = "No. of od pairs in cluster",
+         fill= "No. of commuters") +
+    theme_bw()
+
+  ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_all_", day_time, ".png"), width = 14, height = 10)
+
+  dbscan_sensitivity_res %>%
+    filter(cluster != 0) %>%
+    group_by(id) %>%
+    #mutate(clusters = n()) %>%
+    # How many clusters have more than 5 od pairs in them?
+    mutate(clusters = sum(size > 5)) %>%
+    ungroup() %>%
+    filter(clusters > 5) %>%
+    ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
+    geom_col() +
+    scale_y_continuous(trans='log10') +
+    facet_wrap(~id, scales = "fixed") +
+    labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
+         subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
+         x = "Cluster no.",
+         y = "No. of od pairs in cluster",
+         fill= "No. of commuters") +
+    theme_bw()
+
+  ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_", day_time, ".png"), width = 14, height = 10)
 
-ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_", day_time, ".png"), width = 14, height = 10)
+}
 
 
 
@@ -376,7 +381,7 @@ ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analy
 # cluster option 1: border points assigned to cluster
 cluster_dbscan = dbscan::dbscan(dist_mat,
                                 minPts = 50, # 125
-                                eps = 8, # 9.5
+                                eps = 7, # 9.5
                                 #borderPoints = FALSE,
                                 weights = w_vec)
 # # for splitted distance
diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R
index c7029bc..d984afe 100644
--- a/code/demand_cluster_flows_prep.R
+++ b/code/demand_cluster_flows_prep.R
@@ -203,7 +203,7 @@ split_points <- function(sf_points, col_to_split, splits, offset_dist_min,
 # apply function
 sub_zones_2 <- split_points(sub_zones,
                             col_to_split = "population",
-                            splits = 5,
+                            splits = 3,
                             offset_dist_min = 100,
                             offset_dist_max = 500,
                             target_crs = 3857)
@@ -238,7 +238,7 @@ od_demand_jittered = odjitter::jitter(
   # column with the flows (to be disaggregated)
   disaggregation_key = "total_flow",
   # What's the maximum number of trips per output OD row that's allowed?
-  disaggregation_threshold = 95,
+  disaggregation_threshold = 30,
 
   # ----- arguments for ZONES ----- #
 

From 9d49a18c995e5138932f182b87dd7b064e2e335a Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Wed, 6 Nov 2024 10:59:03 +0000
Subject: [PATCH 11/16] testing odjitter

---
 code/demand_cluster_flows_prep.R | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R
index d984afe..f9bffc6 100644
--- a/code/demand_cluster_flows_prep.R
+++ b/code/demand_cluster_flows_prep.R
@@ -203,7 +203,7 @@ split_points <- function(sf_points, col_to_split, splits, offset_dist_min,
 # apply function
 sub_zones_2 <- split_points(sub_zones,
                             col_to_split = "population",
-                            splits = 3,
+                            splits = 5,
                             offset_dist_min = 100,
                             offset_dist_max = 500,
                             target_crs = 3857)
@@ -219,7 +219,7 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE)
 # confirm it's empty
 dir(tempdir())
 
-selected_combination = "pt_wkday_evening"
+# selected_combination = "pt_wkday_evening"
 
 od_demand_for_jittering <- od_demand_filtered %>%
   #filter(combination == selected_combination) %>%
@@ -272,8 +272,26 @@ od_demand_jittered = odjitter::jitter(
 od_demand_jittered <- od_demand_jittered %>%
   mutate(across(starts_with("total_flow"), round))
 
-
-
+# # ---------- check if new ODs are in the same zone paairs as the old ODs ----------
+
+od_demand_jittered_test <- od_demand_jittered %>%
+  mutate(Origin_new = lwgeom::st_startpoint(geometry),
+         Destination_new = lwgeom::st_endpoint(geometry)) %>%
+  st_set_geometry("Origin_new") %>%
+  st_join(study_area %>% select(MSOA21CD) %>%
+            rename(MSOA_study_area_O = MSOA21CD)) %>%
+  st_set_geometry("Destination_new") %>%
+  st_join(study_area %>% select(MSOA21CD) %>%
+            rename(MSOA_study_area_D = MSOA21CD)) %>%
+  mutate(Origin_match = MSOA_study_area_O == Origin,
+         Destination_match = MSOA_study_area_D == Destination)
+
+
+# plot to check
+plot(st_geometry(study_area))
+plot(st_geometry(od_demand_jittered %>%
+                   filter(Origin == "E02002331", Destination == "E02002335")),
+     add = TRUE, col = "red")
 
 ########## ----------------------- Decide on the SCENARIOS we want to analyse ----------------------- ##########
 

From a7d5d2a9e3a4f3fb3e72a22912130646cb8e2dce Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Thu, 7 Nov 2024 15:20:55 +0000
Subject: [PATCH 12/16] remove filter_matrix_by_distance: it was removing
 jittered geometries

---
 code/demand_cluster_flows.R | 53 +++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 8 deletions(-)

diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R
index 762b013..b17eeb6 100644
--- a/code/demand_cluster_flows.R
+++ b/code/demand_cluster_flows.R
@@ -23,7 +23,7 @@ od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "
 plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/")
 
 # what combination are we clustering
-day_time = "pt_wkday_evening"
+day_time = "pt_wkday_afternoon"
 # sensitivity analysis?
 sensitivity = FALSE
 
@@ -48,9 +48,17 @@ study_area <- study_area %>%
   relocate(all_of(geoid_col), .before = everything())
 
 # add distance_m column
-od_demand_jittered = filter_matrix_by_distance(zones = study_area,
-                                               od_matrix = od_demand_jittered %>% st_drop_geometry(),
-                                               dist_threshold = 500)
+
+crs_orig = st_crs(od_demand_jittered)
+crs_metric = 3857
+
+od_demand_jittered = od_demand_jittered %>%
+  st_transform(crs_metric) %>%
+  mutate(distance_m = units::drop_units(sf::st_length(.))) %>%
+  st_transform(crs_orig) %>%
+  filter(distance_m >= 500)
+
+
 
 od_demand_jittered = od_demand_jittered %>%
   filter(combination == day_time)
@@ -322,8 +330,8 @@ hist(distances$fds, breaks = 100)
 # function to get clustering results for many combinations
 if(sensitivity == TRUE){
   dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat,
-                                               options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9),
-                                               options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000),
+                                               options_epsilon <- c(1, 1.5, 2, 3, 4, 5, 6, 7, 8, 9),
+                                               options_minpts <- c(50, 75, 100, 150, 200, 300, 500, 1000, 2500),
                                                weights = w_vec,
                                                flows = st_drop_geometry(od_demand_jittered),
                                                flow_column = "total_flow"
@@ -380,8 +388,8 @@ if(sensitivity == TRUE){
 
 # cluster option 1: border points assigned to cluster
 cluster_dbscan = dbscan::dbscan(dist_mat,
-                                minPts = 50, # 125
-                                eps = 7, # 9.5
+                                minPts = 50, # evening: 50 # 125
+                                eps = 7, # evening: 7   # 9.5
                                 #borderPoints = FALSE,
                                 weights = w_vec)
 # # for splitted distance
@@ -430,4 +438,33 @@ st_write(cluster_dbscan_res, paste0("data/processed/clustering/temporal/scenario
 
 
 
+# --- Compare clusters at different points in the day
+dbscan_sensitivity_res_wkday_morning = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+  mutate(scenario = "pt_wkday_morning")
+dbscan_sensitivity_res_wkday_afternoon = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_afternoon.parquet")) %>%
+  mutate(scenario = "pt_wkday_afternoon")
+dbscan_sensitivity_res_wkday_evening = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_evening.parquet")) %>%
+  mutate(scenario = "pt_wkday_evening")
+
+dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_morning,
+                                           dbscan_sensitivity_res_wkday_evening)
 
+
+dbscan_sensitivity_res_compare %>%
+  filter(cluster != 0) %>%
+  group_by(id) %>%
+  #mutate(clusters = n()) %>%
+  # How many clusters have more than 5 od pairs in them?
+  mutate(clusters = sum(size > 25)) %>%
+  ungroup() %>%
+  filter(clusters > 25) %>%
+  ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
+  geom_col() +
+  scale_y_continuous(trans='log10') +
+  facet_grid(id ~ scenario, scales = "fixed") +
+  labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
+       subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
+       x = "Cluster no.",
+       y = "No. of od pairs in cluster",
+       fill= "No. of commuters") +
+  theme_bw()

From 9d6a7088834ec8ac86c6902e47432daf7b1108b7 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Thu, 7 Nov 2024 17:14:32 +0000
Subject: [PATCH 13/16] commit before editing for temporal granularity

---
 code/demand_cluster_flows.R               |   4 +-
 code/demand_cluster_flows_maps_temporal.R |  59 +++++----
 code/demand_cluster_flows_prep.R          |  18 ++-
 code/routing_r5r_temporal.R               | 145 ++++++++++++++++++++++
 4 files changed, 196 insertions(+), 30 deletions(-)
 create mode 100644 code/routing_r5r_temporal.R

diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R
index b17eeb6..0b6262e 100644
--- a/code/demand_cluster_flows.R
+++ b/code/demand_cluster_flows.R
@@ -23,7 +23,7 @@ od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "
 plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/")
 
 # what combination are we clustering
-day_time = "pt_wkday_afternoon"
+day_time = "pt_wkday_evening"
 # sensitivity analysis?
 sensitivity = FALSE
 
@@ -77,7 +77,7 @@ od_demand_jittered = od_demand_jittered %>%
 # # 3) Flows with poor PT supply and low potential demand + and equal weight to origins and destinations (for flow distance)
 scenario <- 3
 clustering <- "equal"
-distance_threshold <- round(max(od_demand_jittered$distance_m), -3)
+distance_threshold <- 50000
 #
 # # 2) Focusing on shorter distances
 # scenario <- 3
diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R
index 467db3c..153f629 100644
--- a/code/demand_cluster_flows_maps_temporal.R
+++ b/code/demand_cluster_flows_maps_temporal.R
@@ -26,7 +26,7 @@ save = TRUE
 #  ------------------------ Load in the data  -------------------------- #
 
 # ----- Clustering results
-day_time = "pt_wkday_evening"
+day_time = "pt_wkday_afternoon"
 cluster_dbscan_res <- st_read(paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson"))
 
 
@@ -67,7 +67,8 @@ cluster_dbscan_res %>%
   st_drop_geometry() %>%
   group_by(cluster) %>%
   summarise(size = n(), commuters_sum = sum(commute_all)) %>%
-  arrange(desc(size))
+  arrange(desc(size)) %>%
+  head(15)
 
 # add size and total commuters columns
 cluster_dbscan_res <- cluster_dbscan_res %>%
@@ -83,7 +84,7 @@ plot(cluster_dbscan_res["cluster"])
 
 # get clusters to map
 cluster_dbscan_res %>%
-  filter(size > 20, size < 5000) %>%
+  filter(size > 15, size < 5000) %>%
   filter(commuters_sum > 200) %>%
   filter(cluster != 0) -> clusters_vis
 
@@ -96,6 +97,20 @@ clusters_vis <- clusters_vis %>%
   mutate(cluster = cur_group_id()) %>%
   ungroup()
 
+
+# Keep only the 12 largest clusters by commuters_sum if there are more than 12
+if (n_distinct(clusters_vis$cluster) > 12) {
+  top_clusters <- clusters_vis %>%
+    group_by(cluster) %>%
+    summarise(total_commuters = first(commuters_sum)) %>%
+    arrange(desc(total_commuters)) %>%
+    slice_head(n = 12) %>%
+    pull(cluster)
+
+  clusters_vis <- clusters_vis %>%
+    filter(cluster %in% top_clusters)
+}
+
 # we want maximum 4 maps per row
 rows <- round(length(unique(clusters_vis$cluster)) / 3)
 
@@ -142,7 +157,7 @@ tm_shape(study_area) +
 map_cluster_results
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_", day_time, "_.png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", "_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -203,7 +218,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 # --- Map with clusters ovelayed onto bus routes
@@ -268,7 +283,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -279,8 +294,8 @@ if(save == TRUE){
 # turn clusters into polygons using convex hull
 
 clusters_vis_mode_poly <- clusters_vis_mode %>%
-  filter(size > 7, size < 5000) %>%
-  filter(commuters_sum > 200) %>%
+  # filter(size > 7, size < 5000) %>%
+  # filter(commuters_sum > 200) %>%
   filter(cluster != 0) %>%
   mutate(cluster = as.factor(cluster)) %>%
   #head(1000) %>%
@@ -346,7 +361,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs_poly
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -429,7 +444,7 @@ tm_shape(clusters_vis_mode_poly) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_lines
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -563,7 +578,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -640,7 +655,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -770,7 +785,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -850,7 +865,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -973,7 +988,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -1112,7 +1127,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -1250,7 +1265,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -1328,7 +1343,7 @@ tm_shape(study_area) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave2_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave2.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -1440,7 +1455,7 @@ tm_shape(basemap_urban_rural) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -1490,7 +1505,7 @@ tm_shape(basemap_urban_rural) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
@@ -1547,7 +1562,7 @@ tm_shape(basemap_urban_rural) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_overline_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_overline.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 # ----- ONE BIG MAP (pop density as background)
@@ -1599,7 +1614,7 @@ tm_shape(st_union(study_area)) +
 map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline
 
 if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_one_map_overline_", day_time, ".png"), width = 12, dpi = 1080, asp = 0)
+  tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_one_map_overline.png"), width = 12, dpi = 1080, asp = 0)
 }
 
 
diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R
index f9bffc6..75491c3 100644
--- a/code/demand_cluster_flows_prep.R
+++ b/code/demand_cluster_flows_prep.R
@@ -103,7 +103,10 @@ sub_zones <- sub_zones %>%
 
 # rename population density column
 sub_zones <- sub_zones %>%
-  rename(population = gbr_pd_2020_1km_UNadj)
+  rename(population = gbr_pd_2020_1km_UNadj) %>%
+  # odjitter samples with replacement. Higher weighted subpoints will keep getting sampled
+  # Get cube root to increase probability of other points being sampled
+  mutate(population = population^(1/3))
 
 
 # --- Create more points (if we don't have enough points, then odjitter may not find a feasible solution)
@@ -203,9 +206,9 @@ split_points <- function(sf_points, col_to_split, splits, offset_dist_min,
 # apply function
 sub_zones_2 <- split_points(sub_zones,
                             col_to_split = "population",
-                            splits = 5,
-                            offset_dist_min = 100,
-                            offset_dist_max = 500,
+                            splits = 7,
+                            offset_dist_min = 200,
+                            offset_dist_max = 700,
                             target_crs = 3857)
 
 
@@ -238,7 +241,7 @@ od_demand_jittered = odjitter::jitter(
   # column with the flows (to be disaggregated)
   disaggregation_key = "total_flow",
   # What's the maximum number of trips per output OD row that's allowed?
-  disaggregation_threshold = 30,
+  disaggregation_threshold = 20,
 
   # ----- arguments for ZONES ----- #
 
@@ -261,7 +264,7 @@ od_demand_jittered = odjitter::jitter(
   # ----- arguments OTHER ----- #
 
   # Guarantee that jittered origin and destination points are at least this distance apart
-  min_distance_meters = 500,
+  min_distance_meters = 100,
   deduplicate_pairs = TRUE
 )
 
@@ -343,6 +346,9 @@ od_demand_jittered_scenarios <- od_demand_jittered %>%
                                 TRUE ~ 0)
   )
 
+# remove rows that have no flows
+od_demand_jittered_scenarios = od_demand_jittered_scenarios %>%
+  filter(total_flow != 0)
 
 ### Save the sfs for each scenario
 
diff --git a/code/routing_r5r_temporal.R b/code/routing_r5r_temporal.R
new file mode 100644
index 0000000..5217aa3
--- /dev/null
+++ b/code/routing_r5r_temporal.R
@@ -0,0 +1,145 @@
+###################################################################################################
+###    The purpose of this script is to calculate a travel time matrix for each different mode  ###
+###    combination. r5r is used for the calculations                                            ###
+###################################################################################################
+
+source("R/study_area_geographies.R")
+source("R/r5r_routing_wrappers.R")
+# source("code/routing_prep.R")
+
+library(tidyverse)
+library(sf)
+library(r5r)
+
+# increase the memory available to Java. Needs to be done at the beginning of the script
+options(java.parameters = "-Xmx35G")  # 3 gegabytes
+
+# define path where graph will be built (path with osm and pbf data)
+graph_path <- paste0("data/interim/routing_graph/")
+# define path where routing results will be saved
+travel_time_path <- paste0("data/processed/travel_times/")
+
+geography <- "MSOA"
+
+# what number do we want to give to OD pairs that cannot be reached within our travel time threshold
+na_time_replace <- 150  # 2.5 hours
+
+# create a directory to store the results
+dir.create(paste0(travel_time_path, geography))
+
+# ------------------------------------- PREPARE (BASE) OD MATRIX LAYER ------------------------------------- #
+
+study_area <- st_read("data/interim/study_area_boundary.geojson")
+
+# edit the study area to match the chosen resolution
+study_area <- study_area_geographies(study_area = study_area,
+                                     geography = geography)
+
+
+# r5r requires a POINT sf object with WGS84 CRS
+study_area <- study_area %>%
+  st_transform(4326) %>%
+  st_centroid()
+
+
+
+# Function takes a base layer, gets the geometry centroid, and renames the id column that we pass to it into a standard name
+prep_base_layer = function(layer, id_col){
+  # rename existing ID column with "id"
+  id_col = sym(id_col)
+  layer = layer %>%
+    rename(id = !! id_col)
+  # transform crs
+  layer = layer %>% st_transform(4326)
+  # get centroid
+  layer = layer %>% #select(id) %>%
+    st_centroid()
+}
+
+# apply the function
+study_area_r5 <- prep_base_layer(layer = study_area, id_col = "OBJECTID")
+
+
+
+# ------------------------------------- DEFINE ROUTING PARAMETERS ------------------------------------- #
+
+scenarios <- tribble(
+  ~scenario, ~mode,  ~departure_datetime,
+  # public transport at different times of day / week
+  # weekday
+  "pt_wkday_06_30", c("WALK", "TRANSIT"),  "14-08-2023 06:30:00",
+  "pt_wkday_09_30", c("WALK", "TRANSIT"),  "14-08-2023 09:30:00",
+  "pt_wkday_12_30", c("WALK", "TRANSIT"), "14-08-2023 12:30:00",
+  "pt_wkday_15_30", c("WALK", "TRANSIT"), "14-08-2023 15:30:00",
+  "pt_wkday_18_30", c("WALK", "TRANSIT"),  "14-08-2023 18:30:00",
+  # weekend
+  "pt_wkend_06_30", c("WALK", "TRANSIT"),  "13-08-2023 06:30:00",
+  "pt_wkend_09_30", c("WALK", "TRANSIT"),  "13-08-2023 09:30:00",
+  "pt_wkend_12_30", c("WALK", "TRANSIT"), "13-08-2023 12:30:00",
+  "pt_wkend_15_30", c("WALK", "TRANSIT"), "13-08-2023 15:30:00",
+  "pt_wkend_18_30", c("WALK", "TRANSIT"),  "13-08-2023 18:30:00",
+  # car (travel time is the same regardless of day / time) - unless we add congestion
+  #"car", c("CAR"), "14-08-2023 07:30:00",
+)
+
+
+# ------------------------------------- BUILD ROUTING GRAPH ------------------------------------- #
+
+# stop any running r5 instances
+# r5r::stop_r5()
+# java garbage collector to free up memory
+#rJava::.jgc(R.gc = TRUE)
+
+# setup r5
+print("Setting up r5...")
+r5r_core <- setup_r5(data_path = graph_path,
+                     verbose = TRUE,
+                     overwrite = TRUE) # turn to true once we have elevation
+
+print("Graph built...")
+# ------------------------------------- CALCULATE TRAVEL TIME MATRIX ------------------------------------- #
+
+
+# ---------- 2. route using r5r::expanded_travel_time_matrix. This gives you travel time broken down by journey components
+
+# --- apply the routing function
+
+# Option 1: story in memory
+
+# tt_expanded_results <- tt_matrix_expanded(#scenarios = scenarios,
+#                                           scenarios = scenarios[scenarios$scenario != "car", ],
+#                                           zone_layer = study_area_r5[1:10,],
+#                                           time_window = 5,
+#                                           storage_option = "memory")
+
+# Option 2: save to disk
+
+tt_matrix_expanded(scenarios = scenarios[scenarios$scenario != "car", ],
+                   zone_layer = study_area_r5,
+                   time_window = 10,
+                   storage_option = "save",
+                   save_format = "parquet",
+                   folder_path = paste0(travel_time_path, geography, "/travel_time_matrix_expanded/temporal"))
+
+
+# --- read in the parquet files
+# read in all the files using purrr::map
+files <- dir(paste0(travel_time_path, geography, "/travel_time_matrix_expanded/temporal"), full.names = TRUE)
+tt_results_expanded <- map(files, arrow::read_parquet)
+tt_results_expanded <- bind_rows(tt_results_expanded)
+
+# summarise the results to get one row per group. Currently we have one row for each minute in a time_window
+tt_results_expanded_s <- summarise_ttm_expanded(ttm_expanded_results = tt_results_expanded)
+
+# save the results
+arrow::write_parquet(tt_results_expanded_s, paste0(travel_time_path, geography, "/travel_time_matrix_expanded_temporal.parquet"))
+#tt_results_expanded_s <- arrow::read_parquet(paste0(travel_time_path, geography, "/travel_time_matrix_expanded.parquet"))
+
+
+
+
+# stop r5
+r5r::stop_r5(r5r_core)
+# java garbage collector to free up memory
+rJava::.jgc(R.gc = TRUE)
+

From 2096d6a05e7989e92e29e25050987a0eb3fce2f5 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Fri, 8 Nov 2024 18:13:48 +0000
Subject: [PATCH 14/16] temporal maps done

---
 code/demand_cluster_flows.R               |   71 +-
 code/demand_cluster_flows_maps_temporal.R | 1030 ++++++++++-----------
 code/demand_cluster_flows_run.R           |  251 +++++
 code/demand_on_buses.R                    |   12 +-
 code/eda_travel_demand_nobus.R            |   28 +-
 code/gtfs_convert_stop_times_to_freq.R    |   12 +-
 code/performance_od.R                     |    6 +-
 code/travel_demand_temporal.R             |   10 +-
 8 files changed, 805 insertions(+), 615 deletions(-)
 create mode 100644 code/demand_cluster_flows_run.R

diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R
index 0b6262e..5c4b16d 100644
--- a/code/demand_cluster_flows.R
+++ b/code/demand_cluster_flows.R
@@ -23,9 +23,9 @@ od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "
 plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/")
 
 # what combination are we clustering
-day_time = "pt_wkday_evening"
+# day_time = "pt_wkday_06_30"
 # sensitivity analysis?
-sensitivity = FALSE
+# sensitivity = FALSE
 
 # ----------- 1. Study area
 
@@ -435,36 +435,43 @@ cluster_dbscan_res <- od_demand_jittered %>%
 st_write(cluster_dbscan_res, paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson"), delete_dsn = TRUE)
 
 
+#
+#
+#
+# # --- Compare clusters at different points in the day
+# dbscan_sensitivity_res_wkday_06_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+#   mutate(scenario = "pt_wkday_06_30")
+# dbscan_sensitivity_res_wkday_09_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+#   mutate(scenario = "pt_wkday_09_30")
+# dbscan_sensitivity_res_wkday_12_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+#   mutate(scenario = "pt_wkday_12_30")
+# dbscan_sensitivity_res_wkday_15_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+#   mutate(scenario = "pt_wkday_15_30")
+# dbscan_sensitivity_res_wkday_18_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+#   mutate(scenario = "pt_wkday_18_30")
+#
+# dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_morning,
+#                                            dbscan_sensitivity_res_wkday_evening)
+#
+#
+# dbscan_sensitivity_res_compare %>%
+#   filter(cluster != 0) %>%
+#   group_by(id) %>%
+#   #mutate(clusters = n()) %>%
+#   # How many clusters have more than 5 od pairs in them?
+#   mutate(clusters = sum(size > 25)) %>%
+#   ungroup() %>%
+#   filter(clusters > 25) %>%
+#   ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
+#   geom_col() +
+#   scale_y_continuous(trans='log10') +
+#   facet_grid(id ~ scenario, scales = "fixed") +
+#   labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
+#        subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
+#        x = "Cluster no.",
+#        y = "No. of od pairs in cluster",
+#        fill= "No. of commuters") +
+#   theme_bw()
 
 
 
-# --- Compare clusters at different points in the day
-dbscan_sensitivity_res_wkday_morning = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
-  mutate(scenario = "pt_wkday_morning")
-dbscan_sensitivity_res_wkday_afternoon = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_afternoon.parquet")) %>%
-  mutate(scenario = "pt_wkday_afternoon")
-dbscan_sensitivity_res_wkday_evening = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_evening.parquet")) %>%
-  mutate(scenario = "pt_wkday_evening")
-
-dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_morning,
-                                           dbscan_sensitivity_res_wkday_evening)
-
-
-dbscan_sensitivity_res_compare %>%
-  filter(cluster != 0) %>%
-  group_by(id) %>%
-  #mutate(clusters = n()) %>%
-  # How many clusters have more than 5 od pairs in them?
-  mutate(clusters = sum(size > 25)) %>%
-  ungroup() %>%
-  filter(clusters > 25) %>%
-  ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
-  geom_col() +
-  scale_y_continuous(trans='log10') +
-  facet_grid(id ~ scenario, scales = "fixed") +
-  labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
-       subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
-       x = "Cluster no.",
-       y = "No. of od pairs in cluster",
-       fill= "No. of commuters") +
-  theme_bw()
diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R
index 153f629..3ca40fa 100644
--- a/code/demand_cluster_flows_maps_temporal.R
+++ b/code/demand_cluster_flows_maps_temporal.R
@@ -21,12 +21,12 @@ clustering <- "equal"
 distance_threshold <- 50000   # 10000
 
 # save plots?
-save = TRUE
+save = FALSE
 
 #  ------------------------ Load in the data  -------------------------- #
 
 # ----- Clustering results
-day_time = "pt_wkday_afternoon"
+# day_time = "pt_wkday_06_30"
 cluster_dbscan_res <- st_read(paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson"))
 
 
@@ -116,49 +116,49 @@ rows <- round(length(unique(clusters_vis$cluster)) / 3)
 
 
 ##### ---------- MAPS ---------- #####
-
-### ---------- Plot 1: Top n Clusters (Facet Plot) ---------- ###
-
-tm_shape(study_area) +
-  tm_borders(col = "grey60",
-             alpha = 0.5) +
-  tm_shape(study_area) +
-  tm_fill(col = "grey95",
-          alpha = 0.5) +
-  tm_shape(clusters_vis) +
-  tm_lines(lwd = "commute_all",
-           col = "cluster",
-           #col = "darkgreen",
-           scale = 10,
-           palette = "Accent", #YlGn
-           #style = "pretty",
-           alpha = 1,
-           title.col = "Cluster",
-           title.lwd = "No. of commuters",
-           legend.col.show = FALSE,
-           # remove "missing from legend
-           showNA = FALSE) +
-  tm_facets(by = "cluster",
-            free.coords = FALSE,
-            nrow = rows,
-            showNA = FALSE) +
-  tm_layout(fontfamily = 'Georgia',
-            main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
-            main.title.size = 1.1,
-            main.title.color = "azure4",
-            main.title.position = "left",
-            legend.outside = TRUE,
-            legend.outside.position = "bottom",
-            legend.stack = "horizontal",
-            # remove panel headers
-            panel.show = FALSE,
-            frame = FALSE) -> map_cluster_results
-
-map_cluster_results
-
-if(save == TRUE){
-  tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", "_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_.png"), width = 12, dpi = 1080, asp = 0)
-}
+#
+# ### ---------- Plot 1: Top n Clusters (Facet Plot) ---------- ###
+#
+# tm_shape(study_area) +
+#   tm_borders(col = "grey60",
+#              alpha = 0.5) +
+#   tm_shape(study_area) +
+#   tm_fill(col = "grey95",
+#           alpha = 0.5) +
+#   tm_shape(clusters_vis) +
+#   tm_lines(lwd = "commute_all",
+#            col = "cluster",
+#            #col = "darkgreen",
+#            scale = 10,
+#            palette = "Accent", #YlGn
+#            #style = "pretty",
+#            alpha = 1,
+#            title.col = "Cluster",
+#            title.lwd = "No. of commuters",
+#            legend.col.show = FALSE,
+#            # remove "missing from legend
+#            showNA = FALSE) +
+#   tm_facets(by = "cluster",
+#             free.coords = FALSE,
+#             nrow = rows,
+#             showNA = FALSE) +
+#   tm_layout(fontfamily = 'Georgia',
+#             main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
+#             main.title.size = 1.1,
+#             main.title.color = "azure4",
+#             main.title.position = "left",
+#             legend.outside = TRUE,
+#             legend.outside.position = "bottom",
+#             legend.stack = "horizontal",
+#             # remove panel headers
+#             panel.show = FALSE,
+#             frame = FALSE) -> map_cluster_results
+#
+# map_cluster_results
+#
+# if(save == TRUE){
+#   tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_.png"), width = 12, dpi = 1080, asp = 0)
+# }
 
 
 ### ---------- Plot 2: Compare mode composition of each cluster (desire line level) ---------- ###
@@ -180,52 +180,52 @@ clusters_vis_mode_summary <- clusters_vis_mode %>%
 clusters_vis_mode <- clusters_vis_mode %>%
   inner_join(clusters_vis_mode_summary %>% select(cluster), by = "cluster")
 
-
-tm_shape(study_area) +
-  tm_borders(col = "grey60",
-             alpha = 0.5) +
-  tm_shape(study_area) +
-  tm_fill(col = "grey95",
-          alpha = 0.5) +
-  tm_shape(clusters_vis_mode) +
-  tm_lines(lwd = "commute_all",
-           col = "commute_all",
-           scale = 10,
-           palette = "RdYlGn", #Accent
-           #style = "pretty",
-           alpha = 1,
-           title.col = "Travel demand",
-           title.lwd = "No. of commuters",
-           #legend.col.is.portrait = FALSE,
-           # remove "missing from legend
-           showNA = FALSE) +
-  tm_facets(by = "cluster",
-            free.coords = FALSE,
-            nrow = rows,
-            showNA = FALSE) +
-  tm_layout(fontfamily = 'Georgia',
-            main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
-            main.title.size = 1.1,
-            main.title.color = "azure4",
-            main.title.position = "left",
-            # legend.outside = TRUE,
-            # legend.outside.position = "bottom",
-            # legend.stack = "horizontal",
-            # remove panel headers
-            panel.show = FALSE,
-            frame = FALSE) -> map_cluster_results_bus_frac_grouped
-
-map_cluster_results_bus_frac_grouped
-
-if(save == TRUE){
-  tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_.png"), width = 12, dpi = 1080, asp = 0)
-}
+#
+# tm_shape(study_area) +
+#   tm_borders(col = "grey60",
+#              alpha = 0.5) +
+#   tm_shape(study_area) +
+#   tm_fill(col = "grey95",
+#           alpha = 0.5) +
+#   tm_shape(clusters_vis_mode) +
+#   tm_lines(lwd = "commute_all",
+#            col = "commute_all",
+#            scale = 10,
+#            palette = "RdYlGn", #Accent
+#            #style = "pretty",
+#            alpha = 1,
+#            title.col = "Travel demand",
+#            title.lwd = "No. of commuters",
+#            #legend.col.is.portrait = FALSE,
+#            # remove "missing from legend
+#            showNA = FALSE) +
+#   tm_facets(by = "cluster",
+#             free.coords = FALSE,
+#             nrow = rows,
+#             showNA = FALSE) +
+#   tm_layout(fontfamily = 'Georgia',
+#             main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
+#             main.title.size = 1.1,
+#             main.title.color = "azure4",
+#             main.title.position = "left",
+#             # legend.outside = TRUE,
+#             # legend.outside.position = "bottom",
+#             # legend.stack = "horizontal",
+#             # remove panel headers
+#             panel.show = FALSE,
+#             frame = FALSE) -> map_cluster_results_bus_frac_grouped
+#
+# map_cluster_results_bus_frac_grouped
+#
+# if(save == TRUE){
+#   tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_.png"), width = 12, dpi = 1080, asp = 0)
+# }
 
 # --- Map with clusters ovelayed onto bus routes
 
 
 
-gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf.geojson")
+gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf_temporal.geojson")
 
 # gtfs_bus <- gtfs_bus %>%
 #   st_filter(st_union(study_area), .predicate = st_within)
@@ -1058,7 +1058,7 @@ tm_shape(study_area) +
           alpha = 0.6,
           jitter = 0.01,
           size = "commute_all",
-          scale = 0.8,
+          scale = 0.5,
           legend.size.show = FALSE) +
   tm_facets(by = "cluster",
             #by = "commute_all",
@@ -1077,7 +1077,7 @@ tm_shape(study_area) +
           alpha = 0.6,
           jitter = 0.01,
           size = "commute_all",
-          scale = 0.8,
+          scale = 0.5,
           legend.size.show = FALSE) +
   tm_facets(by = "cluster",
             #by = "commute_all",
@@ -1196,7 +1196,7 @@ tm_shape(study_area) +
           alpha = 0.6,
           jitter = 0.01,
           size = "commute_all",
-          scale = 1,
+          scale = 0.5,
           legend.size.show = FALSE) +
   tm_facets(by = "cluster",
             #by = "commute_all",
@@ -1215,7 +1215,7 @@ tm_shape(study_area) +
           alpha = 0.6,
           jitter = 0.01,
           size = "commute_all",
-          scale = 1,
+          scale = 0.5,
           legend.size.show = FALSE) +
   tm_facets(by = "cluster",
             #by = "commute_all",
@@ -1463,6 +1463,20 @@ if(save == TRUE){
 
 # ---------------------- ONE BIG MAP
 
+# Combine cluster polygons
+clusters_vis_mode_poly_filt3_all = clusters_vis_mode_poly_filt3 %>%
+  mutate(area = st_area(.)) %>%
+  filter(area > 0.2 * mean(area)) %>%
+  st_union() %>%
+  st_make_valid() %>%
+  st_as_sf() %>%
+  mutate(scenario = day_time)
+
+# Save to make temporal comparison plot
+st_write(clusters_vis_mode_poly_filt3_all, paste0(plots_path, "polygons_combined/", day_time, ".geojson"), delete_dsn = TRUE)
+
+
+
 
 tm_shape(basemap_urban_rural) +
   tm_fill(col = "RUC11CD_NM",
@@ -1470,7 +1484,7 @@ tm_shape(basemap_urban_rural) +
           alpha = 0.5,
           title = "Level of \nUrbanisation") +
   # bus layer
-  tm_shape(gtfs_bus %>%
+tm_shape(gtfs_bus %>%
              filter(scenario == day_time) %>%
              mutate(headway_inv = (1/headway_secs) * 3600) %>%
              filter(headway_secs < 7200)) +
@@ -1487,9 +1501,7 @@ tm_shape(basemap_urban_rural) +
   # ---- clusters
   # poly fill
   #tm_shape(st_union(clusters_vis_mode_poly_filt3)) +
-  tm_shape(st_union(clusters_vis_mode_poly_filt3 %>%
-                      mutate(area = st_area(.)) %>%
-                      filter(area > 0.2 * mean(area)))) +
+tm_shape(clusters_vis_mode_poly_filt3_all) +
   tm_borders(col = "darkgreen",
              lwd = 3.5,
              lty = "dashed") +
@@ -1544,9 +1556,7 @@ tm_shape(basemap_urban_rural) +
   # ---- clusters
   # poly fill
   #tm_shape(st_union(clusters_vis_mode_poly_filt3)) +
-  tm_shape(st_union(clusters_vis_mode_poly_filt3 %>%
-                      mutate(area = st_area(.)) %>%
-                      filter(area > 0.2 * mean(area)))) +
+  tm_shape(clusters_vis_mode_poly_filt3_all) +
   tm_borders(col = "darkgreen",
              lwd = 3.5,
              lty = "dashed") +
@@ -1596,9 +1606,7 @@ tm_shape(st_union(study_area)) +
   # ---- clusters
   # poly fill
   #tm_shape(st_union(clusters_vis_mode_poly_filt3)) +
-  tm_shape(st_union(clusters_vis_mode_poly_filt3 %>%
-                      mutate(area = st_area(.)) %>%
-                      filter(area > 0.2 * mean(area)))) +
+  tm_shape(clusters_vis_mode_poly_filt3_all) +
   tm_borders(col = "darkgreen",
              lwd = 4,
              lty = "dashed") +
@@ -1619,457 +1627,373 @@ if(save == TRUE){
 
 
 
-
-
-
-
-
-
-
-
-
-
-
-##### ---------- FIGURES ---------- #####
-
-### ---------- 1. Get unjittered OD data (code from code/demand_cluster_flows_prep.R) - travel times haven't been affected by jittering
-
-# is the data disaggregated by mode?
-mode <- TRUE
-#mode <- FALSE
-
-# Demand (census) + supply (travel time) data
-
-if(mode == FALSE){
-  # data with "commute_all" only
-  #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
-  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
-} else{
-  # data with modes
-  #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
-  od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))
-}
-
-# filter to specific combination
-# TODO: get seperate flows for car and pt, and keep two combinations
-od_demand <- od_demand %>%
-  filter(combination == "pt_wkday_morning")
-
-od_demand <- od_demand %>%
-  select(-distance_m)
-
-# --- create desire lines and remove od pairs with very short distance
-
-# TODO: edit this to avoid clusters of very short flows
-# "Density-based clustering for bivariate-flow data" (section 5.2): preprocessing step to avoid
-# clusters of very short flows. this involves splitting the data into 3 chunks
-# based on length (
-od_demand_filtered = filter_matrix_by_distance(zones = study_area,
-                                               od_matrix = od_demand,
-                                               dist_threshold = 1000)
-
-# add unique id for each row
-od_demand_filtered <- od_demand_filtered %>%
-  mutate(od_id = paste0(Origin, "-", Destination, "-", combination))
-
-
-
-
-# ----- add the clustering results to the unjittered demand
-od_demand_figures <- cluster_dbscan_res %>%
-  select(Origin, Destination, starts_with("commute_"), od_id, flow_ID, cluster, size, commute_all) %>%
-  left_join(od_demand_filtered %>%
-              st_drop_geometry() %>%
-              distinct(od_id, .keep_all = TRUE) %>%
-              # travel times before being ruined by od_jitter()
-              select(od_id, ends_with("_time"), n_rides, starts_with("speed_"), starts_with("ride_time_")),
-            by = "od_id")
-
-# match existing clusters
+# ##### ---------- FIGURES ---------- #####
+#
+# ### ---------- 1. Get unjittered OD data (code from code/demand_cluster_flows_prep.R) - travel times haven't been affected by jittering
+#
+# # is the data disaggregated by mode?
+# mode <- TRUE
+# #mode <- FALSE
+#
+# # Demand (census) + supply (travel time) data
+#
+# if(mode == FALSE){
+#   # data with "commute_all" only
+#   #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
+#   od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
+# } else{
+#   # data with modes
+#   #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
+#   od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))
+# }
+#
+# # filter to specific combination
+# # TODO: get seperate flows for car and pt, and keep two combinations
+# od_demand <- od_demand %>%
+#   filter(combination == "pt_wkday_morning")
+#
+# od_demand <- od_demand %>%
+#   select(-distance_m)
+#
+# # --- create desire lines and remove od pairs with very short distance
+#
+# # TODO: edit this to avoid clusters of very short flows
+# # "Density-based clustering for bivariate-flow data" (section 5.2): preprocessing step to avoid
+# # clusters of very short flows. this involves splitting the data into 3 chunks
+# # based on length (
+# od_demand_filtered = filter_matrix_by_distance(zones = study_area,
+#                                                od_matrix = od_demand,
+#                                                dist_threshold = 1000)
+#
+# # add unique id for each row
+# od_demand_filtered <- od_demand_filtered %>%
+#   mutate(od_id = paste0(Origin, "-", Destination, "-", combination))
+#
+#
+#
+#
+# # ----- add the clustering results to the unjittered demand
+# od_demand_figures <- cluster_dbscan_res %>%
+#   select(Origin, Destination, starts_with("commute_"), od_id, flow_ID, cluster, size, commute_all) %>%
+#   left_join(od_demand_filtered %>%
+#               st_drop_geometry() %>%
+#               distinct(od_id, .keep_all = TRUE) %>%
+#               # travel times before being ruined by od_jitter()
+#               select(od_id, ends_with("_time"), n_rides, starts_with("speed_"), starts_with("ride_time_")),
+#             by = "od_id")
+#
+# # match existing clusters
+# # od_demand_figures_filt <- od_demand_figures %>%
+# #   filter(size > 7, size < 5000) %>%
+# #   filter(commute_all > 200) %>%
+# #   filter(cluster != 0)
+#
 # od_demand_figures_filt <- od_demand_figures %>%
-#   filter(size > 7, size < 5000) %>%
-#   filter(commute_all > 200) %>%
-#   filter(cluster != 0)
-
-od_demand_figures_filt <- od_demand_figures %>%
-  rename(cluster_orig = cluster) %>%
-  inner_join(clusters_vis %>%
-               st_drop_geometry() %>%
-               select(flow_ID, cluster),
-             by = "flow_ID") %>%
-  # keep only the clusters after intersection with gtfs
-  filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)
-
-
-
-
-# ----- plots
-
-# scatter plots with x: commute_all, y: fraction of bus commuters
-ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = commute_bus)) +
-  geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") +
-  geom_point() +
-  ylim(0, 5) +
-  scale_color_distiller(palette= "RdYlGn", direction = 1) +
-  labs(x = "total no. of commuters",
-       y = "Bus / Car commuters (fraction)",
-       title = "Composition of clusters",
-       subtitle = "No. of commuters",
-       color = "Commuters (bus)") +
-  facet_wrap(facets = vars(cluster)) +
-  theme_bw() +
-  theme(legend.position = "bottom")
-
-ggsave(paste0(plots_path, "figure_scatter_commuters_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
-
-# paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0)
-
-
-ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) +
-  geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") +
-  geom_point() +
-  ylim(0, 5) +
-  scale_color_distiller(palette= "RdYlGn", direction = 1) +
-  labs(x = "total no. of commuters",
-       y = "Bus / Car commuters (fraction)",
-       color = "Average speed of \nbus commute (kph)",
-       title = "Composition of clusters: \nNo. of commuters in each OD pair") +
-  facet_wrap(facets = vars(cluster)) +
-  theme_bw() +
-  theme(legend.position = "bottom")
-
-ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
-
-# same but replacing NA speed for min speed
-ggplot(od_demand_figures_filt %>%
-         st_drop_geometry() %>%
-         mutate(speed_kph = replace_na(speed_kph, min(speed_kph, na.rm = TRUE))),
-       aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) +
-  geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") +
-  geom_point() +
-  ylim(0, 5) +
-  scale_color_distiller(palette= "RdYlGn", direction = 1) +
-  labs(x = "total no. of commuters",
-       y = "Bus / Car commuters (fraction)",
-       color = "Average speed of \nbus commute (kph)",
-       title = "Composition of clusters: \nNo. of commuters in each OD pair") +
-  facet_wrap(facets = vars(cluster)) +
-  theme_bw() +
-  theme(legend.position = "bottom")
-
-ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, "_no_NA.png"), height = 8, width = 6)
-
-# ----- Get the spatial coverage of each cluster (what portion is in urban / rural etc?)
-
-# get intersection
-clusters_vis_mode_poly %>%
-  st_intersection(basemap_urban_rural %>%
-                    st_transform(st_crs(clusters_vis_mode_poly))) %>%
-  # get area
-  mutate(area_km = as.numeric(st_area(.) / 1000000))  %>%
-  # area by cluster
-  st_drop_geometry() %>%
-  group_by(cluster, RUC11, RUC11CD_NM_FCT) %>%
-  summarise(area_km = sum(area_km)) %>%
-  # keep only the clusters after intersection with gtfs
-  filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly
-
-# define custom color paletter
-colors_urban_rural <- (c("#01665E", "#35978F", "#80CDC1", "#DFC27D", "#8C510A"))
-# order column based on the RUC11CD_NM_FCT column (degree of urbanisation)
-clusters_ur_poly$RUC11 <- factor(clusters_ur_poly$RUC11, levels = unique(clusters_ur_poly$RUC11)[order(clusters_ur_poly$RUC11CD_NM_FCT)])
-
-ggplot(clusters_ur_poly, aes(x = RUC11, y = area_km, fill = RUC11)) +
-  geom_col(color = "grey60") +
-  scale_fill_manual(values = colors_urban_rural,
-                    labels = function(x) str_wrap(x, width = 25)) +
-  labs(x = "",
-       y = "Area covered by cluster (km2)",
-       color = "Rural / Urban Classification",
-       title = "Composition of clusters: Rural / Urban") +
-  theme_bw() +
-  theme(axis.text.x = element_blank(),
-        axis.ticks.x=element_blank(),
-        legend.position = "bottom",
-        legend.title = element_blank()) +
-  guides(fill = guide_legend(nrow = 3)) +
-  facet_wrap(facets = vars(cluster))
-
-ggsave(paste0(plots_path, "figure_bar_urban_rural_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
-
-
-# Same but for filtered polygon
-
-clusters_vis_mode_poly_filt3 %>%
-  st_intersection(basemap_urban_rural %>%
-                    st_transform(st_crs(clusters_vis_mode_poly))) %>%
-  # get area
-  mutate(area_km = as.numeric(st_area(.) / 1000000))  %>%
-  # area by cluster
-  st_drop_geometry() %>%
-  group_by(cluster, RUC11, RUC11CD_NM_FCT) %>%
-  summarise(area_km = sum(area_km))  %>%
-  # keep only the clusters after intersection with gtfs
-  filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly_filt
-
-clusters_ur_poly_filt$RUC11 <- factor(clusters_ur_poly_filt$RUC11, levels = unique(clusters_ur_poly_filt$RUC11)[order(clusters_ur_poly_filt$RUC11CD_NM_FCT)])
-
-
-ggplot(clusters_ur_poly_filt, aes(x = RUC11, y = area_km, fill = RUC11)) +
-  geom_col(color = "grey60") +
-  scale_fill_manual(values = colors_urban_rural,
-                    #direction = -1,
-                    labels = function(x) str_wrap(x, width = 25)) +
-  labs(x = "",
-       y = "Area covered by cluster (km2)",
-       color = "Rural / Urban Classification",
-       title = "Composition of clusters: Rural / Urban") +
-  theme_bw() +
-  theme(axis.text.x = element_blank(),
-        axis.ticks.x=element_blank(),
-        legend.position = "bottom",
-        legend.title = element_blank()) +
-  guides(fill = guide_legend(nrow = 3)) +
-  facet_wrap(facets = vars(cluster))
-
-ggsave(paste0(plots_path, "figure_bar_urban_rural_filtered_by_gtfs_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
-
-
-
-
-### ---- join the data to plot together
-clusters_ur_poly %>%
-  left_join(clusters_ur_poly_filt %>%
-              rename(area_km_filt = area_km),
-            by = c("cluster", "RUC11")) -> clusters_ur_poly_combined
-
-
-
-# --- plot both together
-ggplot(clusters_ur_poly_combined) +
-  geom_col(aes(x = RUC11, y = area_km, color = RUC11, alpha = 0.01)) +
-  geom_col(aes(x = RUC11, y = area_km_filt, fill = RUC11)) +
-  scale_fill_manual(values = colors_urban_rural,
-                    #direction = -1,
-                    labels = function(x) str_wrap(x, width = 25)) +
-  scale_color_manual(values = colors_urban_rural,
-                     #direction = -1,
-                     labels = function(x) str_wrap(x, width = 25)) +
-  labs(x = "",
-       y = "Area covered by cluster (km2)",
-       fill = "Rural / Urban Classification",
-       title = "Composition of clusters: Rural / Urban",
-       subtitle = "Before and after intersecting with bus network",
-       caption = "border: area covered by entire cluster \nfill: area covered by cluster after intersection with bus network ") +
-  guides(color = "none", alpha = "none") +
-  scale_alpha_identity() +  # Maintain alpha value
-  theme_bw() +
-  theme(axis.text.x = element_blank(),
-        axis.ticks.x=element_blank(),
-        legend.position = "bottom",
-        legend.title = element_blank(),
-        plot.caption = element_text(hjust = 0)) +
-  guides(fill = guide_legend(nrow = 3)) +
-  facet_wrap(facets = vars(cluster))
-
-ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
-
-
-
-
-
-# ---------- PLots of line bearings in each cluster
-
-# --- calculate bearings
-
-od_demand_figures_bearings <- od_demand_figures_filt %>%
-  mutate(bearing = stplanr::line_bearing(.),
-         bearing_adjusted = case_when(bearing < 0 ~ bearing + 360,
-                                      .default =  bearing),
-         bearing_180 = case_when(bearing < 0 ~ bearing * -1,
-                                 .default =  bearing))
-
-
-
-# add length column
-od_demand_figures_bearings <- od_demand_figures_bearings %>%
-  mutate(distance_m =  units::drop_units(sf::st_length(.)))
-
-# --- get columns for distance and bearing "groups" - for facet plots cut distance angle groups for plots
-
-
-# Define the breaks for the buckets
-#breaks_angle <- seq(-10, 370, by = 20)
-breaks_angle <- seq(0, 360, by = 30)
-#breaks_angle <- seq(0, 180, by = 15)
-breaks_distance <- seq(0, 50, by = 10)
-
-od_demand_figures_bearings <- od_demand_figures_bearings %>%
-  mutate(bucket = cut(bearing_adjusted, breaks = breaks_angle, right = FALSE, include.lowest = TRUE),
-         # mutate(bucket = cut(bearing_180, breaks = breaks_angle, right = FALSE, include.lowest = TRUE),
-         bucket_distance = cut(distance_m / 1000, breaks = breaks_distance, right = FALSE, include.lowest = TRUE)) %>%
-  mutate(bucket_distance = fct_rev(bucket_distance))
-
-# Ensure the bucket factor levels are ordered correctly
-#bearings_cat$bucket <- factor(bearings_cat$bucket, levels = unique(bearings_cat$bucket), ordered = TRUE)
-
-
-# --- Plot with no of lines only (geom_bar())
-
-
-# bar
-ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) +
-  geom_bar() +
-  facet_wrap(facets = "cluster") +
-  labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") +
-  scale_x_discrete(labels = breaks_angle) +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1),
-        legend.position = "bottom")
-
-ggsave(paste0(plots_path, "figure_bar_bearing_y_ods_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8)
-
-
-# circle
-ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) +
-  geom_bar() +
-  coord_polar(start = -0.1) +
-  facet_wrap(facets = "cluster") +
-  labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") +
-  #scale_x_discrete(labels = breaks_angle) +
-  scale_x_discrete(labels = breaks_angle) +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1))
-
-
-
-# --- Plot with total commuters geom_col()
-
-
-# bar
-ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) +
-  geom_col() +
-  #coord_polar() +
-  facet_wrap(facets = "cluster") +
-  labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") +
-  scale_x_discrete(labels = breaks_angle) +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1),
-        legend.position = "bottom")
-
-ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8)
-
-# circle
-ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) +
-  geom_col() +
-  coord_radial(rotate.angle = TRUE) +
-  facet_wrap(facets = "cluster") +
-  labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") +
-  #scale_x_discrete(labels = breaks_angle) +
-  scale_x_discrete(labels = breaks_angle) +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1))
-
-
-
-
-# TESTING FACET GRID
-
-# Select labels to print (one in every three)
-labels_to_print <- levels(od_demand_figures_bearings$bucket)[seq(1, length(levels(od_demand_figures_bearings$bucket)), by = 3)]
-
-
-ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) +
-  geom_col() +
-  #coord_polar() +
-  facet_grid(bucket_distance ~ cluster,
-             drop = TRUE) +
-  labs(x = "Bearing", y = "Total Commuters", fill = "OD Pair Length") +
-  # scale_x_discrete(labels = breaks_angle) +
-  scale_x_discrete(labels = function(x) ifelse(x %in% labels_to_print, x, "")) + # Print one in every three labels # theme_minimal() +
-  theme_bw() +
-  theme(axis.text.x = element_text(angle = 60, hjust = 1),
-        legend.position = "bottom")
-
-
-ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_grid_cluster_distance_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 10)
-
-
-
-
-
-
-
-
-
-
-
-
-
-clusters_vis_sp = clusters_vis %>%
-  mutate(geometry = st_startpoint(.))
-
-clusters_vis_ep = clusters_vis %>%
-  mutate(geometry = st_endpoint(.))
-
-tm_shape(study_area) +
-  tm_borders(col = "grey60",
-             alpha = 0.5) +
-  tm_shape(study_area) +
-  tm_fill(col = "grey95",
-          alpha = 0.5) +
-  tm_shape(clusters_vis) +
-  tm_lines(lwd = "commute_all",
-           col = "cluster",
-           #col = "darkgreen",
-           scale = 10,
-           palette = "Accent", #YlGn
-           #style = "pretty",
-           alpha = 1,
-           title.col = "Cluster",
-           title.lwd = "No. of commuters",
-           legend.col.show = FALSE,
-           # remove "missing from legend
-           showNA = FALSE) +
-  tm_facets(by = "cluster",
-            free.coords = FALSE,
-            nrow = rows,
-            showNA = FALSE) +
-  tm_shape(clusters_vis_sp) +
-  tm_dots(col = "black",
-          #col = "darkgreen",
-          shape = 0,
-          scale = 2,
-          palette = "Accent", #YlGn
-          #style = "pretty",
-          alpha = 1,
-          # remove "missing from legend
-          showNA = FALSE) +
-  tm_facets(by = "cluster",
-            free.coords = FALSE,
-            nrow = rows,
-            showNA = FALSE) +
-  tm_shape(clusters_vis_ep) +
-  tm_dots(col = "darkred",
-          #col = "darkgreen",
-          shape = 6,
-          scale = 2,
-          palette = "Accent", #YlGn
-          #style = "pretty",
-          alpha = 1,
-          # remove "missing from legend
-          showNA = FALSE) +
-  tm_facets(by = "cluster",
-            free.coords = FALSE,
-            nrow = rows,
-            showNA = FALSE) +
-
-  tm_layout(fontfamily = 'Georgia',
-            main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time),
-            main.title.size = 1.1,
-            main.title.color = "azure4",
-            main.title.position = "left",
-            legend.outside = TRUE,
-            legend.outside.position = "bottom",
-            legend.stack = "horizontal",
-            # remove panel headers
-            panel.show = FALSE,
-            frame = FALSE) -> map_cluster_results
-
-map_cluster_results
+#   rename(cluster_orig = cluster) %>%
+#   inner_join(clusters_vis %>%
+#                st_drop_geometry() %>%
+#                select(flow_ID, cluster),
+#              by = "flow_ID") %>%
+#   # keep only the clusters after intersection with gtfs
+#   filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)
+#
+
+
+#
+# # ----- plots
+#
+# # scatter plots with x: commute_all, y: fraction of bus commuters
+# ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = commute_bus)) +
+#   geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") +
+#   geom_point() +
+#   ylim(0, 5) +
+#   scale_color_distiller(palette= "RdYlGn", direction = 1) +
+#   labs(x = "total no. of commuters",
+#        y = "Bus / Car commuters (fraction)",
+#        title = "Composition of clusters",
+#        subtitle = "No. of commuters",
+#        color = "Commuters (bus)") +
+#   facet_wrap(facets = vars(cluster)) +
+#   theme_bw() +
+#   theme(legend.position = "bottom")
+#
+# ggsave(paste0(plots_path, "figure_scatter_commuters_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+#
+# # paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0)
+#
+#
+# ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) +
+#   geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") +
+#   geom_point() +
+#   ylim(0, 5) +
+#   scale_color_distiller(palette= "RdYlGn", direction = 1) +
+#   labs(x = "total no. of commuters",
+#        y = "Bus / Car commuters (fraction)",
+#        color = "Average speed of \nbus commute (kph)",
+#        title = "Composition of clusters: \nNo. of commuters in each OD pair") +
+#   facet_wrap(facets = vars(cluster)) +
+#   theme_bw() +
+#   theme(legend.position = "bottom")
+#
+# ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+#
+# # same but replacing NA speed for min speed
+# ggplot(od_demand_figures_filt %>%
+#          st_drop_geometry() %>%
+#          mutate(speed_kph = replace_na(speed_kph, min(speed_kph, na.rm = TRUE))),
+#        aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) +
+#   geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") +
+#   geom_point() +
+#   ylim(0, 5) +
+#   scale_color_distiller(palette= "RdYlGn", direction = 1) +
+#   labs(x = "total no. of commuters",
+#        y = "Bus / Car commuters (fraction)",
+#        color = "Average speed of \nbus commute (kph)",
+#        title = "Composition of clusters: \nNo. of commuters in each OD pair") +
+#   facet_wrap(facets = vars(cluster)) +
+#   theme_bw() +
+#   theme(legend.position = "bottom")
+#
+# ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, "_no_NA.png"), height = 8, width = 6)
+#
+# # ----- Get the spatial coverage of each cluster (what portion is in urban / rural etc?)
+#
+# # get intersection
+# clusters_vis_mode_poly %>%
+#   st_intersection(basemap_urban_rural %>%
+#                     st_transform(st_crs(clusters_vis_mode_poly))) %>%
+#   # get area
+#   mutate(area_km = as.numeric(st_area(.) / 1000000))  %>%
+#   # area by cluster
+#   st_drop_geometry() %>%
+#   group_by(cluster, RUC11, RUC11CD_NM_FCT) %>%
+#   summarise(area_km = sum(area_km)) %>%
+#   # keep only the clusters after intersection with gtfs
+#   filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly
+#
+# # define custom color paletter
+# colors_urban_rural <- (c("#01665E", "#35978F", "#80CDC1", "#DFC27D", "#8C510A"))
+# # order column based on the RUC11CD_NM_FCT column (degree of urbanisation)
+# clusters_ur_poly$RUC11 <- factor(clusters_ur_poly$RUC11, levels = unique(clusters_ur_poly$RUC11)[order(clusters_ur_poly$RUC11CD_NM_FCT)])
+#
+# ggplot(clusters_ur_poly, aes(x = RUC11, y = area_km, fill = RUC11)) +
+#   geom_col(color = "grey60") +
+#   scale_fill_manual(values = colors_urban_rural,
+#                     labels = function(x) str_wrap(x, width = 25)) +
+#   labs(x = "",
+#        y = "Area covered by cluster (km2)",
+#        color = "Rural / Urban Classification",
+#        title = "Composition of clusters: Rural / Urban") +
+#   theme_bw() +
+#   theme(axis.text.x = element_blank(),
+#         axis.ticks.x=element_blank(),
+#         legend.position = "bottom",
+#         legend.title = element_blank()) +
+#   guides(fill = guide_legend(nrow = 3)) +
+#   facet_wrap(facets = vars(cluster))
+#
+# ggsave(paste0(plots_path, "figure_bar_urban_rural_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+#
+#
+# # Same but for filtered polygon
+#
+# clusters_vis_mode_poly_filt3 %>%
+#   st_intersection(basemap_urban_rural %>%
+#                     st_transform(st_crs(clusters_vis_mode_poly))) %>%
+#   # get area
+#   mutate(area_km = as.numeric(st_area(.) / 1000000))  %>%
+#   # area by cluster
+#   st_drop_geometry() %>%
+#   group_by(cluster, RUC11, RUC11CD_NM_FCT) %>%
+#   summarise(area_km = sum(area_km))  %>%
+#   # keep only the clusters after intersection with gtfs
+#   filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly_filt
+#
+# clusters_ur_poly_filt$RUC11 <- factor(clusters_ur_poly_filt$RUC11, levels = unique(clusters_ur_poly_filt$RUC11)[order(clusters_ur_poly_filt$RUC11CD_NM_FCT)])
+#
+#
+# ggplot(clusters_ur_poly_filt, aes(x = RUC11, y = area_km, fill = RUC11)) +
+#   geom_col(color = "grey60") +
+#   scale_fill_manual(values = colors_urban_rural,
+#                     #direction = -1,
+#                     labels = function(x) str_wrap(x, width = 25)) +
+#   labs(x = "",
+#        y = "Area covered by cluster (km2)",
+#        color = "Rural / Urban Classification",
+#        title = "Composition of clusters: Rural / Urban") +
+#   theme_bw() +
+#   theme(axis.text.x = element_blank(),
+#         axis.ticks.x=element_blank(),
+#         legend.position = "bottom",
+#         legend.title = element_blank()) +
+#   guides(fill = guide_legend(nrow = 3)) +
+#   facet_wrap(facets = vars(cluster))
+#
+# ggsave(paste0(plots_path, "figure_bar_urban_rural_filtered_by_gtfs_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+#
+#
+#
+#
+# ### ---- join the data to plot together
+# clusters_ur_poly %>%
+#   left_join(clusters_ur_poly_filt %>%
+#               rename(area_km_filt = area_km),
+#             by = c("cluster", "RUC11")) -> clusters_ur_poly_combined
+#
+#
+#
+# # --- plot both together
+# ggplot(clusters_ur_poly_combined) +
+#   geom_col(aes(x = RUC11, y = area_km, color = RUC11, alpha = 0.01)) +
+#   geom_col(aes(x = RUC11, y = area_km_filt, fill = RUC11)) +
+#   scale_fill_manual(values = colors_urban_rural,
+#                     #direction = -1,
+#                     labels = function(x) str_wrap(x, width = 25)) +
+#   scale_color_manual(values = colors_urban_rural,
+#                      #direction = -1,
+#                      labels = function(x) str_wrap(x, width = 25)) +
+#   labs(x = "",
+#        y = "Area covered by cluster (km2)",
+#        fill = "Rural / Urban Classification",
+#        title = "Composition of clusters: Rural / Urban",
+#        subtitle = "Before and after intersecting with bus network",
+#        caption = "border: area covered by entire cluster \nfill: area covered by cluster after intersection with bus network ") +
+#   guides(color = "none", alpha = "none") +
+#   scale_alpha_identity() +  # Maintain alpha value
+#   theme_bw() +
+#   theme(axis.text.x = element_blank(),
+#         axis.ticks.x=element_blank(),
+#         legend.position = "bottom",
+#         legend.title = element_blank(),
+#         plot.caption = element_text(hjust = 0)) +
+#   guides(fill = guide_legend(nrow = 3)) +
+#   facet_wrap(facets = vars(cluster))
+#
+# ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
+#
+#
+#
+#
+#
+# # ---------- PLots of line bearings in each cluster
+#
+# # --- calculate bearings
+#
+# od_demand_figures_bearings <- od_demand_figures_filt %>%
+#   mutate(bearing = stplanr::line_bearing(.),
+#          bearing_adjusted = case_when(bearing < 0 ~ bearing + 360,
+#                                       .default =  bearing),
+#          bearing_180 = case_when(bearing < 0 ~ bearing * -1,
+#                                  .default =  bearing))
+#
+#
+#
+# # add length column
+# od_demand_figures_bearings <- od_demand_figures_bearings %>%
+#   mutate(distance_m =  units::drop_units(sf::st_length(.)))
+#
+# # --- get columns for distance and bearing "groups" - for facet plots cut distance angle groups for plots
+#
+#
+# # Define the breaks for the buckets
+# #breaks_angle <- seq(-10, 370, by = 20)
+# breaks_angle <- seq(0, 360, by = 30)
+# #breaks_angle <- seq(0, 180, by = 15)
+# breaks_distance <- seq(0, 50, by = 10)
+#
+# od_demand_figures_bearings <- od_demand_figures_bearings %>%
+#   mutate(bucket = cut(bearing_adjusted, breaks = breaks_angle, right = FALSE, include.lowest = TRUE),
+#          # mutate(bucket = cut(bearing_180, breaks = breaks_angle, right = FALSE, include.lowest = TRUE),
+#          bucket_distance = cut(distance_m / 1000, breaks = breaks_distance, right = FALSE, include.lowest = TRUE)) %>%
+#   mutate(bucket_distance = fct_rev(bucket_distance))
+#
+# # Ensure the bucket factor levels are ordered correctly
+# #bearings_cat$bucket <- factor(bearings_cat$bucket, levels = unique(bearings_cat$bucket), ordered = TRUE)
+#
+#
+# # --- Plot with no of lines only (geom_bar())
+#
+#
+# # bar
+# ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) +
+#   geom_bar() +
+#   facet_wrap(facets = "cluster") +
+#   labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") +
+#   scale_x_discrete(labels = breaks_angle) +
+#   theme(axis.text.x = element_text(angle = 45, hjust = 1),
+#         legend.position = "bottom")
+#
+# ggsave(paste0(plots_path, "figure_bar_bearing_y_ods_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8)
+#
+#
+# # circle
+# ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) +
+#   geom_bar() +
+#   coord_polar(start = -0.1) +
+#   facet_wrap(facets = "cluster") +
+#   labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") +
+#   #scale_x_discrete(labels = breaks_angle) +
+#   scale_x_discrete(labels = breaks_angle) +
+#   theme(axis.text.x = element_text(angle = 45, hjust = 1))
+#
+#
+#
+# # --- Plot with total commuters geom_col()
+#
+#
+# # bar
+# ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) +
+#   geom_col() +
+#   #coord_polar() +
+#   facet_wrap(facets = "cluster") +
+#   labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") +
+#   scale_x_discrete(labels = breaks_angle) +
+#   theme(axis.text.x = element_text(angle = 45, hjust = 1),
+#         legend.position = "bottom")
+#
+# ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8)
+#
+# # circle
+# ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) +
+#   geom_col() +
+#   coord_radial(rotate.angle = TRUE) +
+#   facet_wrap(facets = "cluster") +
+#   labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") +
+#   #scale_x_discrete(labels = breaks_angle) +
+#   scale_x_discrete(labels = breaks_angle) +
+#   theme(axis.text.x = element_text(angle = 45, hjust = 1))
+#
+#
+#
+#
+# # TESTING FACET GRID
+#
+# # Select labels to print (one in every three)
+# labels_to_print <- levels(od_demand_figures_bearings$bucket)[seq(1, length(levels(od_demand_figures_bearings$bucket)), by = 3)]
+#
+#
+# ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) +
+#   geom_col() +
+#   #coord_polar() +
+#   facet_grid(bucket_distance ~ cluster,
+#              drop = TRUE) +
+#   labs(x = "Bearing", y = "Total Commuters", fill = "OD Pair Length") +
+#   # scale_x_discrete(labels = breaks_angle) +
+#   scale_x_discrete(labels = function(x) ifelse(x %in% labels_to_print, x, "")) + # Print one in every three labels # theme_minimal() +
+#   theme_bw() +
+#   theme(axis.text.x = element_text(angle = 60, hjust = 1),
+#         legend.position = "bottom")
+#
+#
+# ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_grid_cluster_distance_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 10)
+#
+#
+#
+#
+#
+#
+#
+#
+#
+#
+#
diff --git a/code/demand_cluster_flows_run.R b/code/demand_cluster_flows_run.R
new file mode 100644
index 0000000..6e97183
--- /dev/null
+++ b/code/demand_cluster_flows_run.R
@@ -0,0 +1,251 @@
+sensitivity = TRUE
+day_time = "pt_wkday_06_30"
+source("code/demand_cluster_flows.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+sensitivity = TRUE
+day_time = "pt_wkday_09_30"
+source("code/demand_cluster_flows.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+sensitivity = TRUE
+day_time = "pt_wkday_12_30"
+source("code/demand_cluster_flows.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+sensitivity = TRUE
+day_time = "pt_wkday_15_30"
+source("code/demand_cluster_flows.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+sensitivity = TRUE
+day_time = "pt_wkday_18_30"
+source("code/demand_cluster_flows.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+geography = "MSOA"
+
+# --- Compare clusters at different points in the day
+dbscan_sensitivity_res_wkday_06_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+  mutate(scenario = "pt_wkday_06_30")
+dbscan_sensitivity_res_wkday_09_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+  mutate(scenario = "pt_wkday_09_30")
+dbscan_sensitivity_res_wkday_12_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+  mutate(scenario = "pt_wkday_12_30")
+dbscan_sensitivity_res_wkday_15_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+  mutate(scenario = "pt_wkday_15_30")
+dbscan_sensitivity_res_wkday_18_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>%
+  mutate(scenario = "pt_wkday_18_30")
+
+
+dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_06_30,
+                                           dbscan_sensitivity_res_wkday_09_30,
+                                           dbscan_sensitivity_res_wkday_12_30,
+                                           dbscan_sensitivity_res_wkday_15_30,
+                                           dbscan_sensitivity_res_wkday_18_30
+                                           )
+
+
+dbscan_sensitivity_res_compare %>%
+  filter(cluster != 0) %>%
+  group_by(id) %>%
+  #mutate(clusters = n()) %>%
+  # How many clusters have more than 5 od pairs in them?
+  mutate(clusters = sum(size > 30)) %>%
+  ungroup() %>%
+  filter(clusters > 25) %>%
+  ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
+  geom_col() +
+  scale_y_continuous(trans='log10') +
+  facet_grid(id ~ scenario, scales = "fixed") +
+  labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
+       subtitle = "Parameter combinations with > 25 clusters having at least 30 od pairs each",
+       x = "Cluster no.",
+       y = "No. of od pairs in cluster",
+       fill= "No. of commuters") +
+  theme_bw()
+
+
+ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_compare.png"), width = 14, height = 10)
+
+
+
+
+# -------------- Plots
+day_time = "pt_wkday_06_30"
+source("code/demand_cluster_flows_maps_temporal.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+
+day_time = "pt_wkday_09_30"
+source("code/demand_cluster_flows_maps_temporal.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+
+day_time = "pt_wkday_12_30"
+source("code/demand_cluster_flows_maps_temporal.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+
+day_time = "pt_wkday_15_30"
+source("code/demand_cluster_flows_maps_temporal.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+
+day_time = "pt_wkday_18_30"
+source("code/demand_cluster_flows_maps_temporal.R")
+print(paste0("finished scenario: ", day_time))
+# clear environment
+rm(list = ls())
+gc()
+
+
+# ----- Facet map with different times of day
+
+source("R/study_area_geographies.R")
+source("R/filter_od_matrix.R")
+# ---------------------- Load in data
+
+# --------  1. administrative boundaries
+study_area <- st_read("data/interim/study_area_boundary.geojson")
+# convert to desired resolution
+geography = "MSOA"
+study_area = study_area_geographies(study_area = study_area,
+                                    geography = geography)
+
+study_area <- study_area %>%
+  st_cast("MULTIPOLYGON")
+
+# move the geographic ID to the first column. od::points_to_od() only keeps the first column as ID
+geoid_col = paste0(geography, "21CD")
+
+study_area <- study_area %>%
+  relocate(all_of(geoid_col), .before = everything())
+
+
+# -------- 2. gtfs
+gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf_temporal.geojson")
+
+
+# -------- 3. Pop density base layer
+
+# --- pop density 1km grid
+oa_pop_density <- stars::read_stars("data/external/population_density_grid_uk/gbr_pd_2020_1km_UNadj.tif")
+# crop to study area
+oa_pop_density_crop <- st_crop(oa_pop_density, study_area)
+
+# -------- 4. DRT operating zone polygons
+
+# Specify the directory containing the GeoJSON files
+directory_path <- "data/processed/plots/eda/od_clustering/MSOA/temporal/polygons_combined"
+
+# Get the list of all GeoJSON files of DRT boundaries (1 file per scenario)
+geojson_files <- list.files(directory_path, pattern = "\\.geojson$", full.names = TRUE)
+
+# Read each GeoJSON file into a list of sf objects
+geojson_list <- purrr::map(geojson_files, st_read)
+# prepare for plotting
+combined_geojson <- do.call(rbind, geojson_list)
+
+combined_geojson <- combined_geojson %>%
+  st_transform(st_crs(study_area)) %>%
+  st_make_valid()
+
+# plot
+tm_shape(st_union(study_area)) +
+  tm_borders(lwd =2,
+             col = "grey15") +
+  tm_shape(oa_pop_density_crop) +
+  tm_raster(title = "People / Km2",
+            palette = "Blues",
+            alpha = 0.5,
+            style = "log10_pretty") +
+  # bus layer
+  tm_shape(gtfs_bus %>%
+             filter(startsWith(scenario, "pt_wkday")) %>%
+             mutate(headway_inv = (1/headway_secs) * 3600) %>%
+             filter(headway_secs < 7200)) +
+  tm_lines(col = "darkred",
+           lwd = "headway_inv",
+           scale = 5.5,
+           palette = "-YlOrRd",
+           style = "pretty",
+           legend.col.show = FALSE,
+           alpha = 0.1,
+           title.lwd = "Buses/Hour",
+           #legend.lwd.is.portrait = FALSE
+  ) +
+  tm_facets(by = "scenario",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = 2,
+            showNA = FALSE) +
+  # ---- clusters
+  # poly border
+  tm_shape(combined_geojson %>%
+             st_buffer(1000)) +
+  tm_borders(col = "darkgreen",
+             lwd = 3.5,
+             lty = "dashed") +
+  tm_facets(by = "scenario",
+            #by = "commute_all",
+            free.coords = FALSE,
+            nrow = 2,
+            showNA = FALSE) +
+  tm_layout(fontfamily = 'Georgia',
+            main.title = paste0("Potential DRT Operating Zones (Temporal Variation)"),
+            main.title.size = 1.1,
+            main.title.color = "azure4",
+            main.title.position = "left",
+            #legend.outside = TRUE,
+            #legend.outside.position = "bottom",
+            #legend.stack = "horizontal",
+            # remove panel headers
+            # panel.show = FALSE,
+            panel.label.size = 1,
+            panel.label.bg.color = NA,
+            #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)),
+            frame = FALSE)  +
+  # add a couple of legends
+  tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2) -> map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES
+
+map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES
+
+plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/")
+
+tmap_save(tm = map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES, filename = paste0(plots_path, "map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES.png"), width = 12, dpi = 1080, asp = 0)
+
+
+
+
+
+
+
+
diff --git a/code/demand_on_buses.R b/code/demand_on_buses.R
index 01b8806..a2ee745 100644
--- a/code/demand_on_buses.R
+++ b/code/demand_on_buses.R
@@ -51,7 +51,7 @@ gtfs_paths = paste0(gtfs_dir, gtfs_names)
 
 
 # --- read in the feeds
-gtfs_bus <- gtfstools::read_gtfs(gtfs_paths[grepl("bus", gtfs_paths)])
+gtfs_bus <- gtfstools::read_gtfs(gtfs_paths[grepl("bus_temporal", gtfs_paths)])
 #gtfs_rail <- gtfstools::read_gtfs(gtfs_paths[grepl("rail", gtfs_paths)])
 
 # # --- filter the feeds to a specific point in time
@@ -241,7 +241,7 @@ plots_path <- "data/processed/plots/eda/speed_demand_cutoffs/temporal/"
 # TRIP LEVEL
 
 trips_sd = trips_sd %>%
-  mutate(combination = factor(combination, levels = c("pt_wkday_morning", "pt_wkday_afternoon", "pt_wkday_evening")))
+  mutate(combination = factor(combination, levels = c("pt_wkday_06_30", "pt_wkday_09_30", "pt_wkday_12_30", "pt_wkday_15_30", "pt_wkday_18_30")))
 
 
 # equal split
@@ -314,7 +314,7 @@ ggsave(filename = paste0(plots_path, "plot_hist_potential_demand_routes_two_meth
 #  ----- OD LEVEL
 
 od_trips_sd = od_trips_sd %>%
-  mutate(combination = factor(combination, levels = c("pt_wkday_morning", "pt_wkday_afternoon", "pt_wkday_evening")))
+  mutate(combination = factor(combination, levels = c("pt_wkday_06_30", "pt_wkday_09_30", "pt_wkday_12_30", "pt_wkday_15_30", "pt_wkday_18_30")))
 
 # equal split
 
@@ -331,7 +331,7 @@ od_trips_sd %>%
   ungroup() %>%
   mutate(potential_demand = round(potential_demand / 1000, 2)) %>%
   ggplot(aes(x = potential_demand)) +
-  geom_histogram(binwidth = 0.5, alpha = 0.8) +
+  geom_histogram(binwidth = 0.2, alpha = 0.8) +
   facet_grid(.~combination) +
   #geom_density()+
   #scale_y_log10() +
@@ -366,7 +366,7 @@ od_trips_sd %>%
   ungroup() %>%
   mutate(potential_demand = round(potential_demand / 1000, 2)) %>%
   ggplot(aes(x = potential_demand)) +
-  geom_histogram(binwidth = 0.5, alpha = 0.8) +
+  geom_histogram(binwidth = 0.2, alpha = 0.8) +
   #scale_y_log10() +
   facet_grid(combination ~ distribution_type) +
   labs(title = "Potential demand on PT routes",
@@ -396,7 +396,7 @@ od_trips_sd %>%
   ungroup() %>%
   mutate(potential_demand = round(potential_demand / 1000, 2)) %>%
   ggplot(aes(x = potential_demand)) +
-  geom_histogram(binwidth = 0.5, alpha = 0.8) +
+  geom_histogram(binwidth = 0.2, alpha = 0.8) +
   #scale_y_log10() +
   facet_grid(combination ~ distribution_type) +
   labs(title = "Potential demand on PT routes",
diff --git a/code/eda_travel_demand_nobus.R b/code/eda_travel_demand_nobus.R
index 33020a2..11441e7 100644
--- a/code/eda_travel_demand_nobus.R
+++ b/code/eda_travel_demand_nobus.R
@@ -55,8 +55,8 @@ plots_path <- paste0("data/processed/plots/eda/flows_no_direct/", geography, "/"
 # ---------- 2. Filter GTFS by time of day (one feed for each combination)
 
 # feeds
-gtfs_bus <- gtfs_feeds$study_area_gtfs_bus_f.zip
-gtfs_rail <- gtfs_feeds$study_area_gtfs_rail_f.zip
+gtfs_bus <- gtfs_feeds$study_area_gtfs_bus_temporal_f.zip
+gtfs_rail <- gtfs_feeds$study_area_gtfs_rail_temporal_f.zip
 # add shapes.txt to the rail feed
 gtfs_rail <- UK2GTFS::ATOC_shapes(gtfs_rail)
 
@@ -65,15 +65,21 @@ gtfs_rail <- UK2GTFS::ATOC_shapes(gtfs_rail)
 scenarios <- tribble(
   ~scenario, ~date, ~min_departure_time, ~max_arrival_time,
   # public transport at different times of day / week
-  "pt_wkday_morning", "2023-08-14", "06:00:00", "09:00:00",
-  "pt_wkday_afternoon", "2023-08-14", "11:00:00", "14:00:00",
-  "pt_wkday_evening", "2023-08-14", "17:00:00", "20:00:00",
-  "pt_wkday_night", "2023-08-14", "21:30:00", "23:59:00:00",
-  "pt_wkend_morning", "2023-08-13", "06:00:00", "09:00:00",
-  "pt_wkend_evening", "2023-08-13", "17:00:00", "20:00:00",
+  "pt_wkday_06_30", "2023-08-14", "05:00:00", "08:00:00",
+  "pt_wkday_09_30", "2023-08-14", "08:00:00", "11:00:00",
+  "pt_wkday_12_30", "2023-08-14", "11:00:00", "14:00:00",
+  "pt_wkday_15_30", "2023-08-14", "14:00:00", "17:00:00",
+  "pt_wkday_18_30", "2023-08-14", "17:00:00", "20:00:00",
+  "pt_wkend_06_30", "2023-08-14", "05:00:00", "08:00:00",
+  "pt_wkend_09_30", "2023-08-14", "08:00:00", "11:00:00",
+  "pt_wkend_12_30", "2023-08-14", "11:00:00", "14:00:00",
+  "pt_wkend_15_30", "2023-08-14", "14:00:00", "17:00:00",
+  "pt_wkend_18_30", "2023-08-14", "17:00:00", "20:00:00",
 )
 
 
+
+
 # Bus
 gtfs_bus_filtered <- filter_gtfs_feed_internal(gtfs_feed = gtfs_bus,
                                                scenarios = scenarios,
@@ -97,8 +103,8 @@ gtfs_bus_filtered_df <- bind_rows(gtfs_bus_filtered)
 gtfs_rail_filtered_df <- bind_rows(gtfs_rail_filtered)
 
 # save the output
-st_write(gtfs_bus_filtered_df, "data/interim/gtfs_freq/gtfs_bus_sf.geojson", delete_dsn =TRUE)
-st_write(gtfs_rail_filtered_df, "data/interim/gtfs_freq/gtfs_rail_sf.geojson", delete_dsn =TRUE)
+st_write(gtfs_bus_filtered_df, "data/interim/gtfs_freq/gtfs_bus_sf_temporal.geojson", delete_dsn =TRUE)
+st_write(gtfs_rail_filtered_df, "data/interim/gtfs_freq/gtfs_rail_sf_temporal.geojson", delete_dsn =TRUE)
 
 # Bus headway (morning peak)
 
@@ -110,7 +116,7 @@ tm_shape(study_area) +
           alpha = 0.5) +
   tm_shape(gtfs_bus_filtered_df %>%
              mutate(headway_inv = 1/headway_secs) %>%
-             filter(headway_secs < 3600, scenario == "pt_wkday_morning")) +
+             filter(headway_secs < 3600, scenario == "pt_wkday_06_30")) +
   tm_lines(col = "headway_secs",
            lwd = "headway_inv",
            scale = 5,
diff --git a/code/gtfs_convert_stop_times_to_freq.R b/code/gtfs_convert_stop_times_to_freq.R
index 5089463..4acf1a1 100644
--- a/code/gtfs_convert_stop_times_to_freq.R
+++ b/code/gtfs_convert_stop_times_to_freq.R
@@ -21,16 +21,16 @@ gtfs_rail <- tidytransit::read_gtfs("data/interim/study_area_gtfs_rail.zip")
 
 # apply the function
 gtfs_freq_bus = stop_times_to_frequencies(gtfs = gtfs_bus,
-                                      time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00"),
-                                                           end_time = c("05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00", "23:59:00")))
+                                      time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "06:30:00", "09:30:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00"),
+                                                           end_time = c("05:00:00", "06:30:00", "09:00:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00", "23:59:00")))
 
 # apply the function
 gtfs_freq_rail = stop_times_to_frequencies(gtfs = gtfs_rail,
-                                          time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00"),
-                                                               end_time = c("05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00", "23:59:00")))
+                                           time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "06:30:00", "09:30:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00"),
+                                                                end_time = c("05:00:00", "06:30:00", "09:00:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00", "23:59:00")))
 
 
 # save the new feed
-tidytransit::write_gtfs(gtfs_freq_bus, "data/interim/gtfs_freq/study_area_gtfs_bus_f.zip")
-tidytransit::write_gtfs(gtfs_freq_rail, "data/interim/gtfs_freq/study_area_gtfs_rail_f.zip")
+tidytransit::write_gtfs(gtfs_freq_bus, "data/interim/gtfs_freq/study_area_gtfs_bus_temporal_f.zip")
+tidytransit::write_gtfs(gtfs_freq_rail, "data/interim/gtfs_freq/study_area_gtfs_rail_temporal_f.zip")
 
diff --git a/code/performance_od.R b/code/performance_od.R
index d6d806b..9f8b6c0 100644
--- a/code/performance_od.R
+++ b/code/performance_od.R
@@ -123,7 +123,7 @@ tm_shape(study_area) +
   tm_borders(col = "grey60",
              alpha = 0.5) +
   tm_shape(od_demand_sf_rank %>%
-             filter(combination == "pt_wkday_morning") %>%
+             filter(combination == "pt_wkday_06_30") %>%
              mutate(n_rides = as.factor(round(n_rides)))) +
   tm_lines(col = "n_rides",
            lwd = "total_flow",
@@ -166,7 +166,7 @@ tm_shape(study_area) +
   tm_borders(col = "grey60",
              alpha = 0.5) +
   tm_shape(od_demand_sf_rank %>%
-             filter(combination == "pt_wkday_morning", n_rides == 1)) +
+             filter(combination == "pt_wkday_06_30", n_rides == 1)) +
   tm_lines(col = "total_flow",
            lwd = "total_flow",
            legend.lwd.show = FALSE,
@@ -207,7 +207,7 @@ tm_shape(study_area) +
   tm_fill(col = "grey80",
           alpha = 0.5) +
   tm_shape(od_demand_sf_rank %>%
-             filter(combination == "pt_wkday_morning", speed_percentile <= 0.25)) +
+             filter(combination == "pt_wkday_06_30", speed_percentile <= 0.25)) +
   tm_lines(col = "total_flow",
            lwd = "total_flow",
            scale = 10,
diff --git a/code/travel_demand_temporal.R b/code/travel_demand_temporal.R
index 97d6857..37ea7f9 100644
--- a/code/travel_demand_temporal.R
+++ b/code/travel_demand_temporal.R
@@ -122,7 +122,7 @@ cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
 # ----------  MSOA level
 
 # read in the data
-tt_matrix_msoa <- arrow::read_parquet("data/processed/travel_times/MSOA/travel_time_matrix_expanded.parquet")
+tt_matrix_msoa <- arrow::read_parquet("data/processed/travel_times/MSOA/travel_time_matrix_expanded_temporal.parquet")
 
 # some OD pairs don't have travel time data (for some combinations). Expand grid so that we explitly mention these OD pairs
 tt_matrix_msoa_exp <- tidyr::crossing(from_id = tt_matrix_msoa$from_id, to_id = tt_matrix_msoa$to_id, combination = tt_matrix_msoa$combination)
@@ -157,9 +157,11 @@ tt_matrix_msoa <- tt_matrix_msoa %>%
 # add combination column to cpc data
 cpc_matrices_all_internal <- cpc_matrices_all_internal %>%
   mutate(combination = case_when(
-    source %in% c("05-06", "06-07", "07-08", "08-09", "09-10", "10-11", "11-12") ~ "pt_wkday_morning",
-    source %in% c("12-13", "13-14", "14-15", "15-16", "16-17") ~ "pt_wkday_afternoon",
-    source %in% c("17-18", "18-19", "19-20") ~ "pt_wkday_evening")
+    source %in% c("05-06", "06-07", "07-08") ~ "pt_wkday_06_30",
+    source %in% c("08-09", "09-10", "10-11") ~ "pt_wkday_09_30",
+    source %in% c("11-12", "12-13", "13-14") ~ "pt_wkday_12_30",
+    source %in% c("14-15", "15-16", "16-17") ~ "pt_wkday_15_30",
+    source %in% c("17-18", "18-19", "19-20") ~ "pt_wkday_18_30")
     )
 
 # group by combination column

From f9bd6b427ea501eb6644fe3b97d4b7d141c07d17 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Mon, 18 Nov 2024 18:49:33 +0100
Subject: [PATCH 15/16] updates

---
 code/demand_cluster_flows_maps_temporal.R | 36 ++++++++------
 code/demand_cluster_flows_prep.R          |  1 -
 code/demand_cluster_flows_run.R           | 57 +++++++++++++++++++----
 3 files changed, 71 insertions(+), 23 deletions(-)

diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R
index 3ca40fa..b85849b 100644
--- a/code/demand_cluster_flows_maps_temporal.R
+++ b/code/demand_cluster_flows_maps_temporal.R
@@ -21,7 +21,10 @@ clustering <- "equal"
 distance_threshold <- 50000   # 10000
 
 # save plots?
-save = FALSE
+save = TRUE
+
+# # minimum number of commuters in a cluster for it to be part of our analysis
+# commuters_sum_minimum = 150
 
 #  ------------------------ Load in the data  -------------------------- #
 
@@ -53,8 +56,15 @@ study_area <- study_area %>%
   relocate(all_of(geoid_col), .before = everything())
 
 
-plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/")
-
+polygons_path <-paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/polygons_combined/", "min_commuters_", commuters_sum_minimum, "/")
+if (!dir.exists(polygons_path)) {
+  dir.create(polygons_path, recursive = TRUE)
+}
+plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/", "min_commuters_", commuters_sum_minimum, "/")
+# Check if the directory exists, and if not, create it
+if (!dir.exists(plots_path)) {
+  dir.create(plots_path, recursive = TRUE)
+}
 
 # ------------------------- VISUALISE RESULTS ------------------------- #
 
@@ -85,7 +95,7 @@ plot(cluster_dbscan_res["cluster"])
 # get clusters to map
 cluster_dbscan_res %>%
   filter(size > 15, size < 5000) %>%
-  filter(commuters_sum > 200) %>%
+  filter(commuters_sum > commuters_sum_minimum) %>%
   filter(cluster != 0) -> clusters_vis
 
 
@@ -295,7 +305,7 @@ if(save == TRUE){
 
 clusters_vis_mode_poly <- clusters_vis_mode %>%
   # filter(size > 7, size < 5000) %>%
-  # filter(commuters_sum > 200) %>%
+  # filter(commuters_sum > commuters_sum_minimum) %>%
   filter(cluster != 0) %>%
   mutate(cluster = as.factor(cluster)) %>%
   #head(1000) %>%
@@ -919,7 +929,7 @@ tm_shape(study_area) +
   # lines
   tm_shape(clusters_vis_mode %>%
              filter(size > 7, size < 5000) %>%
-             filter(commuters_sum > 200) %>%
+             filter(commuters_sum > commuters_sum_minimum) %>%
              filter(cluster != 0) %>%
              filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
              mutate(cluster = as.factor(cluster)) %>%
@@ -1023,7 +1033,7 @@ tm_shape(study_area) +
   # lines
   tm_shape(clusters_vis_mode %>%
              filter(size > 7, size < 5000) %>%
-             filter(commuters_sum > 200) %>%
+             filter(commuters_sum > commuters_sum_minimum) %>%
              filter(cluster != 0) %>%
              filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
              mutate(cluster = as.factor(cluster)) %>%
@@ -1048,7 +1058,7 @@ tm_shape(study_area) +
   # START AND ENDPOINTS
   tm_shape(clusters_vis_mode %>%
              filter(size > 7, size < 5000) %>%
-             filter(commuters_sum > 200) %>%
+             filter(commuters_sum > commuters_sum_minimum) %>%
              filter(cluster != 0) %>%
              filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
              mutate(cluster = as.factor(cluster)) %>%
@@ -1067,7 +1077,7 @@ tm_shape(study_area) +
             showNA = FALSE) +
   tm_shape(clusters_vis_mode %>%
              filter(size > 7, size < 5000) %>%
-             filter(commuters_sum > 200) %>%
+             filter(commuters_sum > commuters_sum_minimum) %>%
              filter(cluster != 0) %>%
              filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
              mutate(cluster = as.factor(cluster)) %>%
@@ -1161,7 +1171,7 @@ tm_shape(study_area) +
   # lines
   tm_shape(clusters_vis_mode %>%
              filter(size > 7, size < 5000) %>%
-             filter(commuters_sum > 200) %>%
+             filter(commuters_sum > commuters_sum_minimum) %>%
              filter(cluster != 0) %>%
              filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
              mutate(cluster = as.factor(cluster)) %>%
@@ -1186,7 +1196,7 @@ tm_shape(study_area) +
   # START AND ENDPOINTS
   tm_shape(clusters_vis_mode %>%
              filter(size > 7, size < 5000) %>%
-             filter(commuters_sum > 200) %>%
+             filter(commuters_sum > commuters_sum_minimum) %>%
              filter(cluster != 0) %>%
              filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
              mutate(cluster = as.factor(cluster)) %>%
@@ -1205,7 +1215,7 @@ tm_shape(study_area) +
             showNA = FALSE) +
   tm_shape(clusters_vis_mode %>%
              filter(size > 7, size < 5000) %>%
-             filter(commuters_sum > 200) %>%
+             filter(commuters_sum > commuters_sum_minimum) %>%
              filter(cluster != 0) %>%
              filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>%
              mutate(cluster = as.factor(cluster)) %>%
@@ -1473,7 +1483,7 @@ clusters_vis_mode_poly_filt3_all = clusters_vis_mode_poly_filt3 %>%
   mutate(scenario = day_time)
 
 # Save to make temporal comparison plot
-st_write(clusters_vis_mode_poly_filt3_all, paste0(plots_path, "polygons_combined/", day_time, ".geojson"), delete_dsn = TRUE)
+st_write(clusters_vis_mode_poly_filt3_all, paste0(polygons_path, day_time, ".geojson"), delete_dsn = TRUE)
 
 
 
diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R
index 75491c3..de523f8 100644
--- a/code/demand_cluster_flows_prep.R
+++ b/code/demand_cluster_flows_prep.R
@@ -305,7 +305,6 @@ plot(st_geometry(od_demand_jittered %>%
 # ----- Option 1: All OD pairs
 
 od_demand_1 <- od_demand_filtered
-
 # ----- Option 2: OD pairs with poor PT supply (many transfers or low travel speed)
 
 od_demand_2 <- od_demand_filtered %>%
diff --git a/code/demand_cluster_flows_run.R b/code/demand_cluster_flows_run.R
index 6e97183..556bc2b 100644
--- a/code/demand_cluster_flows_run.R
+++ b/code/demand_cluster_flows_run.R
@@ -1,3 +1,8 @@
+
+
+# -------------------- Clusteting
+
+
 sensitivity = TRUE
 day_time = "pt_wkday_06_30"
 source("code/demand_cluster_flows.R")
@@ -83,15 +88,15 @@ dbscan_sensitivity_res_compare %>%
 
 ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_compare.png"), width = 14, height = 10)
 
-
-
+# minimum number of commuters in a cluster for it to be part of our analysis
+commuters_sum_minimum = 150
 
 # -------------- Plots
 day_time = "pt_wkday_06_30"
 source("code/demand_cluster_flows_maps_temporal.R")
 print(paste0("finished scenario: ", day_time))
 # clear environment
-rm(list = ls())
+rm(list = setdiff(ls(), "commuters_sum_minimum"))
 gc()
 
 
@@ -99,7 +104,7 @@ day_time = "pt_wkday_09_30"
 source("code/demand_cluster_flows_maps_temporal.R")
 print(paste0("finished scenario: ", day_time))
 # clear environment
-rm(list = ls())
+rm(list = setdiff(ls(), "commuters_sum_minimum"))
 gc()
 
 
@@ -107,7 +112,7 @@ day_time = "pt_wkday_12_30"
 source("code/demand_cluster_flows_maps_temporal.R")
 print(paste0("finished scenario: ", day_time))
 # clear environment
-rm(list = ls())
+rm(list = setdiff(ls(), "commuters_sum_minimum"))
 gc()
 
 
@@ -115,7 +120,7 @@ day_time = "pt_wkday_15_30"
 source("code/demand_cluster_flows_maps_temporal.R")
 print(paste0("finished scenario: ", day_time))
 # clear environment
-rm(list = ls())
+rm(list = setdiff(ls(), "commuters_sum_minimum"))
 gc()
 
 
@@ -123,10 +128,11 @@ day_time = "pt_wkday_18_30"
 source("code/demand_cluster_flows_maps_temporal.R")
 print(paste0("finished scenario: ", day_time))
 # clear environment
-rm(list = ls())
+rm(list = setdiff(ls(), "commuters_sum_minimum"))
 gc()
 
 
+
 # ----- Facet map with different times of day
 
 source("R/study_area_geographies.R")
@@ -164,7 +170,7 @@ oa_pop_density_crop <- st_crop(oa_pop_density, study_area)
 # -------- 4. DRT operating zone polygons
 
 # Specify the directory containing the GeoJSON files
-directory_path <- "data/processed/plots/eda/od_clustering/MSOA/temporal/polygons_combined"
+directory_path <- paste0("data/processed/plots/eda/od_clustering/MSOA/temporal/polygons_combined/", commuters_sum_minimum, "/")
 
 # Get the list of all GeoJSON files of DRT boundaries (1 file per scenario)
 geojson_files <- list.files(directory_path, pattern = "\\.geojson$", full.names = TRUE)
@@ -238,7 +244,8 @@ tm_shape(st_union(study_area)) +
 
 map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES
 
-plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/")
+plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/", "min_commuters_", commuters_sum_minimum)
+
 
 tmap_save(tm = map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES, filename = paste0(plots_path, "map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES.png"), width = 12, dpi = 1080, asp = 0)
 
@@ -249,3 +256,35 @@ tmap_save(tm = map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#  ------------------------ Test clustering input  -------------------------- #
+
+
+od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson"))
+
+
+od_demand_jittered %>%
+  st_drop_geometry() %>%
+  pivot_longer(cols = c(scenario_1, scenario_2, scenario_3),
+               names_to = "scenario") %>%
+  group_by(combination, scenario) %>%
+  summarise(number_of_ods = sum(value == 1))
+

From f92199c92098bb0292d08f2405061e34c5daeae7 Mon Sep 17 00:00:00 2001
From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com>
Date: Mon, 9 Dec 2024 22:06:52 +0100
Subject: [PATCH 16/16] tmap lines legend

---
 code/demand_cluster_flows_maps_temporal.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R
index b85849b..4a34e0c 100644
--- a/code/demand_cluster_flows_maps_temporal.R
+++ b/code/demand_cluster_flows_maps_temporal.R
@@ -1184,7 +1184,7 @@ tm_shape(study_area) +
            alpha = 0.4,
            title.col = "Travel demand",
            #title.lwd = "No. of commuters",
-           legend.col.show = FALSE,
+           #legend.col.show = FALSE,
            legend.lwd.show = FALSE,
            # remove "missing from legend
            showNA = FALSE) +