From f25c98ac8e4e24ade57e4af7aaada39b72de82a8 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Fri, 5 Jul 2024 15:39:47 +0100 Subject: [PATCH 01/16] cpc data request --- code/x_cpc_msoa_zoning_request.R | 282 +++++++++++++++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 code/x_cpc_msoa_zoning_request.R diff --git a/code/x_cpc_msoa_zoning_request.R b/code/x_cpc_msoa_zoning_request.R new file mode 100644 index 0000000..35a4c74 --- /dev/null +++ b/code/x_cpc_msoa_zoning_request.R @@ -0,0 +1,282 @@ +library(tidyverse) +library(sf) +library(tmap) + + +# ---------- Read in data + +# --- 1. CPC data template - 2011 MSOA layer +cpc_zones <- read_csv("data/external/cpc/ZoningTemplate.csv") + +# --- 2. 2011 MSOA boundaries (geo) +msoa_shp_2011 <- st_read("data/external/msoa_england_2011/infuse_msoa_lyr_2011_clipped.shp") + +# --- 3. Commuting matrices (2021) +msoa_commute_2021 <- read_csv("data/raw/travel_demand/od_census_2021/ODWP15EW_MSOA_v1.zip") + +# simplify +msoa_commute_2021 <- msoa_commute_2021 %>% + rename(msoa_origin = "Middle layer Super Output Areas code", msoa_destination = "MSOA of workplace code") %>% + # aggregate: we don't care about gender and age breakdown + group_by(msoa_origin, msoa_destination) %>% + summarise(commuters = sum(Count)) %>% + ungroup() + +# add 2011 msoa names +msoa_2011_2021 <- read_csv("data/external/cpc/MSOA_2011_2021.csv") %>% + select(MSOA11CD, MSOA21CD, LAD22CD) + +msoa_commute_2021_2011 <- msoa_commute_2021 %>% + # add 2011 msoa codes (origin) + left_join(msoa_2011_2021 %>% + rename_with(~ paste0(.x, "_origin")), + by = c("msoa_origin" = "MSOA21CD_origin")) %>% + # add 2011 msoa codes (destination) + left_join(msoa_2011_2021 %>% + rename_with(~ paste0(.x, "_destination")), + by = c("msoa_destination" = "MSOA21CD_destination")) + + + + +# ---------- Join the geo data (2) onto (1) +cpc_zones_geo <- cpc_zones %>% + left_join(msoa_shp_2011 %>% + select(geo_code), + by = c("msoa" = "geo_code")) %>% + st_as_sf() + + +cps_zones_yh_region <- cpc_zones_geo %>% + filter(region_name == "Yorkshire and The Humber") + +cps_zones_wy_county <- cpc_zones_geo %>% + filter(county_name == "West Yorkshire") + + + +# ---------- Commuting matrices + +# which OD pairs have an origin or destination inside our study area +msoa_commute_2021_2011_region = msoa_commute_2021_2011 %>% + filter(MSOA11CD_origin %in% cps_zones_wy_county$msoa | + MSOA11CD_destination %in% cps_zones_wy_county$msoa) + + +# ---------- + +msoas_of_interest <- cpc_zones_geo %>% + filter(msoa %in% msoa_commute_2021_2011_region$MSOA11CD_origin | + msoa %in% msoa_commute_2021_2011_region$MSOA11CD_destination) + + + +# label msoas in west yorkshire + +msoas_of_interest %>% + group_by(lad) %>% + mutate(zone_id = n()) %>% + mutate(zone_id = as.character(zone_id)) %>% + ungroup() -> x + + +tm_shape(x)+ + tm_fill(col = "zone_id", + palette = "Dark2") + + +# ------------------------- APPROACH (msoa -> lad -> county) + +# ----- study area +study_area_county = "E11000006" +study_area_region = "E12000003" + +# 1.LEVEL = MSOA + +# --- west yorkshire +cpc_zones_wy = cpc_zones_geo %>% + filter(county == study_area_county) %>% + mutate(id_col = row_number(), + zone_type = 1) # internal to the model + + +# 2.LEVEL = LAD + +# --- yorkshire humber region +cpc_zones_yh_region = cpc_zones_geo %>% + filter(region == study_area_region) %>% + filter(! msoa %in% cpc_zones_wy$msoa) %>% + group_by(lad) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_wy$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# --- greater manchester and lancashire and derbyshire +cpc_zones_other_neighbors = cpc_zones_geo %>% + filter(county_name %in% c("Greater Manchester", "Derbyshire", "Lancashire")) %>% + #filter(! msoa %in% cpc_zones_wy$msoa) %>% + group_by(lad) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_yh_region$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# Combine (LEVEL 1) and (LEVEL 2) + +cpc_zones_study_plus = cpc_zones_wy %>% + bind_rows(cpc_zones_yh_region) %>% + bind_rows(cpc_zones_other_neighbors) + +# 3.LEVEL = COUNTY (OR REGION - check which one I used) + +# ----- Other areas + +cpc_zones_other = cpc_zones_geo %>% + filter(country_name == "England") %>% + filter(! msoa %in% cpc_zones_study_plus$msoa) %>% + group_by(region) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_study_plus$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# 4.LEVEL = COUNTRY + +# ----- Scotland and Wales + +cpc_zones_countries = cpc_zones_geo %>% + filter(country_name != "England") %>% + group_by(country_name) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_other$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# bring all together +cpc_zones_all = cpc_zones_study_plus %>% + bind_rows(cpc_zones_other) %>% + bind_rows(cpc_zones_countries) + + +cpc_zones_all <- cpc_zones_all %>% + mutate(zone_id = id_col) + + +cpc_zones_all_df = cpc_zones_all %>% + select(msoa:msoa_name) %>% + st_drop_geometry() + +# Save +write_csv(cpc_zones_all_df, "data/external/cpc/ZoningTemplateFilled_lad_region.csv") + + +cpc_zones_all %>% filter(zone_id %in% c(347, 348)) -> x + +tm_shape(x)+ + tm_fill(col = "zone_id", + palette = "Dark2", + legend.show = FALSE) + + +# ------------------------- APPROACH (msoa -> lad) + scot wales as countries (could not use them as LADs due to an error on cpc website) ---------- # +# ------- THIS IS THE LAYER I SUBMITTED ------- # + +# ----- study area +study_area_county = "E11000006" + +# 1.LEVEL = MSOA + +# --- west yorkshire +cpc_zones_wy = cpc_zones_geo %>% + filter(county == study_area_county) %>% + mutate(id_col = row_number(), + zone_type = 1) # internal to the model + + +# 2.LEVEL = LAD + +# --- yorkshire humber region +cpc_zones_other = cpc_zones_geo %>% + filter(country_name == "England") %>% + filter(! msoa %in% cpc_zones_wy$msoa) %>% + group_by(lad) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_wy$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# ----- Scotland and Wales + +cpc_zones_scot_wales = cpc_zones_geo %>% + filter(country_name != "England") %>% + group_by(country_name) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_other$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + + +# Combine + +cpc_zones_all = cpc_zones_wy %>% + bind_rows(cpc_zones_other) %>% + bind_rows(cpc_zones_scot_wales) %>% + mutate(zone_id = id_col) + +cpc_zones_all_df = cpc_zones_all %>% + select(msoa:msoa_name) %>% + st_drop_geometry() + +# Save +write_csv(cpc_zones_all_df, "data/external/cpc/ZoningTemplateFilled_lad_scot_wales.csv") + + +cpc_zones_all %>% filter(zone_id %in% c(629, 643, 652)) -> x + +tm_shape(x)+ + tm_fill(col = "lad", + palette = "Dark2") + + + + + + + + + + + + +tm_shape(cpc_zones_all %>% + filter(zone_id == 375))+ + tm_fill(col = "zone_id", + palette = "Dark2", + legend.show = FALSE) + +cpc_zones_all %>% + filter(zone_id == 375) -> x +# PLOTS + +tm_shape(cpc_zones_all)+ + tm_fill(col = "zone_id", + palette = "Dark2", + legend.show = FALSE) + + +tm_shape(cpc_zones_all %>% + #filter(county_name == "West Yorkshire") %>% + filter(region_name == "Yorkshire and The Humber") %>% + mutate(id_col = as.character(id_col)))+ + tm_fill(col = "lad", + palette = "Dark2") + + +tm_shape(cpc_zones_all %>% + filter(county_name == "Inner London" | county_name == "Outer London")) + + tm_fill(col = "lad", + palette = "Dark2") + + +unique(cpc_zones_all2$zone_id) From f55879ba39b990b22811d46083dff96352eb9795 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:09:45 +0100 Subject: [PATCH 02/16] cpc data plots --- code/x_cpc_exploration.R | 246 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 code/x_cpc_exploration.R diff --git a/code/x_cpc_exploration.R b/code/x_cpc_exploration.R new file mode 100644 index 0000000..2adf051 --- /dev/null +++ b/code/x_cpc_exploration.R @@ -0,0 +1,246 @@ +library(tidyverse) +library(sf) +library(od) +library(tmap) + +source("R/study_area_geographies.R") +source("R/filter_od_matrix.R") + +plots_path <- "data/processed/plots/eda/cpc/" + +# ----------------------- READ IN THE DATA ----------------------- # + +# ------- Zoning layer (2011 MSOAs) + +zones <- read_csv("data/external/cpc/ZoningTemplateFilled_lad_scot_wales.csv") +# study area zones +zones_internal = zones %>% + filter(zone_type == 1) + +# ------- CPC data + +# Define the path to the ZIP file +zip_file_path <- "data/external/cpc/cpc_west_yorkshire_weekdays_march_may.zip" +# Create a temporary directory to extract the files +temp_dir <- tempdir() +# Extract the ZIP file to the temporary directory +unzip(zip_file_path, exdir = temp_dir) +# Get a list of all CSV files in the temporary directory +csv_files <- list.files(temp_dir, pattern = "\\.csv$", full.names = TRUE) + +# Read each CSV file into a list of data frames +cpc_matrices <- purrr::map(csv_files, read.csv) + +# Extract the part of the filename after the underscore and before the .csv extension +file_names <- basename(csv_files) +short_names <- stringr::str_extract(file_names, "(?<=_).*(?=\\.csv)") + +# Name each data frame in the list by its original file name +names(cpc_matrices) <- short_names + +# --- Combine the dfs + +# Add a new column to each data frame with the name of the data frame +cpc_matrices <- purrr::map2(cpc_matrices, short_names, ~mutate(.x, source = .y)) + +# Combine into one df +cpc_matrices_all <- bind_rows(cpc_matrices) + +# Add column to sum all flow types +cpc_matrices_all <- cpc_matrices_all %>% + mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE)) +# ------- Geo boundaries + +# England +msoa_shp_2011 <- st_read("data/external/msoa_england_2011/infuse_msoa_lyr_2011_clipped.shp") + +# West Yorkshire (All zones with zone_type == 1) + +study_area_large <- msoa_shp_2011 %>% + filter(geo_code %in% zones_internal$msoa) %>% + rename(MSOA11CD = geo_code) + +# Leeds + +# --- administrative boundaries +study_area <- st_read("data/interim/study_area_boundary.geojson") +# convert to desired resolution +geography = "MSOA" +study_area = study_area_geographies(study_area = study_area, + geography = geography) + +# ADD 2011 MSOA codes to study area +msoa_2011_2021_lookup <- read_csv("data/external/cpc/MSOA_2011_2021.csv") + +study_area <- study_area %>% + left_join(msoa_2011_2021_lookup %>% + select(c("MSOA11CD", "MSOA21CD")), + by = "MSOA21CD") + + + +# ----------------------- ADD GEOGRAPHIC COORDINATES TO ZONES ----------------------- # + +# ----- filter zones to study area + +# filter matrices to keep study area zones only +cpc_matrices_all_internal <- cpc_matrices_all %>% + filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id) + + +# ----- Add desire lines + +# prepare data for od::od_to_sf() + +# ----- add msoa 2011 codes +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + # join origin data + left_join(zones_internal %>% + select(msoa, zone_id, msoa_name) %>% + rename(from_msoa = msoa, from_msoa_name = msoa_name), + by = c("from_zone" = "zone_id")) %>% + # join destination data + left_join(zones_internal %>% + select(msoa, zone_id, msoa_name) %>% + rename(to_msoa = msoa, to_msoa_name = msoa_name), + by = c("to_zone" = "zone_id")) %>% + # move msoa codes to first two columns (for od::od_to_sf()) + relocate("from_msoa", .before = everything()) %>% + relocate("to_msoa", .after = "from_msoa") + +# od::od_to_sf + +# cpc_matrices_all_internal_sf <- od::od_to_sf(x = cpc_matrices_all_internal, +# z = study_area_cpc_internal, +# silent = FALSE) + +# remove short ods +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + rename(Origin = from_msoa, Destination = to_msoa) + +# Filter matrix by distance also adds desire lines +cpc_matrices_all_internal_sf <- filter_matrix_by_distance(zones = study_area_large, od_matrix = cpc_matrices_all_internal, dist_threshold = 1000) + + + + + + + + + +map_cpc_flows = function(od_matrix, + area, + column_to_map, + cols, + min_flows){ + + # --- study_area + # all of WY + if (area == "large"){ + study_area = study_area_large + # Leeds only + } else if (area == "small") { + study_area = study_area + } + + # ----- We filter the od_matrix to only include flows that start and end in the study_Area + od_matrix = od_matrix %>% + filter(Origin %in% study_area$MSOA11CD & + Destination %in% study_area$MSOA11CD) + # arrange so that highest flows are plotting on top + od_matrix = od_matrix %>% arrange(!!sym(column_to_map)) + + # ----- Plot + + # Use the dynamic column name in the filter and tm_lines functions + tm_shape(study_area) + + tm_borders(col = "grey80", alpha = 0.7) + + tm_shape(od_matrix %>% + filter((!!sym(column_to_map)) > min_flows)) + + tm_lines(col = column_to_map, + title.col = "Number of people", + lwd = column_to_map, + legend.lwd.show = FALSE, + palette = "YlGnBu", # YlOrRd # PuBuGn + scale = 7) + + tm_facets(by = "source", + free.coords = FALSE, + ncol = cols) + + # START AND ENDPOINTS + tm_shape(od_matrix %>% + filter((!!sym(column_to_map)) > min_flows) %>% + mutate(geometry = lwgeom::st_startpoint(.))) + + tm_dots(col = "darkgreen", + alpha = 0.4, + #jitter = 0.1, + size = column_to_map, + scale = 0.5, + legend.size.show = FALSE) + + tm_facets(by = "source", + free.coords = FALSE, + ncol = cols, + showNA = FALSE) + + tm_shape(od_matrix %>% + filter((!!sym(column_to_map)) > min_flows) %>% + mutate(geometry = lwgeom::st_endpoint(.))) + + tm_dots(col = "red", + alpha = 0.4, + #jitter = 0.1, + size = column_to_map, + scale = 0.5, + legend.size.show = FALSE) + + tm_facets(by = "source", + free.coords = FALSE, + ncol = cols, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Flows: Purpose = ", column_to_map), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + panel.label.size = 1, + panel.label.bg.color = NA, + # legend.outside = TRUE, + # legend.outside.position = "bottom", + # legend.stack = "horizontal", + frame = FALSE) + + tm_add_legend(type = "symbol", labels = 'OD Start', col = 'darkgreen') + + tm_add_legend(type = "symbol", labels = 'OD End', col = 'red') -> res + + res + + tmap_save(tm = res, filename = paste0(plots_path, "cpc_facet_", column_to_map, "_scope_", area, ".png"), width = 12, dpi = 1080, asp = 0) + + + + + + + + } + +# # Apply the function for a specific column +# map_cpc_flows(od_matrix = cpc_matrices_all_internal_sf, +# area = "small", # c(small, large) +# column_to_map = "hbo_outbound", +# cols = 5, +# min_flows = 20) + + +# Apply to all columns +columns = c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb", "total_flow") + +# SMALL (Leeds Boundary) +purrr::map(.x = columns, ~map_cpc_flows(od_matrix = cpc_matrices_all_internal_sf, + area = "small", + column_to_map = .x, + cols = 5, + min_flow = 10)) + +# LARGE (West Yorskshire) +purrr::map(.x = columns, ~map_cpc_flows(od_matrix = cpc_matrices_all_internal_sf, + area = "large", + column_to_map = .x, + cols = 5, + min_flow = 10)) From 0df20125eec2ffc571a55d58461c29da443537ee Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Sun, 27 Oct 2024 09:50:07 +0000 Subject: [PATCH 03/16] save tt and demand msoa level --- code/travel_demand_census.R | 2 +- code/x_cpc_exploration.R | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/code/travel_demand_census.R b/code/travel_demand_census.R index 22f2b9e..f1eb09c 100644 --- a/code/travel_demand_census.R +++ b/code/travel_demand_census.R @@ -191,6 +191,6 @@ census_work_msoa_tt <- census_work_msoa %>% # save #write_csv(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.csv") -arrow::write_parquet(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.parquet") +arrow::write_parquet(census_work_msoa_tt, ) diff --git a/code/x_cpc_exploration.R b/code/x_cpc_exploration.R index 2adf051..9d345b8 100644 --- a/code/x_cpc_exploration.R +++ b/code/x_cpc_exploration.R @@ -49,6 +49,7 @@ cpc_matrices_all <- bind_rows(cpc_matrices) # Add column to sum all flow types cpc_matrices_all <- cpc_matrices_all %>% mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE)) + # ------- Geo boundaries # England From af81246279974621d29fe54812ff9de6e1098308 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Wed, 30 Oct 2024 08:41:19 +0000 Subject: [PATCH 04/16] begin temporal data prep --- code/travel_demand_temporal.R | 206 ++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 code/travel_demand_temporal.R diff --git a/code/travel_demand_temporal.R b/code/travel_demand_temporal.R new file mode 100644 index 0000000..c4991f5 --- /dev/null +++ b/code/travel_demand_temporal.R @@ -0,0 +1,206 @@ +### the purpose of this script is to work with travel demand data from cpc ### +### the data is only available at MSOA level. It is cleaned and joined onto ### +### the travel time results + +library(tidyverse) +library(sf) + + +source("R/study_area_geographies.R") +source("R/filter_od_matrix.R") + +# --------------- 1. READ IN THE DEMaND DATA + +# ------- Zoning layer (2011 MSOAs) + +zones <- read_csv("data/external/cpc/ZoningTemplateFilled_lad_scot_wales.csv") +# study area zones +zones_internal = zones %>% + filter(zone_type == 1) + +# ------- CPC data + +# Define the path to the ZIP file +zip_file_path <- "data/external/cpc/cpc_west_yorkshire_weekdays_march_may.zip" +# Create a temporary directory to extract the files +temp_dir <- tempdir() +# Extract the ZIP file to the temporary directory +unzip(zip_file_path, exdir = temp_dir) +# Get a list of all CSV files in the temporary directory +csv_files <- list.files(temp_dir, pattern = "\\.csv$", full.names = TRUE) + +# Read each CSV file into a list of data frames +cpc_matrices <- purrr::map(csv_files, read.csv) + +# Extract the part of the filename after the underscore and before the .csv extension +file_names <- basename(csv_files) +short_names <- stringr::str_extract(file_names, "(?<=_).*(?=\\.csv)") + +# Name each data frame in the list by its original file name +names(cpc_matrices) <- short_names + +# --- Combine the dfs + +# Add a new column to each data frame with the name of the data frame +cpc_matrices <- purrr::map2(cpc_matrices, short_names, ~mutate(.x, source = .y)) + +# Combine into one df +cpc_matrices_all <- bind_rows(cpc_matrices) + +# replace NA values with 0 +cols <- c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb") + +cpc_matrices_all <- cpc_matrices_all %>% + mutate(across(all_of(cols), ~replace_na(., 0))) + +# Add column to sum all flow types +cpc_matrices_all <- cpc_matrices_all %>% + mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE)) + +# ------- Geo boundaries + +# England +msoa_shp_2011 <- st_read("data/external/msoa_england_2011/infuse_msoa_lyr_2011_clipped.shp") + +# West Yorkshire (All zones with zone_type == 1) + +study_area_large <- msoa_shp_2011 %>% + filter(geo_code %in% zones_internal$msoa) %>% + rename(MSOA11CD = geo_code) + +# Leeds + +# --- administrative boundaries +study_area <- st_read("data/interim/study_area_boundary.geojson") +# convert to desired resolution +geography = "MSOA" +study_area = study_area_geographies(study_area = study_area, + geography = geography) + +# ADD 2011 MSOA codes to study area +msoa_2011_2021_lookup <- read_csv("data/external/cpc/MSOA_2011_2021.csv") + +study_area <- study_area %>% + left_join(msoa_2011_2021_lookup %>% + select(c("MSOA11CD", "MSOA21CD")), + by = "MSOA21CD") + + +# ----------------------- 2. ADD GEOGRAPHIC COORDINATES TO ZONES ----------------------- # + +# ----- filter zones to study area + +# filter matrices to keep study area zones only (i.e intrazonal flows only) +cpc_matrices_all_internal <- cpc_matrices_all %>% + filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id) + + +# ----- Add desire lines + +# prepare data for od::od_to_sf() + +# ----- add msoa 2011 codes +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + # join origin data + left_join(zones_internal %>% + select(msoa, zone_id, msoa_name) %>% + rename(from_msoa = msoa, from_msoa_name = msoa_name), + by = c("from_zone" = "zone_id")) %>% + # join destination data + left_join(zones_internal %>% + select(msoa, zone_id, msoa_name) %>% + rename(to_msoa = msoa, to_msoa_name = msoa_name), + by = c("to_zone" = "zone_id")) %>% + # move msoa codes to first two columns (for od::od_to_sf()) + relocate("from_msoa", .before = everything()) %>% + relocate("to_msoa", .after = "from_msoa") + +# od::od_to_sf + +# cpc_matrices_all_internal_sf <- od::od_to_sf(x = cpc_matrices_all_internal, +# z = study_area_cpc_internal, +# silent = FALSE) + +# Filter to Leeds only +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + filter(from_msoa %in% study_area$MSOA11CD & to_msoa %in% study_area$MSOA11CD) + +# Rename columns +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + rename(Origin = from_msoa, Destination = to_msoa) + + +# Filter matrix by distance also adds desire lines +cpc_matrices_all_internal_sf <- filter_matrix_by_distance(zones = study_area_large, + od_matrix = cpc_matrices_all_internal, + dist_threshold = 1000) + + + + +# -------------- 3. JOIN TRAVEL TIME DATA + +# ---------- MSOA level + +# read in the data +tt_matrix_msoa <- arrow::read_parquet("data/processed/travel_times/MSOA/travel_time_matrix_expanded.parquet") + +# some OD pairs don't have travel time data (for some combinations). Expand grid so that we explitly mention these OD pairs +tt_matrix_msoa_exp <- tidyr::crossing(from_id = tt_matrix_msoa$from_id, to_id = tt_matrix_msoa$to_id, combination = tt_matrix_msoa$combination) + +tt_matrix_msoa <- tt_matrix_msoa_exp %>% + left_join(tt_matrix_msoa, by = c("from_id", "to_id", "combination")) + + + +# add metadata (MSOA for each home and workplace zone) +tt_matrix_msoa <- tt_matrix_msoa %>% + left_join(study_area %>% + select(MSOA21CD, MSOA11CD, OBJECTID) %>% + st_drop_geometry() %>% + rename_with(~paste0(., "_home")) %>% + mutate(across(everything(), ~as.character(.))), + by = c("from_id" = "OBJECTID_home")) %>% + left_join(study_area %>% + select(MSOA21CD, MSOA11CD, OBJECTID) %>% + st_drop_geometry() %>% + rename_with(~paste0(., "_work")) %>% + mutate(across(everything(), ~as.character(.))), + by = c("to_id" = "OBJECTID_work")) + + +# ----- Prepare time_of_day column in tt_matrix to match demand matrix + +# keep only pt travel times +tt_matrix_msoa <- tt_matrix_msoa %>% + filter(str_detect(combination, "pt_")) + +# add combination column to cpc data +cpc_matrices_all_internal_sf <- cpc_matrices_all_internal_sf %>% + mutate(combination = case_when( + source %in% c("05-06", "06-07", "07-08", "08-09", "09-10", "10-11", "11-12") ~ "pt_wkday_morning", + source %in% c("12-13", "13-14", "14-15", "15-16", "16-17") ~ "pt_wkday_afternoon", + source %in% c("17-18", "18-19", "19-20") ~ "pt_wkday_evening") + ) + +# group by combination column +cols_to_sum = c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb", "total_flow") + +cpc_matrices_all_internal_sf_grouped <- cpc_matrices_all_internal_sf %>% + #st_drop_geometry() %>% + group_by(Origin, Destination, distance_m, combination) %>% + summarise(across(cols_to_sum, sum, na.rm = TRUE)) + + +# join travel time data and census commute data +#TODO: join on time of day also!!! + +census_work_msoa_tt <- cpc_matrices_all_internal_sf_long %>% + left_join(tt_matrix_msoa, by = c("Origin" = "MSOA11CD_home", "Destination" = "MSOA11CD_work", "combination")) + + +# save +#write_csv(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.csv") +arrow::write_parquet(census_work_msoa_tt, ) + + From fa52a059ce28ac18674adb624f4c1a014cb7c1fd Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Fri, 1 Nov 2024 17:44:20 +0000 Subject: [PATCH 05/16] progress on temporal clustering --- code/demand_cluster_flows.R | 2 +- code/demand_cluster_flows_prep.R | 39 ++---- code/demand_on_buses.R | 98 ++++++++++----- code/performance_od.R | 208 ++++++++----------------------- code/travel_demand_temporal.R | 55 ++++---- code/x_cpc_exploration.R | 14 ++- 6 files changed, 160 insertions(+), 256 deletions(-) diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R index 5400e80..54dbf87 100644 --- a/code/demand_cluster_flows.R +++ b/code/demand_cluster_flows.R @@ -16,8 +16,8 @@ source("R/dbscan_sensitivity.R") ########## ----------------------- Read in the data ----------------------- ########## geography <- "MSOA" +od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson")) # od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios.geojson")) -od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_mode.geojson")) # path tp save plots plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/") diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R index fe8ac10..0ef5388 100644 --- a/code/demand_cluster_flows_prep.R +++ b/code/demand_cluster_flows_prep.R @@ -46,20 +46,12 @@ study_area <- study_area %>% # Demand (census) + supply (travel time) data -if(mode == FALSE){ - # data with "commute_all" only - #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) -} else{ - # data with modes - #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet")) - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet")) -} - -# filter to specific combination -# TODO: get seperate flows for car and pt, and keep two combinations -od_demand <- od_demand %>% - filter(combination == "pt_wkday_morning") +od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) + +# # filter to specific combination +# # TODO: get seperate flows for car and pt, and keep two combinations +# od_demand <- od_demand %>% +# filter(combination == "pt_wkday_morning") od_demand <- od_demand %>% select(-distance_m) @@ -125,7 +117,7 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE) dir(tempdir()) od_demand_for_jittering <- od_demand_filtered %>% - select(Origin, Destination, starts_with("commute_")) + select(Origin, Destination, total_flow) # arguments are here: https://github.com/dabreegster/odjitter?tab=readme-ov-file#details @@ -138,7 +130,7 @@ od_demand_jittered = odjitter::jitter( origin_key = "Origin", destination_key = "Destination", # column with the flows (to be disaggregated) - disaggregation_key = "commute_all", + disaggregation_key = "total_flow", # What's the maximum number of trips per output OD row that's allowed? disaggregation_threshold = 95, @@ -167,9 +159,12 @@ od_demand_jittered = odjitter::jitter( deduplicate_pairs = TRUE ) +# # TODO: remove when jittering is fixed +# od_demand_jittered <- od_demand_filtered + # jittered returns fractions. Round them od_demand_jittered <- od_demand_jittered %>% - mutate(across(starts_with("commute_"), round)) + mutate(across(starts_with("total_flow"), round)) @@ -227,15 +222,7 @@ od_demand_jittered_scenarios <- od_demand_jittered %>% ### Save the sfs for each scenario - -if(mode == FALSE){ - # data with "commute_all" only - st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios.geojson"), delete_dsn = TRUE) - -} else{ - # data with modes - st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_mode.geojson"), delete_dsn = TRUE) - } +st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson"), delete_dsn = TRUE) diff --git a/code/demand_on_buses.R b/code/demand_on_buses.R index 12a1466..01b8806 100644 --- a/code/demand_on_buses.R +++ b/code/demand_on_buses.R @@ -23,7 +23,7 @@ source("R/filter_od_matrix.R") ########## ----------------------- Read in the data ----------------------- ########## # is the demand data disaggregated by mode? -mode = TRUE +mode = FALSE # ----------- 1. Study area @@ -54,25 +54,18 @@ gtfs_paths = paste0(gtfs_dir, gtfs_names) gtfs_bus <- gtfstools::read_gtfs(gtfs_paths[grepl("bus", gtfs_paths)]) #gtfs_rail <- gtfstools::read_gtfs(gtfs_paths[grepl("rail", gtfs_paths)]) -# --- filter the feeds to a specific point in time -gtfs_trip_ids <- gtfs_bus$frequencies %>% - filter(start_time == "07:30:00") - -gtfs_bus <- gtfs_bus %>% - gtfstools::filter_by_trip_id(gtfs_trip_ids$trip_id) +# # --- filter the feeds to a specific point in time +# gtfs_trip_ids <- gtfs_bus$frequencies %>% +# filter(start_time == "07:30:00") +# +# gtfs_bus <- gtfs_bus %>% +# gtfstools::filter_by_trip_id(gtfs_trip_ids$trip_id) # ----------- 3. Census OD data # Demand (census) + supply (travel time) data -if(mode == TRUE){ - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet")) - -} else{ - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet")) -} - -# od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) +od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed.parquet")) # # columns to reference (they differ based on geography) # from_id_col = paste0(geography, "21CD_home") @@ -107,8 +100,8 @@ od_supply <- od_supply %>% # save the output -arrow::write_parquet(od_supply, paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage.parquet")) -#od_supply <- arrow::read_parquet( paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage.parquet")) +arrow::write_parquet(od_supply, paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage_temporal.parquet")) +#od_supply <- arrow::read_parquet( paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage_temporal.parquet")) # --- remove od pairs with very short distance @@ -135,13 +128,12 @@ od_supply_filtered = filter_matrix_by_distance(zones = study_area, # filter(combination == "pt_wkday_morning") od_sd <- od_demand %>% - filter(combination == "pt_wkday_morning") %>% left_join(od_supply_filtered %>% select(-distance_m) %>% st_drop_geometry(), by = c("Origin", # = from_id_col, - "Destination", # = to_id_col, - "departure_time" = "start_time")) + "Destination", # = to_id_col, + "departure_time" = "start_time")) # # save output @@ -153,7 +145,7 @@ od_sd <- od_demand %>% # Method 1: all_to_all trips_sd_1 <- od_sd %>% group_by(trip_id, departure_time, combination) %>% - summarise(potential_demand_all_to_all = sum(commute_all, na.rm = TRUE)) %>% + summarise(potential_demand_all_to_all = sum(total_flow, na.rm = TRUE)) %>% ungroup() # Method 2: frequency-based @@ -162,7 +154,7 @@ trips_sd_2 <- od_sd %>% # get number of passengers on each route for each OD pair mutate(group_id = cur_group_id(), frequency_min = 3600/headway_secs, - commute_route = round((commute_all * frequency_min) / sum(frequency_min))) %>% + commute_route = round((total_flow * frequency_min) / sum(frequency_min))) %>% ungroup() %>% # sum over the route group_by(trip_id, departure_time, combination) %>% @@ -173,7 +165,7 @@ trips_sd_2 <- od_sd %>% trips_sd_3 <- od_sd %>% group_by(departure_time, combination, Origin, Destination) %>% # get number of passengers on each route for each OD pair - mutate(commute_route = round((commute_all / n()))) %>% + mutate(commute_route = round((total_flow / n()))) %>% ungroup() %>% # sum over the route group_by(trip_id, departure_time, combination) %>% @@ -204,12 +196,8 @@ od_trips_sd <- od_trips_sd %>% dplyr::top_n(1, potential_demand_equal_split) %>% ungroup() -if(mode == TRUE){ - arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet")) -} else{ - arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) -} +arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) # ----------- 4. Add geometry to plot results @@ -229,7 +217,7 @@ trips_sd_sf <- trips_sd %>% # ----------- 5. Save the output -st_write(trips_sd_sf, paste0("data/processed/travel_demand/trips_potential_demand_census_", geography, ".geojson"), delete_dsn = TRUE) +st_write(trips_sd_sf, paste0("data/processed/travel_demand/trips_potential_demand_temporal_", geography, ".geojson"), delete_dsn = TRUE) @@ -245,18 +233,24 @@ trips_sd_sf_shape_sum <- trips_sd_sf %>% # ########### --------------------------- 5. Plots --------------------------- ########## + # ----------- Potential demand histograms ----------- # -plots_path <- "data/processed/plots/eda/speed_demand_cutoffs/" +plots_path <- "data/processed/plots/eda/speed_demand_cutoffs/temporal/" # TRIP LEVEL +trips_sd = trips_sd %>% + mutate(combination = factor(combination, levels = c("pt_wkday_morning", "pt_wkday_afternoon", "pt_wkday_evening"))) + + # equal split trips_sd %>% #mutate(potential_demand_equal_split = replace_na(potential_demand_equal_split, 0)) %>% ggplot(aes(x = potential_demand_equal_split)) + geom_histogram(bins = 50, alpha = 0.8) + + facet_grid(combination ~.) + labs(title = "Potential demand on PT routes", subtitle = "Equal split of demand from each OD\nonto routes that serve it", x = "Potential demand (no. of passengers)", @@ -276,7 +270,7 @@ trips_sd %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + geom_histogram(binwidth = 0.5, alpha = 0.8) + - facet_wrap(~ distribution_type) + + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", subtitle = "Different approaches to distributing OD demand", x = "Potential demand (no. of passengers using route) - Thousands", @@ -304,7 +298,7 @@ trips_sd %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + geom_histogram(binwidth = 0.1, alpha = 0.8) + - facet_wrap(~ distribution_type) + + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", subtitle = "Different approaches to distributing OD demand", x = "Potential demand (no. of passengers using route) - Thousands", @@ -319,6 +313,9 @@ ggsave(filename = paste0(plots_path, "plot_hist_potential_demand_routes_two_meth # ----- OD LEVEL +od_trips_sd = od_trips_sd %>% + mutate(combination = factor(combination, levels = c("pt_wkday_morning", "pt_wkday_afternoon", "pt_wkday_evening"))) + # equal split od_trips_sd %>% @@ -335,6 +332,7 @@ od_trips_sd %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + geom_histogram(binwidth = 0.5, alpha = 0.8) + + facet_grid(.~combination) + #geom_density()+ #scale_y_log10() + #geom_vline(data = percentiles_df, aes(xintercept = potential_demand, color = "blue")) + @@ -370,7 +368,7 @@ od_trips_sd %>% ggplot(aes(x = potential_demand)) + geom_histogram(binwidth = 0.5, alpha = 0.8) + #scale_y_log10() + - facet_wrap(~ distribution_type) + + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", subtitle = "Different approaches to distributing OD demand", x = "Potential demand on busiest route serving OD pair ('000)", @@ -400,7 +398,7 @@ od_trips_sd %>% ggplot(aes(x = potential_demand)) + geom_histogram(binwidth = 0.5, alpha = 0.8) + #scale_y_log10() + - facet_wrap(~ distribution_type) + + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", subtitle = "Different approaches to distributing OD demand", x = "Potential demand on busiest route serving OD pair ('000)", @@ -447,6 +445,25 @@ trips_sd %>% ggsave(filename = paste0(plots_path, "plot_dens_potential_demand_trips_equal_split.png")) +trips_sd %>% + #mutate(potential_demand_equal_split = replace_na(potential_demand_equal_split, 0)) %>% + ggplot(aes(x = potential_demand_equal_split)) + + geom_density()+ + #scale_y_log10() + + geom_vline(data = percentiles_trip, aes(xintercept = potential_demand, color = labels))+ + facet_grid(.~combination) + + labs(title = "Potential demand on PT routes", + subtitle = "Equal split of demand from each OD\nonto routes that serve it", + x = "Potential demand (no. of passengers)", + color = "Percentile") + + theme( + axis.text.x = element_text(angle = 45, hjust = 1), + axis.title.x = element_text(size = 8), + axis.title.y = element_text(size = 8), + legend.position = "bottom" + ) + +ggsave(filename = paste0(plots_path, "plot_dens_potential_demand_trips_equal_split_facet.png")) # --- OD level @@ -473,6 +490,7 @@ od_trips_sd %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + geom_density()+ + facet_grid(.~combination) + #scale_y_log10() + geom_vline(data = percentiles_df, aes(xintercept = potential_demand, color = labels))+ scale_color_brewer(type = "qual", palette = "Set1") + @@ -515,6 +533,16 @@ percentiles_speed <- data.frame( labels = c("10", "25", "50", "75", "90") ) +percentiles_speed_combination <- od_trips_sd %>% + group_by(combination) %>% + summarise( + percentiles = list(c(0.1, 0.25, 0.5, 0.75, 0.9)), + speed_kph = list(quantile(speed_kph, c(0.1, 0.25, 0.5, 0.75, 0.9))), + labels = list(c("10", "25", "50", "75", "90")) + ) %>% + unnest(cols = c(percentiles, speed_kph, labels)) + + od_trips_sd %>% ggplot(aes(x = speed_kph)) + @@ -536,6 +564,8 @@ od_trips_sd %>% ggsave(filename = paste0(plots_path, "plot_dens_speeds_od_equal_split.png")) + + # # # # ----- a) potential ridership on all different trips diff --git a/code/performance_od.R b/code/performance_od.R index 1ca4dbc..d6d806b 100644 --- a/code/performance_od.R +++ b/code/performance_od.R @@ -15,9 +15,10 @@ source("R/filter_od_matrix.R") geography = "MSOA" # are we processing the data that is disaggregated by mode? mode = FALSE +source = "cpc" # --- where do we want to save the plots? -plots_path <- paste0("data/processed/plots/eda/od_performance/", geography, "/") +plots_path <- paste0("data/processed/plots/eda/od_performance/", geography, "/", source, "/") @@ -40,12 +41,7 @@ study_area <- study_area %>% # ----- Travel time and demand matrix -if(mode == TRUE){ - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet")) - -} else{ - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) -} +od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), ".parquet")) @@ -60,7 +56,8 @@ to_id_col = paste0(geography, "21CD_work") # rename columns for function below od_demand <- od_demand %>% rename(Origin = all_of(from_id_col), - Destination = all_of(to_id_col)) + Destination = all_of(to_id_col)) %>% + relocate(Origin, Destination) @@ -98,7 +95,7 @@ od_demand_sf_rank <- od_demand_sf %>% speed_percentile_fct = cut(speed_percentile, breaks = seq(0, 1, by = 0.25), include.lowest = TRUE), - demand_percentile = percent_rank(commute_all), + demand_percentile = percent_rank(total_flow), demand_percentile_fct = cut(demand_percentile, breaks = seq(0, 1, by = 0.25), include.lowest = TRUE), @@ -108,12 +105,7 @@ od_demand_sf_rank <- od_demand_sf %>% # ---------- 5. Save the output -if(mode == TRUE){ - arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet")) - -} else{ - arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet")) -} +arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed.parquet")) @@ -134,7 +126,7 @@ tm_shape(study_area) + filter(combination == "pt_wkday_morning") %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -175,8 +167,8 @@ tm_shape(study_area) + alpha = 0.5) + tm_shape(od_demand_sf_rank %>% filter(combination == "pt_wkday_morning", n_rides == 1)) + - tm_lines(col = "commute_all", - lwd = "commute_all", + tm_lines(col = "total_flow", + lwd = "total_flow", legend.lwd.show = FALSE, alpha = 0.8, title.col = "Demand between OD pair", @@ -216,8 +208,8 @@ tm_shape(study_area) + alpha = 0.5) + tm_shape(od_demand_sf_rank %>% filter(combination == "pt_wkday_morning", speed_percentile <= 0.25)) + - tm_lines(col = "commute_all", - lwd = "commute_all", + tm_lines(col = "total_flow", + lwd = "total_flow", scale = 10, palette = "RdYlGn", legend.lwd.show = FALSE, @@ -257,8 +249,8 @@ tm_shape(study_area) + tm_shape(od_demand_sf_rank %>% mutate(n_rides = round(n_rides)) %>% filter(speed_percentile <= 0.25, combination == "pt_wkday_morning")) + - tm_lines(col = "commute_all", - lwd = "commute_all", + tm_lines(col = "total_flow", + lwd = "total_flow", scale = 10, #palette = "Blues", #style = "quantile", @@ -310,7 +302,7 @@ tm_shape(study_area) + filter(combination == "pt_wkday_morning", !is.na(quadrant)) %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -354,7 +346,7 @@ tm_shape(study_area) + filter(combination == "pt_wkday_morning", quadrant == "low speed high demand") %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -394,8 +386,8 @@ tm_shape(study_area) + tm_shape(od_demand_sf_rank %>% filter(combination == "pt_wkday_morning", quadrant == "low speed high demand", n_rides != 0) %>% mutate(n_rides = as.factor(round(n_rides)))) + - tm_lines(col = "commute_all", - lwd = "commute_all", + tm_lines(col = "total_flow", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "Demand between OD pair", @@ -443,7 +435,7 @@ tm_shape(study_area) + filter(combination == "pt_wkday_morning", quadrant == "low speed high demand") %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -487,7 +479,7 @@ tm_shape(study_area) + filter(combination == "pt_wkday_morning") %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -514,133 +506,37 @@ tm_shape(study_area) + +# ----- New plots after getting CPC temporal data +tm_shape(study_area) + + tm_fill(col = "grey95") + + tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(od_demand_sf_rank %>% + filter(!is.na(quadrant)) %>% + mutate(n_rides = as.factor(round(n_rides)))) + + tm_lines(col = "n_rides", + lwd = "total_flow", + #legend.lwd.show = FALSE, + alpha = 0.5, + title.col = "No. of rides required", + title.lwd = "Demand between OD pair", + legend.col.is.portrait = FALSE, + palette = "Dark2", #-RdYlGn + style = "quantile", + scale = 15) + + tm_facets_grid("quadrant", "combination", + type = "stack", + free.coords = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = "Demand between OD pairs\nfaceted by combination of speed and demand (percentiles)\nbetween OD pairs", + main.title.size = 1.3, + main.title.color = "azure4", + main.title.position = "left", + legend.outside = TRUE, + legend.outside.position = "bottom", + legend.stack = "horizontal", + legend.title.size = 0.9, + frame = FALSE) -# ARCHIVE FOR NOW - -# -# -# ggplot(od_demand_sf_rank %>% -# filter(ride_time > 5), -# aes(x = distance_m/ride_time, y = speed_kph, color = combination)) + -# geom_point() -# -# -# ggplot(od_demand_sf_rank %>% -# filter(ride_time > 5, combination == "pt_wkday_morning"), -# aes(x = distance_m/ride_time, y = speed_kph)) + -# geom_point() -# -# ggplot(od_demand_sf_rank %>% -# filter(ride_time > 5, combination != "car"), -# aes(x = ride_time / total_time)) + -# geom_histogram() + -# facet_wrap(~combination, ncol = 3) -# -# -# -# # ----- exploratory plots -# ggplot(od_demand_sf_rank, aes(x = speed_kph, y = speed_percentile, color = combination)) + -# geom_point() -# -# # speed -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# ggplot(aes(x = speed_kph, y = speed_percentile, color = combination)) + -# geom_point() -# -# # demand -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# ggplot(aes(x = commute_all, y = demand_percentile, color = combination)) + -# geom_point() -# -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# ggplot(aes(x = demand_percentile, fill = combination)) + -# geom_histogram() + -# facet_wrap(~combination, ncol = 3) -# -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# ggplot(aes(x = commute_all, fill = combination)) + -# geom_histogram() + -# facet_wrap(~combination, ncol = 3) -# -# -# od_demand_sf_rank_f <- od_demand_sf_rank %>% -# filter(combination == "pt_wkday_morning") -# -# -# od_demand_sf_rank %>% -# filter(combination == "pt_wkday_morning") %>% -# # as.factor to avoid "continuous value to discrete scale" error -# ggplot(aes(x = speed_percentile, y = demand_percentile, color = distance_m)) + -# geom_point() + -# scale_color_distiller(palette="Reds", direction = 1) -# -# -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# # as.factor to avoid "contiuous value to discrete scale -# ggplot(aes(x = speed_percentile, y = demand_percentile, color = distance_m)) + -# geom_point() + -# scale_color_distiller(palette="Reds", direction = 1) + -# facet_wrap(~combination, ncol = 3) -# -# -# -# -# -# # round n_rides to whole number -# # TODO: move this further upstream (previous script) -# od_demand_sf_rank <- od_demand_sf_rank %>% -# mutate(n_rides = round(n_rides)) -# -# -# # ---------- 5. Create communities based on demand (or travel time?) -# # different community for PT and PVT -# -# # ----------- 5. Plots -# -# # ----------- prep the layer -# # TODO: remove this when we have multiple start times (currently the od_supply df only has 7:30 - WHY? ) -# od_demand_sf_rank <- od_demand_sf_rank %>% -# filter(combination == "pt_wkday_morning") -# -# -# # ----------- 6. Maps -# -# -# -# -# -# -# -# -# -# sfnetworks::as_sfnetwork(od_demand_sf_rank, directed = FALSE) -> x -# -# x %>% activate(nodes) %>% -# mutate(group = tidygraph::group_louvain(weights = `commute_all`)) -> x2 -# -# # OR -# # x2 = x %>% -# # morph(to_linegraph) %>% -# # mutate(group = tidygraph::group_louvain(weights = "commute_all")) %>% -# # unmorph() -# -# -# plot(st_geometry(x, "edges"), col = "grey", lwd = 0.5) -# plot(st_geometry(study_area)) -# -# x2 %>% -# activate("nodes") %>% -# st_as_sf() %>% -# transmute(group = as.factor(group)) %>% -# plot(lwd = 4, add = TRUE) -# -# -# -# -# diff --git a/code/travel_demand_temporal.R b/code/travel_demand_temporal.R index c4991f5..97d6857 100644 --- a/code/travel_demand_temporal.R +++ b/code/travel_demand_temporal.R @@ -9,7 +9,7 @@ library(sf) source("R/study_area_geographies.R") source("R/filter_od_matrix.R") -# --------------- 1. READ IN THE DEMaND DATA +# --------------- 1. READ IN THE DEMAND DATA # ------- Zoning layer (2011 MSOAs) @@ -86,7 +86,7 @@ study_area <- study_area %>% by = "MSOA21CD") -# ----------------------- 2. ADD GEOGRAPHIC COORDINATES TO ZONES ----------------------- # +# ----------------------- 2. ADD MSOA CODES AND FILTER TO INTERNAL FLOWS ----------------------- # # ----- filter zones to study area @@ -94,9 +94,6 @@ study_area <- study_area %>% cpc_matrices_all_internal <- cpc_matrices_all %>% filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id) - -# ----- Add desire lines - # prepare data for od::od_to_sf() # ----- add msoa 2011 codes @@ -115,30 +112,12 @@ cpc_matrices_all_internal <- cpc_matrices_all_internal %>% relocate("from_msoa", .before = everything()) %>% relocate("to_msoa", .after = "from_msoa") -# od::od_to_sf - -# cpc_matrices_all_internal_sf <- od::od_to_sf(x = cpc_matrices_all_internal, -# z = study_area_cpc_internal, -# silent = FALSE) # Filter to Leeds only cpc_matrices_all_internal <- cpc_matrices_all_internal %>% filter(from_msoa %in% study_area$MSOA11CD & to_msoa %in% study_area$MSOA11CD) -# Rename columns -cpc_matrices_all_internal <- cpc_matrices_all_internal %>% - rename(Origin = from_msoa, Destination = to_msoa) - - -# Filter matrix by distance also adds desire lines -cpc_matrices_all_internal_sf <- filter_matrix_by_distance(zones = study_area_large, - od_matrix = cpc_matrices_all_internal, - dist_threshold = 1000) - - - - -# -------------- 3. JOIN TRAVEL TIME DATA +# -------------- 3. JOIN TRAVEL TIME DATA ---------- # # ---------- MSOA level @@ -176,7 +155,7 @@ tt_matrix_msoa <- tt_matrix_msoa %>% filter(str_detect(combination, "pt_")) # add combination column to cpc data -cpc_matrices_all_internal_sf <- cpc_matrices_all_internal_sf %>% +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% mutate(combination = case_when( source %in% c("05-06", "06-07", "07-08", "08-09", "09-10", "10-11", "11-12") ~ "pt_wkday_morning", source %in% c("12-13", "13-14", "14-15", "15-16", "16-17") ~ "pt_wkday_afternoon", @@ -186,21 +165,31 @@ cpc_matrices_all_internal_sf <- cpc_matrices_all_internal_sf %>% # group by combination column cols_to_sum = c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb", "total_flow") -cpc_matrices_all_internal_sf_grouped <- cpc_matrices_all_internal_sf %>% - #st_drop_geometry() %>% - group_by(Origin, Destination, distance_m, combination) %>% - summarise(across(cols_to_sum, sum, na.rm = TRUE)) +cpc_matrices_all_internal_grouped <- cpc_matrices_all_internal %>% + group_by(from_msoa, to_msoa, combination) %>% + summarise(across(cols_to_sum, sum, na.rm = TRUE)) %>% + ungroup() # join travel time data and census commute data #TODO: join on time of day also!!! -census_work_msoa_tt <- cpc_matrices_all_internal_sf_long %>% - left_join(tt_matrix_msoa, by = c("Origin" = "MSOA11CD_home", "Destination" = "MSOA11CD_work", "combination")) +cpc_matrices_all_internal_tt <- cpc_matrices_all_internal_grouped %>% + left_join(tt_matrix_msoa, by = c("from_msoa" = "MSOA11CD_home", "to_msoa" = "MSOA11CD_work", "combination")) + + +# # ---------- 4. ADD DESIRE LINES ---------- # +# +# # # Filter matrix by distance also adds desire lines +# cpc_matrices_all_internal_tt_sf <- filter_matrix_by_distance(zones = study_area_large, +# od_matrix = cpc_matrices_all_internal_tt, +# dist_threshold = 500) + # save -#write_csv(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.csv") -arrow::write_parquet(census_work_msoa_tt, ) +#write_csv(cpc_matrices_all_internal_tt, "data/raw/travel_demand/cpc_matrices_2019/demand_study_area_msoa.csv") +arrow::write_parquet(cpc_matrices_all_internal_tt, "data/raw/travel_demand/cpc_matrices_2019/demand_study_area_msoa.parquet") + diff --git a/code/x_cpc_exploration.R b/code/x_cpc_exploration.R index 9d345b8..85ea814 100644 --- a/code/x_cpc_exploration.R +++ b/code/x_cpc_exploration.R @@ -46,6 +46,12 @@ cpc_matrices <- purrr::map2(cpc_matrices, short_names, ~mutate(.x, source = .y)) # Combine into one df cpc_matrices_all <- bind_rows(cpc_matrices) +# replace NA values with 0 +cols <- c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb") + +cpc_matrices_all <- cpc_matrices_all %>% + mutate(across(all_of(cols), ~replace_na(., 0))) + # Add column to sum all flow types cpc_matrices_all <- cpc_matrices_all %>% mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE)) @@ -84,7 +90,7 @@ study_area <- study_area %>% # ----- filter zones to study area -# filter matrices to keep study area zones only +# filter matrices to keep study area zones only (i.e intrazonal flows only) cpc_matrices_all_internal <- cpc_matrices_all %>% filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id) @@ -119,17 +125,13 @@ cpc_matrices_all_internal <- cpc_matrices_all_internal %>% cpc_matrices_all_internal <- cpc_matrices_all_internal %>% rename(Origin = from_msoa, Destination = to_msoa) + # Filter matrix by distance also adds desire lines cpc_matrices_all_internal_sf <- filter_matrix_by_distance(zones = study_area_large, od_matrix = cpc_matrices_all_internal, dist_threshold = 1000) - - - - - map_cpc_flows = function(od_matrix, area, column_to_map, From 845fdc4220e3bb65cb322c95eecd349c8286e657 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Mon, 4 Nov 2024 18:31:44 +0000 Subject: [PATCH 06/16] split points for odjitter --- code/demand_cluster_flows_prep.R | 127 +++++++++++++++++++++++++++---- 1 file changed, 114 insertions(+), 13 deletions(-) diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R index 0ef5388..0120d10 100644 --- a/code/demand_cluster_flows_prep.R +++ b/code/demand_cluster_flows_prep.R @@ -1,6 +1,6 @@ # the purpose of this script is to prepare the flow data for spatial clustering # # This is done by jittering the flows so that it is not concentrated at zone centroids - +# library(tidyverse) library(sf) #library(lwgeom) @@ -18,8 +18,7 @@ source("R/filter_od_matrix.R") ########## ----------------------- Read in the data ----------------------- ########## # is the data disaggregated by mode? -mode <- TRUE -#mode <- FALSE +mode <- FALSE # ----------- 1. Study area @@ -107,7 +106,111 @@ sub_zones <- sub_zones %>% rename(population = gbr_pd_2020_1km_UNadj) - +# --- Create more points (if we don't have enough points, then odjitter may not find a feasible solution) + + +#' Split Points into Multiple Offset Points with CRS Transformation +#' +#' This function takes an `sf` object containing spatial points and splits each +#' point into multiple new points that are offset from the original points. +#' Each new point represents an equal share of a specified numeric column value, +#' and is randomly distributed within a given distance range from the original +#' point. The function transforms the CRS to a specified metric CRS for +#' processing and then converts it back to the original CRS. +#' +#' @param sf_points An `sf` object representing multiple spatial points with +#' geometry and a numeric column to be split. +#' @param col_to_split A string indicating the name of the numeric column to +#' be split across the new points. +#' @param splits An integer indicating the number of new points to generate +#' from each original point. +#' @param offset_dist_min A numeric value specifying the minimum distance +#' (in meters) for the offsets of the new points from the original points. +#' @param offset_dist_max A numeric value specifying the maximum distance +#' (in meters) for the offsets of the new points from the original points. +#' @param target_crs A numeric EPSG code or a PROJ string for the target +#' metric CRS to be used during processing. +#' +#' @return An `sf` object containing the new points, with the specified +#' numeric column value equally distributed among the generated points. +#' +#' @examples +#' # Example usage +#' library(sf) +#' sf_points <- st_as_sf(data.frame(id = 1, +#' population = 100, +#' geometry = st_sfc(st_point(c(1, 1)), +#' crs = 4326))) +#' result <- split_points(sf_points, col_to_split = "population", +#' splits = 5, offset_dist_min = 10, +#' offset_dist_max = 20, target_crs = 32630) +#' plot(st_geometry(result)) +split_points <- function(sf_points, col_to_split, splits, offset_dist_min, + offset_dist_max, target_crs) { + + # Ensure that the specified column exists in the sf object + if (!col_to_split %in% names(sf_points)) { + stop(paste("Column", col_to_split, "does not exist in the sf object.")) + } + + # Store the original CRS + original_crs <- st_crs(sf_points) + + # Transform to the target CRS + sf_points_metric <- st_transform(sf_points, target_crs) + + # Create an empty list to store new points + new_points_list <- vector("list", nrow(sf_points_metric)) + + # Loop through each point in the sf object + for (i in 1:nrow(sf_points_metric)) { + point <- sf_points_metric[i, ] + population <- point[[col_to_split]] # Get the value of the specified column + + # Calculate new population per point + new_col_to_split <- population / splits + + # Generate new points with offsets + offsets <- lapply(1:splits, function(index) { + angle <- runif(1, 0, 2 * pi) # Random angle in radians + offset_dist <- runif(1, min = offset_dist_min, max = offset_dist_max) + offset_x <- offset_dist * cos(angle) + offset_y <- offset_dist * sin(angle) + st_point(c(st_coordinates(point)[1] + offset_x, + st_coordinates(point)[2] + offset_y)) + }) + + # Create sf object for new points + new_points_sf <- st_sf( + # TODO: take column name from funciton argument instead of hardcoding + population = rep(new_col_to_split, splits), # Dynamically assign the column name + geometry = st_sfc(offsets, crs = st_crs(point)) + ) + + # Append the new points sf object to the list + new_points_list[[i]] <- new_points_sf + } + + # Combine all new points into a single sf object + result_sf <- do.call(rbind, new_points_list) + + # Transform back to the original CRS + result_sf <- st_transform(result_sf, original_crs) + + return(result_sf) +} + +# apply function +sub_zones_2 <- split_points(sub_zones, + col_to_split = "population", + splits = 5, + offset_dist_min = 10, + offset_dist_max = 20, + target_crs = 3857) + + + +####### ##### ----- STEP 2: Jittering @@ -116,8 +219,11 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE) # confirm it's empty dir(tempdir()) +selected_combination = "pt_wkday_morning" + od_demand_for_jittering <- od_demand_filtered %>% - select(Origin, Destination, total_flow) + #filter(combination == selected_combination) %>% + select(Origin, Destination, total_flow, combination) # arguments are here: https://github.com/dabreegster/odjitter?tab=readme-ov-file#details @@ -132,7 +238,7 @@ od_demand_jittered = odjitter::jitter( # column with the flows (to be disaggregated) disaggregation_key = "total_flow", # What's the maximum number of trips per output OD row that's allowed? - disaggregation_threshold = 95, + disaggregation_threshold = 50, # ----- arguments for ZONES ----- # @@ -142,7 +248,7 @@ od_demand_jittered = odjitter::jitter( # ----- Arguments for SUBPOINTS ----- # # subpoints to jitter origins and destinations to - subpoints = sub_zones, + subpoints = sub_zones_2, # # alternatively, define different points for origins and destinations # subpoints_origins = points_home, # subpoints_destinations = points_work, @@ -159,7 +265,7 @@ od_demand_jittered = odjitter::jitter( deduplicate_pairs = TRUE ) -# # TODO: remove when jittering is fixed +# TODO: remove when jittering is fixed # od_demand_jittered <- od_demand_filtered # jittered returns fractions. Round them @@ -350,8 +456,3 @@ st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geo # # # density plot: facet by demand percentile: Keep potential_demand_equal_split = NA (replace with 0) # -# -# -# -# -# From b6496a8bf5093450faf96b1509922d53f9133728 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Tue, 5 Nov 2024 12:52:04 +0000 Subject: [PATCH 07/16] ensure flow_ID is unique --- code/demand_cluster_flows.R | 20 +++++++++++++++----- code/demand_cluster_flows_prep.R | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R index 54dbf87..e4492be 100644 --- a/code/demand_cluster_flows.R +++ b/code/demand_cluster_flows.R @@ -22,6 +22,9 @@ od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, " # path tp save plots plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/") +# what combination are we clustering +day_time = "pt_wkday_evening" + # ----------- 1. Study area # --- administrative boundaries @@ -42,6 +45,13 @@ geoid_col = paste0(geography, "21CD") study_area <- study_area %>% relocate(all_of(geoid_col), .before = everything()) +# add distance_m column +od_demand_jittered = filter_matrix_by_distance(zones = study_area, + od_matrix = od_demand_jittered %>% st_drop_geometry(), + dist_threshold = 500) + +od_demand_jittered = od_demand_jittered %>% + filter(combination == day_time) ########## ----------------------- Different parameter combinations ----------------------- ########## # # 1) All flows + equal weight to origins and destinations (for flow distance) @@ -138,10 +148,10 @@ od_demand_xyuv <- od_demand_jittered %>% # assign unique id to each point od_demand_xyuv <- od_demand_xyuv %>% group_by(Origin, x, y) %>% - mutate(O_ID = cur_group_id()) %>% + mutate(O_ID = paste0(cur_group_id(), "_", row_number())) %>% ungroup() %>% group_by(Destination, u, v) %>% - mutate(D_ID = cur_group_id()) %>% + mutate(D_ID = paste0(cur_group_id(), "_", row_number())) %>% ungroup() # assign unique id to each OD pair @@ -261,11 +271,11 @@ w <- dist_mat %>% select(flow_ID) %>% # add commuting data to inner_join(od_demand_jittered %>% - select(flow_ID, commute_all), + select(flow_ID, total_flow), by = "flow_ID") # weight vector -w_vec <- as.vector(w$commute_all) +w_vec <- as.vector(w$total_flow) @@ -408,7 +418,7 @@ cluster_dbscan_res <- od_demand_jittered %>% # save -st_write(cluster_dbscan_res, paste0("data/processed/clustering/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, ".geojson"), delete_dsn = TRUE) +st_write(cluster_dbscan_res, paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson"), delete_dsn = TRUE) diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R index 0120d10..e8f840f 100644 --- a/code/demand_cluster_flows_prep.R +++ b/code/demand_cluster_flows_prep.R @@ -238,7 +238,7 @@ od_demand_jittered = odjitter::jitter( # column with the flows (to be disaggregated) disaggregation_key = "total_flow", # What's the maximum number of trips per output OD row that's allowed? - disaggregation_threshold = 50, + disaggregation_threshold = 95, # ----- arguments for ZONES ----- # From b2e201b4921eb4e4d10d24417bf4098632d45134 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:10:35 +0000 Subject: [PATCH 08/16] specify demand column explicitly --- R/dbscan_sensitivity.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/dbscan_sensitivity.R b/R/dbscan_sensitivity.R index 1f813fa..0d5866d 100644 --- a/R/dbscan_sensitivity.R +++ b/R/dbscan_sensitivity.R @@ -5,13 +5,13 @@ #' @param options_minpts a vector of options for the minPts paramter #' @param weights a vector where each value represents the number of people going between an OD pair (to pass to the weights argument of dbscan) #' @param flows the original od matrix with a column that represents flows between od pairs -#' +#' @param flow_column the column that represents the number of people travelling between an OD pair #' @return a df with columns {id} (to identify the eps and minpts used), {cluster}, {size} (no of desire lines in cluster), commuters_sum (total no. of commuters in cluster) #' #' @examples #' #' @export -dbscan_sensitivity = function(distance_matrix, options_epsilon, options_minpts, weights, flows){ +dbscan_sensitivity = function(distance_matrix, options_epsilon, options_minpts, weights, flows, flow_column){ # dataframe with all combinations of eps and minpts options_parameters <- tidyr::expand_grid(eps = options_epsilon, minpts = options_minpts) @@ -53,7 +53,7 @@ dbscan_sensitivity = function(distance_matrix, options_epsilon, options_minpts, cluster_res_i <- cluster_dbscan_res_i %>% st_drop_geometry() %>% group_by(cluster) %>% - summarise(size = n(), commuters_sum = sum(commute_all)) %>% + summarise(size = n(), commuters_sum = sum(.data[[flow_column]])) %>% arrange(desc(size)) %>% ungroup() From 55975e3d93988003e8bbcba9a2ed17c6f460eff1 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Tue, 5 Nov 2024 19:16:13 +0000 Subject: [PATCH 09/16] maps edited for temporal data --- code/demand_cluster_flows.R | 103 +- code/demand_cluster_flows_maps.R | 23 +- code/demand_cluster_flows_maps_temporal.R | 2060 +++++++++++++++++++++ code/demand_cluster_flows_prep.R | 6 +- 4 files changed, 2126 insertions(+), 66 deletions(-) create mode 100644 code/demand_cluster_flows_maps_temporal.R diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R index e4492be..c8d7cb0 100644 --- a/code/demand_cluster_flows.R +++ b/code/demand_cluster_flows.R @@ -317,56 +317,57 @@ hist(distances$fds, breaks = 100) # # # ----- Sensitivity analysis(for different epsilon and minpts combinaitons) # -# # function to get clustering results for many combinations -# dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat, -# options_epsilon <- c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 5, 6, 7, 7.5, 8, 9), -# options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 700, 1000, 1500, 3000, 5000, 10000), -# weights = w_vec, -# flows = st_drop_geometry(od_demand_jittered) -# ) -# -# -# arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet")) -# # dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet")) -# -# -# # All results plotted together -# dbscan_sensitivity_res %>% -# filter(cluster != 0) %>% -# ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + -# geom_col() + -# scale_y_continuous(trans='log10') + -# facet_wrap(~id, scales = "fixed") + -# labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", -# subtitle = "Parameter combinations that returned more than 1 cluster", -# x = "Cluster no.", -# y = "No. of od pairs in cluster", -# fill= "No. of commuters") + -# theme_minimal() -# -# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_all.png", width = 14, height = 10) -# -# dbscan_sensitivity_res %>% -# filter(cluster != 0) %>% -# group_by(id) %>% -# #mutate(clusters = n()) %>% -# # How many clusters have more than 5 od pairs in them? -# mutate(clusters = sum(size > 5)) %>% -# ungroup() %>% -# filter(clusters > 5) %>% -# ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + -# geom_col() + -# scale_y_continuous(trans='log10') + -# facet_wrap(~id, scales = "fixed") + -# labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", -# subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", -# x = "Cluster no.", -# y = "No. of od pairs in cluster", -# fill= "No. of commuters") + -# theme_minimal() -# -# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_filtered.png", width = 14, height = 10) -# +# function to get clustering results for many combinations +dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat, + options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9), + options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000), + weights = w_vec, + flows = st_drop_geometry(od_demand_jittered), + flow_column = "total_flow" + ) + + +arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_", day_time, ".parquet")) +# dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet")) + + +# All results plotted together +dbscan_sensitivity_res %>% + filter(cluster != 0) %>% + ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + + geom_col() + + scale_y_continuous(trans='log10') + + facet_wrap(~id, scales = "fixed") + + labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", + subtitle = "Parameter combinations that returned more than 1 cluster", + x = "Cluster no.", + y = "No. of od pairs in cluster", + fill= "No. of commuters") + + theme_bw() + +ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_all_", day_time, ".png"), width = 14, height = 10) + +dbscan_sensitivity_res %>% + filter(cluster != 0) %>% + group_by(id) %>% + #mutate(clusters = n()) %>% + # How many clusters have more than 5 od pairs in them? + mutate(clusters = sum(size > 5)) %>% + ungroup() %>% + filter(clusters > 5) %>% + ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + + geom_col() + + scale_y_continuous(trans='log10') + + facet_wrap(~id, scales = "fixed") + + labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", + subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", + x = "Cluster no.", + y = "No. of od pairs in cluster", + fill= "No. of commuters") + + theme_bw() + +ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_", day_time, ".png"), width = 14, height = 10) + ### -------------------- STEP 3: Cluster -------------------- ### @@ -374,7 +375,7 @@ hist(distances$fds, breaks = 100) # cluster option 1: border points assigned to cluster cluster_dbscan = dbscan::dbscan(dist_mat, - minPts = 70, # 125 + minPts = 50, # 125 eps = 8, # 9.5 #borderPoints = FALSE, weights = w_vec) diff --git a/code/demand_cluster_flows_maps.R b/code/demand_cluster_flows_maps.R index 5aafdd1..38edb05 100644 --- a/code/demand_cluster_flows_maps.R +++ b/code/demand_cluster_flows_maps.R @@ -377,7 +377,7 @@ tm_shape(study_area) + #panel.show = FALSE, panel.label.size = 1, panel.label.bg.color = NA, - # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)), + # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)), frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly map_cluster_results_bus_frac_grouped_gtfs_poly @@ -742,7 +742,7 @@ tm_shape(study_area) + breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), palette = "RdYlGn", #Accent alpha = 0.4, - # title.col = "Fraction of Bus to Car users", + # title.col = "Fraction of Bus to Car users", #title.lwd = "No. of commuters", legend.col.show = FALSE, legend.lwd.show = FALSE, @@ -1143,7 +1143,7 @@ tm_shape(basemap_urban_rural) + ) + # ---- clusters # poly border -tm_shape(clusters_vis_mode_poly %>% + tm_shape(clusters_vis_mode_poly %>% filter(cluster %in% clusters_vis_mode_poly_filt$cluster)) + tm_borders(col = "black", lwd = 3, @@ -1154,9 +1154,9 @@ tm_shape(clusters_vis_mode_poly %>% nrow = rows, showNA = FALSE) + # poly fill -# tm_shape(cluster_dbscan_res_mode_poly_filt_max %>% -# filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>% -tm_shape(clusters_vis_mode_poly_filt %>% + # tm_shape(cluster_dbscan_res_mode_poly_filt_max %>% + # filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>% + tm_shape(clusters_vis_mode_poly_filt %>% st_buffer(1000)) + tm_borders(col = "darkgreen", lwd = 2) + @@ -1221,9 +1221,9 @@ tm_shape(basemap_urban_rural) + tm_shape(st_union(clusters_vis_mode_poly_filt %>% mutate(area = st_area(.)) %>% filter(area > 0.2 * mean(area)))) + - tm_borders(col = "darkgreen", - lwd = 3.5, - lty = "dashed") + + tm_borders(col = "darkgreen", + lwd = 3.5, + lty = "dashed") + tm_layout(fontfamily = 'Georgia', main.title = paste0("Potential DRT Service Areas"), main.title.size = 1.1, @@ -1487,8 +1487,8 @@ ggplot(clusters_ur_poly_combined) + #direction = -1, labels = function(x) str_wrap(x, width = 25)) + scale_color_manual(values = colors_urban_rural, - #direction = -1, - labels = function(x) str_wrap(x, width = 25)) + + #direction = -1, + labels = function(x) str_wrap(x, width = 25)) + labs(x = "", y = "Area covered by cluster (km2)", fill = "Rural / Urban Classification", @@ -1507,4 +1507,3 @@ ggplot(clusters_ur_poly_combined) + facet_wrap(facets = vars(cluster)) ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) - diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R new file mode 100644 index 0000000..467db3c --- /dev/null +++ b/code/demand_cluster_flows_maps_temporal.R @@ -0,0 +1,2060 @@ +########################################################################################## +### The purpose of this script is to visualize the results from demand_cluster_flows.R ### +########################################################################################## + +library(tidyverse) +library(sf) +library(lwgeom) +library(geos) +# library(spatstat) +library(tmap) + +source("R/study_area_geographies.R") +source("R/filter_od_matrix.R") + + + +# ------------------------- Define the scenario ------------------------- # + +scenario <- 3 # 3, 2 +clustering <- "equal" +distance_threshold <- 50000 # 10000 + +# save plots? +save = TRUE + +# ------------------------ Load in the data -------------------------- # + +# ----- Clustering results +day_time = "pt_wkday_evening" +cluster_dbscan_res <- st_read(paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson")) + + +# ----- Study area + +# ----------- 1. Study area + +# --- administrative boundaries +study_area <- st_read("data/interim/study_area_boundary.geojson") + +# convert to desired resolution +geography = "MSOA" +study_area = study_area_geographies(study_area = study_area, + geography = geography) + +study_area <- study_area %>% + st_cast("MULTIPOLYGON") + +# move the geographic ID to the first column. od::points_to_od() only keeps the first column as ID + +geoid_col = paste0(geography, "21CD") + +study_area <- study_area %>% + relocate(all_of(geoid_col), .before = everything()) + + +plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/") + + +# ------------------------- VISUALISE RESULTS ------------------------- # + +cluster_dbscan_res = cluster_dbscan_res %>% + rename(commute_all = total_flow) + + +# check size per cluster and total commuters per cluster +cluster_dbscan_res %>% + st_drop_geometry() %>% + group_by(cluster) %>% + summarise(size = n(), commuters_sum = sum(commute_all)) %>% + arrange(desc(size)) + +# add size and total commuters columns +cluster_dbscan_res <- cluster_dbscan_res %>% + group_by(cluster) %>% + mutate(size = n(), + commuters_sum = sum(commute_all)) %>% + ungroup() + +# plot +plot(cluster_dbscan_res["cluster"]) + +# ----- assign number of rows dynamically for facet plot + +# get clusters to map +cluster_dbscan_res %>% + filter(size > 20, size < 5000) %>% + filter(commuters_sum > 200) %>% + filter(cluster != 0) -> clusters_vis + + +# add alternative cluster labels +clusters_vis <- clusters_vis %>% + rename(cluster_orig = cluster) %>% + arrange(commuters_sum) %>% + group_by(cluster_orig) %>% + mutate(cluster = cur_group_id()) %>% + ungroup() + +# we want maximum 4 maps per row +rows <- round(length(unique(clusters_vis$cluster)) / 3) + + +##### ---------- MAPS ---------- ##### + +### ---------- Plot 1: Top n Clusters (Facet Plot) ---------- ### + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + tm_shape(clusters_vis) + + tm_lines(lwd = "commute_all", + col = "cluster", + #col = "darkgreen", + scale = 10, + palette = "Accent", #YlGn + #style = "pretty", + alpha = 1, + title.col = "Cluster", + title.lwd = "No. of commuters", + legend.col.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + legend.outside = TRUE, + legend.outside.position = "bottom", + legend.stack = "horizontal", + # remove panel headers + panel.show = FALSE, + frame = FALSE) -> map_cluster_results + +map_cluster_results + +if(save == TRUE){ + tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_", day_time, "_.png"), width = 12, dpi = 1080, asp = 0) +} + + +### ---------- Plot 2: Compare mode composition of each cluster (desire line level) ---------- ### + +clusters_vis_mode <- clusters_vis + +### ---------- Plot 3: Compare mode composition of each cluster (cluster level) ---------- ### + + +# Get mode composition of entire cluster + +clusters_vis_mode_summary <- clusters_vis_mode %>% + st_drop_geometry() %>% + group_by(cluster, size) %>% + summarise(across(starts_with("commute_"), sum)) %>% + ungroup() + +# add commuting fraction +clusters_vis_mode <- clusters_vis_mode %>% + inner_join(clusters_vis_mode_summary %>% select(cluster), by = "cluster") + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + tm_shape(clusters_vis_mode) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 10, + palette = "RdYlGn", #Accent + #style = "pretty", + alpha = 1, + title.col = "Travel demand", + title.lwd = "No. of commuters", + #legend.col.is.portrait = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + # legend.outside = TRUE, + # legend.outside.position = "bottom", + # legend.stack = "horizontal", + # remove panel headers + panel.show = FALSE, + frame = FALSE) -> map_cluster_results_bus_frac_grouped + +map_cluster_results_bus_frac_grouped + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + +# --- Map with clusters ovelayed onto bus routes + + + +gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf.geojson") + +# gtfs_bus <- gtfs_bus %>% +# st_filter(st_union(study_area), .predicate = st_within) + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey75", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # clusters + tm_shape(clusters_vis_mode) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 10, + palette = "RdYlGn", #Accent + #style = "pretty", + alpha = 1, + title.col = "Travel demand", + title.lwd = "No. of commuters", + #legend.col.is.portrait = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + panel.show = FALSE, + frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs + +map_cluster_results_bus_frac_grouped_gtfs + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# --- Map with clusters as polygons (convex_hull()) + +# turn clusters into polygons using convex hull + +clusters_vis_mode_poly <- clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > 200) %>% + filter(cluster != 0) %>% + mutate(cluster = as.factor(cluster)) %>% + #head(1000) %>% + group_by(cluster) %>% + summarize(commuters_sum = first(commuters_sum), + geometry = st_convex_hull(st_union(geometry))) %>% + ungroup() + + +# plot + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # clusters + tm_shape(clusters_vis_mode_poly) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + #style = "pretty", + alpha = 0.3, + title = "Travel demand", + #legend.col.is.portrait = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + #panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)), + frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly + +map_cluster_results_bus_frac_grouped_gtfs_poly + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# --- Map with clusters as polygons (convex_hull()) + lines in background + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + #legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly +tm_shape(clusters_vis_mode_poly) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Fraction of Bus \nto Car users", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)), + #panel.show = FALSE, + frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + +# ------ Map with clusters as polygons (convex_hull()) + lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS + + +# GTFS: Approach 1 - st_union() +# get high frequency buses +gtfs_bus_freq <- gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 3600) %>% + # this is necessary so that geos::geos_concave_hull gives useful results later + st_filter(st_union(study_area), .predicate = st_within) %>% + st_make_valid() + +# convert to one geom in order to intersect by +gtfs_bus_freq1 <- gtfs_bus_freq %>% + st_transform(3857) %>% + st_buffer(1000) %>% + st_union() + +clusters_vis_mode_poly_filt <- st_difference(clusters_vis_mode_poly, gtfs_bus_freq1) + + + +# convert from MULTIPOLYGON to POLYGON and retain largest geom only for each multipolygon +clusters_vis_mode_poly_filt <- clusters_vis_mode_poly_filt %>% + st_cast("MULTIPOLYGON") %>% + st_cast("POLYGON") %>% + st_make_valid() + +# retain largest poly in each cluster +clusters_vis_mode_poly_filt_max <- clusters_vis_mode_poly_filt %>% + mutate(area = st_area(.)) %>% + group_by(cluster) %>% + filter(area == max(area)) + +# convex hull for aesthetic +clusters_vis_mode_poly_filt_max <- st_convex_hull(clusters_vis_mode_poly_filt_max) + + + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(cluster %in% clusters_vis_mode_poly_filt_max$cluster)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + #legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt_max$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt_max %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt_max$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed")-> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + +# --- Map with clusters as polygons (convex_hull()) + WITHOUT lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt_max$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt_max %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt_max$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# --- Map with clusters as polygons (convex_hull()) + lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS + + +# GTFS: APPROACH 2a: concave_hull() AFTER st_union() +gtfs_bus_freq2 <- gtfs_bus_freq %>% + st_transform(3857) %>% + #st_union() %>% + geos::geos_concave_hull(ratio = 0.5, allow_holes = FALSE) %>% + st_as_sf() %>% + st_union() %>% + st_set_crs(3857) %>% + st_buffer(250) + + +# st_difference to get non overlapping geoms +clusters_vis_mode_poly_filt2 <- st_difference(clusters_vis_mode_poly, gtfs_bus_freq2) + + + +# convert from MULTIPOLYGON to POLYGON and retain largest geom only for each multipolygon +clusters_vis_mode_poly_filt2 <- clusters_vis_mode_poly_filt2 %>% + st_cast("MULTIPOLYGON") %>% + st_cast("POLYGON") %>% + st_make_valid() + +# union geoms per cluster +clusters_vis_mode_poly_filt2 <- clusters_vis_mode_poly_filt2 %>% + group_by(cluster, commuters_sum) %>% + summarise(geometry = st_union(geometry)) %>% + ungroup() + + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(cluster %in% clusters_vis_mode_poly_filt2$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + arrange(commute_all)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + #legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt2$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt2 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt2$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + + + + +# --- Map with clusters as polygons (convex_hull()) + WITHOUT lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS ----- CONCAVE HULL + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt2$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt2 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt2$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + +# GTFS: APPROACH 2b: concave_hull() AFTER st_union() +gtfs_bus_freq3 <- gtfs_bus_freq %>% + st_transform(3857) %>% + st_buffer(100) %>% + st_union() %>% + geos::geos_concave_hull(ratio = 0.25, allow_holes = TRUE) %>% + st_as_sf() %>% + st_set_crs(3857) %>% + st_make_valid() + + +# st_difference to get non overlapping geoms +clusters_vis_mode_poly_filt3 <- st_difference(clusters_vis_mode_poly, gtfs_bus_freq3) + + + +# convert from MULTIPOLYGON to POLYGON and retain largest geom only for each multipolygon +clusters_vis_mode_poly_filt3 <- clusters_vis_mode_poly_filt3 %>% + st_cast("MULTIPOLYGON") %>% + st_cast("POLYGON") %>% + st_make_valid() + + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus_freq %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > 200) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + arrange(commuters_sum)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt3 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2 + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2 + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# --- Same as above but with startpoint and endpoint labelled + + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus_freq %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > 200) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + arrange(commuters_sum)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + #legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # START AND ENDPOINTS + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > 200) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + mutate(geometry = st_startpoint(.)) %>% + arrange(commuters_sum)) + + tm_dots(col = "darkgreen", + alpha = 0.6, + jitter = 0.01, + size = "commute_all", + scale = 0.8, + legend.size.show = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > 200) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + mutate(geometry = st_endpoint(.)) %>% + arrange(commuters_sum)) + + tm_dots(col = "darkred", + alpha = 0.6, + jitter = 0.01, + size = "commute_all", + scale = 0.8, + legend.size.show = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt3 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + panel.label.size = 1, + panel.label.bg.color = NA, + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") + + tm_add_legend(type = "symbol", labels = 'OD Start', col = 'darkgreen') + + tm_add_legend(type = "symbol", labels = 'OD End', col = 'darkred') -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + +# Presentation Style - 2 Rows Only + +rows_ppt = 3 + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > 200) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + arrange(commuters_sum)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + # START AND ENDPOINTS + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > 200) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + mutate(geometry = st_startpoint(.)) %>% + arrange(commuters_sum)) + + tm_dots(col = "darkgreen", + alpha = 0.6, + jitter = 0.01, + size = "commute_all", + scale = 1, + legend.size.show = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > 200) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + mutate(geometry = st_endpoint(.)) %>% + arrange(commuters_sum)) + + tm_dots(col = "darkred", + alpha = 0.6, + jitter = 0.01, + size = "commute_all", + scale = 1, + legend.size.show = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt3 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + panel.label.size = 1, + panel.label.bg.color = NA, + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") + + tm_add_legend(type = "symbol", labels = 'OD Start', col = 'darkgreen') + + tm_add_legend(type = "symbol", labels = 'OD End', col = 'darkred') -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + +# --- Map with clusters as polygons (convex_hull()) + WITHOUT lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS ----- CONCAVE HULL + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus_freq %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt3 %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2 + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2 + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave2_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + + +########## - ADD RURAL URBAN AS BASE MAP + +# --- 2011 map and urban rural classification +basemap <- st_read("data/external/oa_england_2011/OA_2011_EW_BGC_V2.shp") %>% + st_transform(st_crs(study_area)) %>% + st_make_valid() + +urban_rural_csv <- read_csv("data/external/census_2011_rural_urban.csv") + + +# add numeric columns for coloring +urban_rural_csv <- urban_rural_csv %>% + mutate(RUC11CD_NM = case_when(RUC11 == "Urban major conurbation" ~ 10, + RUC11 == "Urban minor conurbation" ~ 9, + RUC11 == "Urban city and town" ~ 8, + RUC11 == "Urban city and town in a sparse setting" ~ 7, + RUC11 == "Rural town and fringe" ~ 6, + RUC11 == "Rural town and fringe in a sparse setting" ~ 5, + RUC11 == "Rural village" ~ 4, + RUC11 == "Rural village in a sparse setting" ~ 3, + RUC11 == "Rural hamlets and isolated dwellings" ~ 2, + RUC11 == "Rural hamlets and isolated dwellings in a sparse setting" ~ 1)) %>% + arrange(RUC11CD_NM) %>% + # turn column to factor + mutate(RUC11CD_NM_FCT = as.factor(RUC11CD_NM)) +# join +basemap_urban_rural <- basemap %>% + left_join(urban_rural_csv, by = "OA11CD") + + +# --- filter to study area +basemap_urban_rural <- basemap_urban_rural %>% + st_filter(st_union(study_area) %>% + st_make_valid(), + .predicate = st_intersects) + + + + + + +tm_shape(basemap_urban_rural) + + tm_fill(col = "RUC11CD_NM", + palette = "Greys", + alpha = 0.5, + title = "Level of \nUrbanisation") + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 3, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + # tm_shape(cluster_dbscan_res_mode_poly_filt_max %>% + # filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>% + tm_shape(clusters_vis_mode_poly_filt3 %>% + st_buffer(1000)) + + tm_borders(col = "darkgreen", + lwd = 2) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") + + tm_add_legend(type = "line", labels = 'Potential DRT\nservice area', col = 'darkgreen', lwd = 2) -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# ---------------------- ONE BIG MAP + + +tm_shape(basemap_urban_rural) + + tm_fill(col = "RUC11CD_NM", + palette = "Greys", + alpha = 0.5, + title = "Level of \nUrbanisation") + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly fill + #tm_shape(st_union(clusters_vis_mode_poly_filt3)) + + tm_shape(st_union(clusters_vis_mode_poly_filt3 %>% + mutate(area = st_area(.)) %>% + filter(area > 0.2 * mean(area)))) + + tm_borders(col = "darkgreen", + lwd = 3.5, + lty = "dashed") + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Potential DRT Service Areas"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2.5, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# ----- ONE BIG MAP (gtfs routes overlined) + +gtfs_bus_overline <- gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200) + +gtfs_bus_overline = stplanr::overline(gtfs_bus_overline, attrib = "headway_inv", fun = sum) + +gtfs_bus_overline = gtfs_bus_overline %>% + mutate(headway_inv = round(headway_inv)) + + +tm_shape(basemap_urban_rural) + + tm_fill(col = "RUC11CD_NM", + palette = "Greys", + alpha = 0.5, + title = "Level of \nUrbanisation") + + # bus layer + tm_shape(gtfs_bus_overline) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 20, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.7, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly fill + #tm_shape(st_union(clusters_vis_mode_poly_filt3)) + + tm_shape(st_union(clusters_vis_mode_poly_filt3 %>% + mutate(area = st_area(.)) %>% + filter(area > 0.2 * mean(area)))) + + tm_borders(col = "darkgreen", + lwd = 3.5, + lty = "dashed") + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Potential DRT Service Areas"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2.5, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_overline_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + +# ----- ONE BIG MAP (pop density as background) + + +# --- pop density 1km grid +oa_pop_density <- stars::read_stars("data/external/population_density_grid_uk/gbr_pd_2020_1km_UNadj.tif") +# crop to study area +oa_pop_density_crop <- st_crop(oa_pop_density, study_area) + +tm_shape(st_union(study_area)) + + tm_borders(lwd =2, + col = "grey15") + + tm_shape(oa_pop_density_crop) + + tm_raster(title = "People / Km2", + palette = "Blues", + alpha = 0.5, + style = "log10_pretty") + #log10_pretty #jenks + # bus layer + tm_shape(gtfs_bus_overline) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 20, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.6, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly fill + #tm_shape(st_union(clusters_vis_mode_poly_filt3)) + + tm_shape(st_union(clusters_vis_mode_poly_filt3 %>% + mutate(area = st_area(.)) %>% + filter(area > 0.2 * mean(area)))) + + tm_borders(col = "darkgreen", + lwd = 4, + lty = "dashed") + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Potential DRT Service Areas"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2.5, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_one_map_overline_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) +} + + + + + + + + + + + + + + +##### ---------- FIGURES ---------- ##### + +### ---------- 1. Get unjittered OD data (code from code/demand_cluster_flows_prep.R) - travel times haven't been affected by jittering + +# is the data disaggregated by mode? +mode <- TRUE +#mode <- FALSE + +# Demand (census) + supply (travel time) data + +if(mode == FALSE){ + # data with "commute_all" only + #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) + od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) +} else{ + # data with modes + #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet")) + od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet")) +} + +# filter to specific combination +# TODO: get seperate flows for car and pt, and keep two combinations +od_demand <- od_demand %>% + filter(combination == "pt_wkday_morning") + +od_demand <- od_demand %>% + select(-distance_m) + +# --- create desire lines and remove od pairs with very short distance + +# TODO: edit this to avoid clusters of very short flows +# "Density-based clustering for bivariate-flow data" (section 5.2): preprocessing step to avoid +# clusters of very short flows. this involves splitting the data into 3 chunks +# based on length ( +od_demand_filtered = filter_matrix_by_distance(zones = study_area, + od_matrix = od_demand, + dist_threshold = 1000) + +# add unique id for each row +od_demand_filtered <- od_demand_filtered %>% + mutate(od_id = paste0(Origin, "-", Destination, "-", combination)) + + + + +# ----- add the clustering results to the unjittered demand +od_demand_figures <- cluster_dbscan_res %>% + select(Origin, Destination, starts_with("commute_"), od_id, flow_ID, cluster, size, commute_all) %>% + left_join(od_demand_filtered %>% + st_drop_geometry() %>% + distinct(od_id, .keep_all = TRUE) %>% + # travel times before being ruined by od_jitter() + select(od_id, ends_with("_time"), n_rides, starts_with("speed_"), starts_with("ride_time_")), + by = "od_id") + +# match existing clusters +# od_demand_figures_filt <- od_demand_figures %>% +# filter(size > 7, size < 5000) %>% +# filter(commute_all > 200) %>% +# filter(cluster != 0) + +od_demand_figures_filt <- od_demand_figures %>% + rename(cluster_orig = cluster) %>% + inner_join(clusters_vis %>% + st_drop_geometry() %>% + select(flow_ID, cluster), + by = "flow_ID") %>% + # keep only the clusters after intersection with gtfs + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) + + + + +# ----- plots + +# scatter plots with x: commute_all, y: fraction of bus commuters +ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = commute_bus)) + + geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + + geom_point() + + ylim(0, 5) + + scale_color_distiller(palette= "RdYlGn", direction = 1) + + labs(x = "total no. of commuters", + y = "Bus / Car commuters (fraction)", + title = "Composition of clusters", + subtitle = "No. of commuters", + color = "Commuters (bus)") + + facet_wrap(facets = vars(cluster)) + + theme_bw() + + theme(legend.position = "bottom") + +ggsave(paste0(plots_path, "figure_scatter_commuters_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) + +# paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0) + + +ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) + + geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + + geom_point() + + ylim(0, 5) + + scale_color_distiller(palette= "RdYlGn", direction = 1) + + labs(x = "total no. of commuters", + y = "Bus / Car commuters (fraction)", + color = "Average speed of \nbus commute (kph)", + title = "Composition of clusters: \nNo. of commuters in each OD pair") + + facet_wrap(facets = vars(cluster)) + + theme_bw() + + theme(legend.position = "bottom") + +ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) + +# same but replacing NA speed for min speed +ggplot(od_demand_figures_filt %>% + st_drop_geometry() %>% + mutate(speed_kph = replace_na(speed_kph, min(speed_kph, na.rm = TRUE))), + aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) + + geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + + geom_point() + + ylim(0, 5) + + scale_color_distiller(palette= "RdYlGn", direction = 1) + + labs(x = "total no. of commuters", + y = "Bus / Car commuters (fraction)", + color = "Average speed of \nbus commute (kph)", + title = "Composition of clusters: \nNo. of commuters in each OD pair") + + facet_wrap(facets = vars(cluster)) + + theme_bw() + + theme(legend.position = "bottom") + +ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, "_no_NA.png"), height = 8, width = 6) + +# ----- Get the spatial coverage of each cluster (what portion is in urban / rural etc?) + +# get intersection +clusters_vis_mode_poly %>% + st_intersection(basemap_urban_rural %>% + st_transform(st_crs(clusters_vis_mode_poly))) %>% + # get area + mutate(area_km = as.numeric(st_area(.) / 1000000)) %>% + # area by cluster + st_drop_geometry() %>% + group_by(cluster, RUC11, RUC11CD_NM_FCT) %>% + summarise(area_km = sum(area_km)) %>% + # keep only the clusters after intersection with gtfs + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly + +# define custom color paletter +colors_urban_rural <- (c("#01665E", "#35978F", "#80CDC1", "#DFC27D", "#8C510A")) +# order column based on the RUC11CD_NM_FCT column (degree of urbanisation) +clusters_ur_poly$RUC11 <- factor(clusters_ur_poly$RUC11, levels = unique(clusters_ur_poly$RUC11)[order(clusters_ur_poly$RUC11CD_NM_FCT)]) + +ggplot(clusters_ur_poly, aes(x = RUC11, y = area_km, fill = RUC11)) + + geom_col(color = "grey60") + + scale_fill_manual(values = colors_urban_rural, + labels = function(x) str_wrap(x, width = 25)) + + labs(x = "", + y = "Area covered by cluster (km2)", + color = "Rural / Urban Classification", + title = "Composition of clusters: Rural / Urban") + + theme_bw() + + theme(axis.text.x = element_blank(), + axis.ticks.x=element_blank(), + legend.position = "bottom", + legend.title = element_blank()) + + guides(fill = guide_legend(nrow = 3)) + + facet_wrap(facets = vars(cluster)) + +ggsave(paste0(plots_path, "figure_bar_urban_rural_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) + + +# Same but for filtered polygon + +clusters_vis_mode_poly_filt3 %>% + st_intersection(basemap_urban_rural %>% + st_transform(st_crs(clusters_vis_mode_poly))) %>% + # get area + mutate(area_km = as.numeric(st_area(.) / 1000000)) %>% + # area by cluster + st_drop_geometry() %>% + group_by(cluster, RUC11, RUC11CD_NM_FCT) %>% + summarise(area_km = sum(area_km)) %>% + # keep only the clusters after intersection with gtfs + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly_filt + +clusters_ur_poly_filt$RUC11 <- factor(clusters_ur_poly_filt$RUC11, levels = unique(clusters_ur_poly_filt$RUC11)[order(clusters_ur_poly_filt$RUC11CD_NM_FCT)]) + + +ggplot(clusters_ur_poly_filt, aes(x = RUC11, y = area_km, fill = RUC11)) + + geom_col(color = "grey60") + + scale_fill_manual(values = colors_urban_rural, + #direction = -1, + labels = function(x) str_wrap(x, width = 25)) + + labs(x = "", + y = "Area covered by cluster (km2)", + color = "Rural / Urban Classification", + title = "Composition of clusters: Rural / Urban") + + theme_bw() + + theme(axis.text.x = element_blank(), + axis.ticks.x=element_blank(), + legend.position = "bottom", + legend.title = element_blank()) + + guides(fill = guide_legend(nrow = 3)) + + facet_wrap(facets = vars(cluster)) + +ggsave(paste0(plots_path, "figure_bar_urban_rural_filtered_by_gtfs_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) + + + + +### ---- join the data to plot together +clusters_ur_poly %>% + left_join(clusters_ur_poly_filt %>% + rename(area_km_filt = area_km), + by = c("cluster", "RUC11")) -> clusters_ur_poly_combined + + + +# --- plot both together +ggplot(clusters_ur_poly_combined) + + geom_col(aes(x = RUC11, y = area_km, color = RUC11, alpha = 0.01)) + + geom_col(aes(x = RUC11, y = area_km_filt, fill = RUC11)) + + scale_fill_manual(values = colors_urban_rural, + #direction = -1, + labels = function(x) str_wrap(x, width = 25)) + + scale_color_manual(values = colors_urban_rural, + #direction = -1, + labels = function(x) str_wrap(x, width = 25)) + + labs(x = "", + y = "Area covered by cluster (km2)", + fill = "Rural / Urban Classification", + title = "Composition of clusters: Rural / Urban", + subtitle = "Before and after intersecting with bus network", + caption = "border: area covered by entire cluster \nfill: area covered by cluster after intersection with bus network ") + + guides(color = "none", alpha = "none") + + scale_alpha_identity() + # Maintain alpha value + theme_bw() + + theme(axis.text.x = element_blank(), + axis.ticks.x=element_blank(), + legend.position = "bottom", + legend.title = element_blank(), + plot.caption = element_text(hjust = 0)) + + guides(fill = guide_legend(nrow = 3)) + + facet_wrap(facets = vars(cluster)) + +ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) + + + + + +# ---------- PLots of line bearings in each cluster + +# --- calculate bearings + +od_demand_figures_bearings <- od_demand_figures_filt %>% + mutate(bearing = stplanr::line_bearing(.), + bearing_adjusted = case_when(bearing < 0 ~ bearing + 360, + .default = bearing), + bearing_180 = case_when(bearing < 0 ~ bearing * -1, + .default = bearing)) + + + +# add length column +od_demand_figures_bearings <- od_demand_figures_bearings %>% + mutate(distance_m = units::drop_units(sf::st_length(.))) + +# --- get columns for distance and bearing "groups" - for facet plots cut distance angle groups for plots + + +# Define the breaks for the buckets +#breaks_angle <- seq(-10, 370, by = 20) +breaks_angle <- seq(0, 360, by = 30) +#breaks_angle <- seq(0, 180, by = 15) +breaks_distance <- seq(0, 50, by = 10) + +od_demand_figures_bearings <- od_demand_figures_bearings %>% + mutate(bucket = cut(bearing_adjusted, breaks = breaks_angle, right = FALSE, include.lowest = TRUE), + # mutate(bucket = cut(bearing_180, breaks = breaks_angle, right = FALSE, include.lowest = TRUE), + bucket_distance = cut(distance_m / 1000, breaks = breaks_distance, right = FALSE, include.lowest = TRUE)) %>% + mutate(bucket_distance = fct_rev(bucket_distance)) + +# Ensure the bucket factor levels are ordered correctly +#bearings_cat$bucket <- factor(bearings_cat$bucket, levels = unique(bearings_cat$bucket), ordered = TRUE) + + +# --- Plot with no of lines only (geom_bar()) + + +# bar +ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) + + geom_bar() + + facet_wrap(facets = "cluster") + + labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") + + scale_x_discrete(labels = breaks_angle) + + theme(axis.text.x = element_text(angle = 45, hjust = 1), + legend.position = "bottom") + +ggsave(paste0(plots_path, "figure_bar_bearing_y_ods_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8) + + +# circle +ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) + + geom_bar() + + coord_polar(start = -0.1) + + facet_wrap(facets = "cluster") + + labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") + + #scale_x_discrete(labels = breaks_angle) + + scale_x_discrete(labels = breaks_angle) + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + + +# --- Plot with total commuters geom_col() + + +# bar +ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + + geom_col() + + #coord_polar() + + facet_wrap(facets = "cluster") + + labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") + + scale_x_discrete(labels = breaks_angle) + + theme(axis.text.x = element_text(angle = 45, hjust = 1), + legend.position = "bottom") + +ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8) + +# circle +ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + + geom_col() + + coord_radial(rotate.angle = TRUE) + + facet_wrap(facets = "cluster") + + labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") + + #scale_x_discrete(labels = breaks_angle) + + scale_x_discrete(labels = breaks_angle) + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + + + +# TESTING FACET GRID + +# Select labels to print (one in every three) +labels_to_print <- levels(od_demand_figures_bearings$bucket)[seq(1, length(levels(od_demand_figures_bearings$bucket)), by = 3)] + + +ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + + geom_col() + + #coord_polar() + + facet_grid(bucket_distance ~ cluster, + drop = TRUE) + + labs(x = "Bearing", y = "Total Commuters", fill = "OD Pair Length") + + # scale_x_discrete(labels = breaks_angle) + + scale_x_discrete(labels = function(x) ifelse(x %in% labels_to_print, x, "")) + # Print one in every three labels # theme_minimal() + + theme_bw() + + theme(axis.text.x = element_text(angle = 60, hjust = 1), + legend.position = "bottom") + + +ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_grid_cluster_distance_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 10) + + + + + + + + + + + + + +clusters_vis_sp = clusters_vis %>% + mutate(geometry = st_startpoint(.)) + +clusters_vis_ep = clusters_vis %>% + mutate(geometry = st_endpoint(.)) + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + tm_shape(clusters_vis) + + tm_lines(lwd = "commute_all", + col = "cluster", + #col = "darkgreen", + scale = 10, + palette = "Accent", #YlGn + #style = "pretty", + alpha = 1, + title.col = "Cluster", + title.lwd = "No. of commuters", + legend.col.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_shape(clusters_vis_sp) + + tm_dots(col = "black", + #col = "darkgreen", + shape = 0, + scale = 2, + palette = "Accent", #YlGn + #style = "pretty", + alpha = 1, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_shape(clusters_vis_ep) + + tm_dots(col = "darkred", + #col = "darkgreen", + shape = 6, + scale = 2, + palette = "Accent", #YlGn + #style = "pretty", + alpha = 1, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + legend.outside = TRUE, + legend.outside.position = "bottom", + legend.stack = "horizontal", + # remove panel headers + panel.show = FALSE, + frame = FALSE) -> map_cluster_results + +map_cluster_results diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R index e8f840f..c7029bc 100644 --- a/code/demand_cluster_flows_prep.R +++ b/code/demand_cluster_flows_prep.R @@ -204,8 +204,8 @@ split_points <- function(sf_points, col_to_split, splits, offset_dist_min, sub_zones_2 <- split_points(sub_zones, col_to_split = "population", splits = 5, - offset_dist_min = 10, - offset_dist_max = 20, + offset_dist_min = 100, + offset_dist_max = 500, target_crs = 3857) @@ -219,7 +219,7 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE) # confirm it's empty dir(tempdir()) -selected_combination = "pt_wkday_morning" +selected_combination = "pt_wkday_evening" od_demand_for_jittering <- od_demand_filtered %>% #filter(combination == selected_combination) %>% From bc92569d39bc3ab03a792985a60b9928a12fe092 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Tue, 5 Nov 2024 20:39:51 +0000 Subject: [PATCH 10/16] updates --- code/demand_cluster_flows.R | 103 ++++++++++++++++--------------- code/demand_cluster_flows_prep.R | 4 +- 2 files changed, 56 insertions(+), 51 deletions(-) diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R index c8d7cb0..762b013 100644 --- a/code/demand_cluster_flows.R +++ b/code/demand_cluster_flows.R @@ -24,6 +24,8 @@ plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/") # what combination are we clustering day_time = "pt_wkday_evening" +# sensitivity analysis? +sensitivity = FALSE # ----------- 1. Study area @@ -318,55 +320,58 @@ hist(distances$fds, breaks = 100) # # ----- Sensitivity analysis(for different epsilon and minpts combinaitons) # # function to get clustering results for many combinations -dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat, - options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9), - options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000), - weights = w_vec, - flows = st_drop_geometry(od_demand_jittered), - flow_column = "total_flow" - ) - - -arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_", day_time, ".parquet")) -# dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet")) - - -# All results plotted together -dbscan_sensitivity_res %>% - filter(cluster != 0) %>% - ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + - geom_col() + - scale_y_continuous(trans='log10') + - facet_wrap(~id, scales = "fixed") + - labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", - subtitle = "Parameter combinations that returned more than 1 cluster", - x = "Cluster no.", - y = "No. of od pairs in cluster", - fill= "No. of commuters") + - theme_bw() - -ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_all_", day_time, ".png"), width = 14, height = 10) - -dbscan_sensitivity_res %>% - filter(cluster != 0) %>% - group_by(id) %>% - #mutate(clusters = n()) %>% - # How many clusters have more than 5 od pairs in them? - mutate(clusters = sum(size > 5)) %>% - ungroup() %>% - filter(clusters > 5) %>% - ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + - geom_col() + - scale_y_continuous(trans='log10') + - facet_wrap(~id, scales = "fixed") + - labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", - subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", - x = "Cluster no.", - y = "No. of od pairs in cluster", - fill= "No. of commuters") + - theme_bw() +if(sensitivity == TRUE){ + dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat, + options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9), + options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000), + weights = w_vec, + flows = st_drop_geometry(od_demand_jittered), + flow_column = "total_flow" + ) + + + arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_", day_time, ".parquet")) + # dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet")) + + + # All results plotted together + dbscan_sensitivity_res %>% + filter(cluster != 0) %>% + ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + + geom_col() + + scale_y_continuous(trans='log10') + + facet_wrap(~id, scales = "fixed") + + labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", + subtitle = "Parameter combinations that returned more than 1 cluster", + x = "Cluster no.", + y = "No. of od pairs in cluster", + fill= "No. of commuters") + + theme_bw() + + ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_all_", day_time, ".png"), width = 14, height = 10) + + dbscan_sensitivity_res %>% + filter(cluster != 0) %>% + group_by(id) %>% + #mutate(clusters = n()) %>% + # How many clusters have more than 5 od pairs in them? + mutate(clusters = sum(size > 5)) %>% + ungroup() %>% + filter(clusters > 5) %>% + ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + + geom_col() + + scale_y_continuous(trans='log10') + + facet_wrap(~id, scales = "fixed") + + labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", + subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", + x = "Cluster no.", + y = "No. of od pairs in cluster", + fill= "No. of commuters") + + theme_bw() + + ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_", day_time, ".png"), width = 14, height = 10) -ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_", day_time, ".png"), width = 14, height = 10) +} @@ -376,7 +381,7 @@ ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analy # cluster option 1: border points assigned to cluster cluster_dbscan = dbscan::dbscan(dist_mat, minPts = 50, # 125 - eps = 8, # 9.5 + eps = 7, # 9.5 #borderPoints = FALSE, weights = w_vec) # # for splitted distance diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R index c7029bc..d984afe 100644 --- a/code/demand_cluster_flows_prep.R +++ b/code/demand_cluster_flows_prep.R @@ -203,7 +203,7 @@ split_points <- function(sf_points, col_to_split, splits, offset_dist_min, # apply function sub_zones_2 <- split_points(sub_zones, col_to_split = "population", - splits = 5, + splits = 3, offset_dist_min = 100, offset_dist_max = 500, target_crs = 3857) @@ -238,7 +238,7 @@ od_demand_jittered = odjitter::jitter( # column with the flows (to be disaggregated) disaggregation_key = "total_flow", # What's the maximum number of trips per output OD row that's allowed? - disaggregation_threshold = 95, + disaggregation_threshold = 30, # ----- arguments for ZONES ----- # From 9d49a18c995e5138932f182b87dd7b064e2e335a Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Wed, 6 Nov 2024 10:59:03 +0000 Subject: [PATCH 11/16] testing odjitter --- code/demand_cluster_flows_prep.R | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R index d984afe..f9bffc6 100644 --- a/code/demand_cluster_flows_prep.R +++ b/code/demand_cluster_flows_prep.R @@ -203,7 +203,7 @@ split_points <- function(sf_points, col_to_split, splits, offset_dist_min, # apply function sub_zones_2 <- split_points(sub_zones, col_to_split = "population", - splits = 3, + splits = 5, offset_dist_min = 100, offset_dist_max = 500, target_crs = 3857) @@ -219,7 +219,7 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE) # confirm it's empty dir(tempdir()) -selected_combination = "pt_wkday_evening" +# selected_combination = "pt_wkday_evening" od_demand_for_jittering <- od_demand_filtered %>% #filter(combination == selected_combination) %>% @@ -272,8 +272,26 @@ od_demand_jittered = odjitter::jitter( od_demand_jittered <- od_demand_jittered %>% mutate(across(starts_with("total_flow"), round)) - - +# # ---------- check if new ODs are in the same zone paairs as the old ODs ---------- + +od_demand_jittered_test <- od_demand_jittered %>% + mutate(Origin_new = lwgeom::st_startpoint(geometry), + Destination_new = lwgeom::st_endpoint(geometry)) %>% + st_set_geometry("Origin_new") %>% + st_join(study_area %>% select(MSOA21CD) %>% + rename(MSOA_study_area_O = MSOA21CD)) %>% + st_set_geometry("Destination_new") %>% + st_join(study_area %>% select(MSOA21CD) %>% + rename(MSOA_study_area_D = MSOA21CD)) %>% + mutate(Origin_match = MSOA_study_area_O == Origin, + Destination_match = MSOA_study_area_D == Destination) + + +# plot to check +plot(st_geometry(study_area)) +plot(st_geometry(od_demand_jittered %>% + filter(Origin == "E02002331", Destination == "E02002335")), + add = TRUE, col = "red") ########## ----------------------- Decide on the SCENARIOS we want to analyse ----------------------- ########## From a7d5d2a9e3a4f3fb3e72a22912130646cb8e2dce Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Thu, 7 Nov 2024 15:20:55 +0000 Subject: [PATCH 12/16] remove filter_matrix_by_distance: it was removing jittered geometries --- code/demand_cluster_flows.R | 53 +++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R index 762b013..b17eeb6 100644 --- a/code/demand_cluster_flows.R +++ b/code/demand_cluster_flows.R @@ -23,7 +23,7 @@ od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, " plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/") # what combination are we clustering -day_time = "pt_wkday_evening" +day_time = "pt_wkday_afternoon" # sensitivity analysis? sensitivity = FALSE @@ -48,9 +48,17 @@ study_area <- study_area %>% relocate(all_of(geoid_col), .before = everything()) # add distance_m column -od_demand_jittered = filter_matrix_by_distance(zones = study_area, - od_matrix = od_demand_jittered %>% st_drop_geometry(), - dist_threshold = 500) + +crs_orig = st_crs(od_demand_jittered) +crs_metric = 3857 + +od_demand_jittered = od_demand_jittered %>% + st_transform(crs_metric) %>% + mutate(distance_m = units::drop_units(sf::st_length(.))) %>% + st_transform(crs_orig) %>% + filter(distance_m >= 500) + + od_demand_jittered = od_demand_jittered %>% filter(combination == day_time) @@ -322,8 +330,8 @@ hist(distances$fds, breaks = 100) # function to get clustering results for many combinations if(sensitivity == TRUE){ dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat, - options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9), - options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000), + options_epsilon <- c(1, 1.5, 2, 3, 4, 5, 6, 7, 8, 9), + options_minpts <- c(50, 75, 100, 150, 200, 300, 500, 1000, 2500), weights = w_vec, flows = st_drop_geometry(od_demand_jittered), flow_column = "total_flow" @@ -380,8 +388,8 @@ if(sensitivity == TRUE){ # cluster option 1: border points assigned to cluster cluster_dbscan = dbscan::dbscan(dist_mat, - minPts = 50, # 125 - eps = 7, # 9.5 + minPts = 50, # evening: 50 # 125 + eps = 7, # evening: 7 # 9.5 #borderPoints = FALSE, weights = w_vec) # # for splitted distance @@ -430,4 +438,33 @@ st_write(cluster_dbscan_res, paste0("data/processed/clustering/temporal/scenario +# --- Compare clusters at different points in the day +dbscan_sensitivity_res_wkday_morning = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_morning") +dbscan_sensitivity_res_wkday_afternoon = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_afternoon.parquet")) %>% + mutate(scenario = "pt_wkday_afternoon") +dbscan_sensitivity_res_wkday_evening = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_evening.parquet")) %>% + mutate(scenario = "pt_wkday_evening") + +dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_morning, + dbscan_sensitivity_res_wkday_evening) + +dbscan_sensitivity_res_compare %>% + filter(cluster != 0) %>% + group_by(id) %>% + #mutate(clusters = n()) %>% + # How many clusters have more than 5 od pairs in them? + mutate(clusters = sum(size > 25)) %>% + ungroup() %>% + filter(clusters > 25) %>% + ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + + geom_col() + + scale_y_continuous(trans='log10') + + facet_grid(id ~ scenario, scales = "fixed") + + labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", + subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", + x = "Cluster no.", + y = "No. of od pairs in cluster", + fill= "No. of commuters") + + theme_bw() From 9d6a7088834ec8ac86c6902e47432daf7b1108b7 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:14:32 +0000 Subject: [PATCH 13/16] commit before editing for temporal granularity --- code/demand_cluster_flows.R | 4 +- code/demand_cluster_flows_maps_temporal.R | 59 +++++---- code/demand_cluster_flows_prep.R | 18 ++- code/routing_r5r_temporal.R | 145 ++++++++++++++++++++++ 4 files changed, 196 insertions(+), 30 deletions(-) create mode 100644 code/routing_r5r_temporal.R diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R index b17eeb6..0b6262e 100644 --- a/code/demand_cluster_flows.R +++ b/code/demand_cluster_flows.R @@ -23,7 +23,7 @@ od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, " plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/") # what combination are we clustering -day_time = "pt_wkday_afternoon" +day_time = "pt_wkday_evening" # sensitivity analysis? sensitivity = FALSE @@ -77,7 +77,7 @@ od_demand_jittered = od_demand_jittered %>% # # 3) Flows with poor PT supply and low potential demand + and equal weight to origins and destinations (for flow distance) scenario <- 3 clustering <- "equal" -distance_threshold <- round(max(od_demand_jittered$distance_m), -3) +distance_threshold <- 50000 # # # 2) Focusing on shorter distances # scenario <- 3 diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R index 467db3c..153f629 100644 --- a/code/demand_cluster_flows_maps_temporal.R +++ b/code/demand_cluster_flows_maps_temporal.R @@ -26,7 +26,7 @@ save = TRUE # ------------------------ Load in the data -------------------------- # # ----- Clustering results -day_time = "pt_wkday_evening" +day_time = "pt_wkday_afternoon" cluster_dbscan_res <- st_read(paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson")) @@ -67,7 +67,8 @@ cluster_dbscan_res %>% st_drop_geometry() %>% group_by(cluster) %>% summarise(size = n(), commuters_sum = sum(commute_all)) %>% - arrange(desc(size)) + arrange(desc(size)) %>% + head(15) # add size and total commuters columns cluster_dbscan_res <- cluster_dbscan_res %>% @@ -83,7 +84,7 @@ plot(cluster_dbscan_res["cluster"]) # get clusters to map cluster_dbscan_res %>% - filter(size > 20, size < 5000) %>% + filter(size > 15, size < 5000) %>% filter(commuters_sum > 200) %>% filter(cluster != 0) -> clusters_vis @@ -96,6 +97,20 @@ clusters_vis <- clusters_vis %>% mutate(cluster = cur_group_id()) %>% ungroup() + +# Keep only the 12 largest clusters by commuters_sum if there are more than 12 +if (n_distinct(clusters_vis$cluster) > 12) { + top_clusters <- clusters_vis %>% + group_by(cluster) %>% + summarise(total_commuters = first(commuters_sum)) %>% + arrange(desc(total_commuters)) %>% + slice_head(n = 12) %>% + pull(cluster) + + clusters_vis <- clusters_vis %>% + filter(cluster %in% top_clusters) +} + # we want maximum 4 maps per row rows <- round(length(unique(clusters_vis$cluster)) / 3) @@ -142,7 +157,7 @@ tm_shape(study_area) + map_cluster_results if(save == TRUE){ - tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_", day_time, "_.png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", "_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_.png"), width = 12, dpi = 1080, asp = 0) } @@ -203,7 +218,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_.png"), width = 12, dpi = 1080, asp = 0) } # --- Map with clusters ovelayed onto bus routes @@ -268,7 +283,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_.png"), width = 12, dpi = 1080, asp = 0) } @@ -279,8 +294,8 @@ if(save == TRUE){ # turn clusters into polygons using convex hull clusters_vis_mode_poly <- clusters_vis_mode %>% - filter(size > 7, size < 5000) %>% - filter(commuters_sum > 200) %>% + # filter(size > 7, size < 5000) %>% + # filter(commuters_sum > 200) %>% filter(cluster != 0) %>% mutate(cluster = as.factor(cluster)) %>% #head(1000) %>% @@ -346,7 +361,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs_poly if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly.png"), width = 12, dpi = 1080, asp = 0) } @@ -429,7 +444,7 @@ tm_shape(clusters_vis_mode_poly) + map_cluster_results_bus_frac_grouped_gtfs_poly_lines if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines.png"), width = 12, dpi = 1080, asp = 0) } @@ -563,7 +578,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff.png"), width = 12, dpi = 1080, asp = 0) } @@ -640,7 +655,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff.png"), width = 12, dpi = 1080, asp = 0) } @@ -770,7 +785,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave.png"), width = 12, dpi = 1080, asp = 0) } @@ -850,7 +865,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave.png"), width = 12, dpi = 1080, asp = 0) } @@ -973,7 +988,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2 if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2.png"), width = 12, dpi = 1080, asp = 0) } @@ -1112,7 +1127,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints.png"), width = 12, dpi = 1080, asp = 0) } @@ -1250,7 +1265,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt.png"), width = 12, dpi = 1080, asp = 0) } @@ -1328,7 +1343,7 @@ tm_shape(study_area) + map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2 if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave2_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave2.png"), width = 12, dpi = 1080, asp = 0) } @@ -1440,7 +1455,7 @@ tm_shape(basemap_urban_rural) + map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0) } @@ -1490,7 +1505,7 @@ tm_shape(basemap_urban_rural) + map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map.png"), width = 12, dpi = 1080, asp = 0) } @@ -1547,7 +1562,7 @@ tm_shape(basemap_urban_rural) + map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_overline_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_overline.png"), width = 12, dpi = 1080, asp = 0) } # ----- ONE BIG MAP (pop density as background) @@ -1599,7 +1614,7 @@ tm_shape(st_union(study_area)) + map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_one_map_overline_", day_time, ".png"), width = 12, dpi = 1080, asp = 0) + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_one_map_overline.png"), width = 12, dpi = 1080, asp = 0) } diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R index f9bffc6..75491c3 100644 --- a/code/demand_cluster_flows_prep.R +++ b/code/demand_cluster_flows_prep.R @@ -103,7 +103,10 @@ sub_zones <- sub_zones %>% # rename population density column sub_zones <- sub_zones %>% - rename(population = gbr_pd_2020_1km_UNadj) + rename(population = gbr_pd_2020_1km_UNadj) %>% + # odjitter samples with replacement. Higher weighted subpoints will keep getting sampled + # Get cube root to increase probability of other points being sampled + mutate(population = population^(1/3)) # --- Create more points (if we don't have enough points, then odjitter may not find a feasible solution) @@ -203,9 +206,9 @@ split_points <- function(sf_points, col_to_split, splits, offset_dist_min, # apply function sub_zones_2 <- split_points(sub_zones, col_to_split = "population", - splits = 5, - offset_dist_min = 100, - offset_dist_max = 500, + splits = 7, + offset_dist_min = 200, + offset_dist_max = 700, target_crs = 3857) @@ -238,7 +241,7 @@ od_demand_jittered = odjitter::jitter( # column with the flows (to be disaggregated) disaggregation_key = "total_flow", # What's the maximum number of trips per output OD row that's allowed? - disaggregation_threshold = 30, + disaggregation_threshold = 20, # ----- arguments for ZONES ----- # @@ -261,7 +264,7 @@ od_demand_jittered = odjitter::jitter( # ----- arguments OTHER ----- # # Guarantee that jittered origin and destination points are at least this distance apart - min_distance_meters = 500, + min_distance_meters = 100, deduplicate_pairs = TRUE ) @@ -343,6 +346,9 @@ od_demand_jittered_scenarios <- od_demand_jittered %>% TRUE ~ 0) ) +# remove rows that have no flows +od_demand_jittered_scenarios = od_demand_jittered_scenarios %>% + filter(total_flow != 0) ### Save the sfs for each scenario diff --git a/code/routing_r5r_temporal.R b/code/routing_r5r_temporal.R new file mode 100644 index 0000000..5217aa3 --- /dev/null +++ b/code/routing_r5r_temporal.R @@ -0,0 +1,145 @@ +################################################################################################### +### The purpose of this script is to calculate a travel time matrix for each different mode ### +### combination. r5r is used for the calculations ### +################################################################################################### + +source("R/study_area_geographies.R") +source("R/r5r_routing_wrappers.R") +# source("code/routing_prep.R") + +library(tidyverse) +library(sf) +library(r5r) + +# increase the memory available to Java. Needs to be done at the beginning of the script +options(java.parameters = "-Xmx35G") # 3 gegabytes + +# define path where graph will be built (path with osm and pbf data) +graph_path <- paste0("data/interim/routing_graph/") +# define path where routing results will be saved +travel_time_path <- paste0("data/processed/travel_times/") + +geography <- "MSOA" + +# what number do we want to give to OD pairs that cannot be reached within our travel time threshold +na_time_replace <- 150 # 2.5 hours + +# create a directory to store the results +dir.create(paste0(travel_time_path, geography)) + +# ------------------------------------- PREPARE (BASE) OD MATRIX LAYER ------------------------------------- # + +study_area <- st_read("data/interim/study_area_boundary.geojson") + +# edit the study area to match the chosen resolution +study_area <- study_area_geographies(study_area = study_area, + geography = geography) + + +# r5r requires a POINT sf object with WGS84 CRS +study_area <- study_area %>% + st_transform(4326) %>% + st_centroid() + + + +# Function takes a base layer, gets the geometry centroid, and renames the id column that we pass to it into a standard name +prep_base_layer = function(layer, id_col){ + # rename existing ID column with "id" + id_col = sym(id_col) + layer = layer %>% + rename(id = !! id_col) + # transform crs + layer = layer %>% st_transform(4326) + # get centroid + layer = layer %>% #select(id) %>% + st_centroid() +} + +# apply the function +study_area_r5 <- prep_base_layer(layer = study_area, id_col = "OBJECTID") + + + +# ------------------------------------- DEFINE ROUTING PARAMETERS ------------------------------------- # + +scenarios <- tribble( + ~scenario, ~mode, ~departure_datetime, + # public transport at different times of day / week + # weekday + "pt_wkday_06_30", c("WALK", "TRANSIT"), "14-08-2023 06:30:00", + "pt_wkday_09_30", c("WALK", "TRANSIT"), "14-08-2023 09:30:00", + "pt_wkday_12_30", c("WALK", "TRANSIT"), "14-08-2023 12:30:00", + "pt_wkday_15_30", c("WALK", "TRANSIT"), "14-08-2023 15:30:00", + "pt_wkday_18_30", c("WALK", "TRANSIT"), "14-08-2023 18:30:00", + # weekend + "pt_wkend_06_30", c("WALK", "TRANSIT"), "13-08-2023 06:30:00", + "pt_wkend_09_30", c("WALK", "TRANSIT"), "13-08-2023 09:30:00", + "pt_wkend_12_30", c("WALK", "TRANSIT"), "13-08-2023 12:30:00", + "pt_wkend_15_30", c("WALK", "TRANSIT"), "13-08-2023 15:30:00", + "pt_wkend_18_30", c("WALK", "TRANSIT"), "13-08-2023 18:30:00", + # car (travel time is the same regardless of day / time) - unless we add congestion + #"car", c("CAR"), "14-08-2023 07:30:00", +) + + +# ------------------------------------- BUILD ROUTING GRAPH ------------------------------------- # + +# stop any running r5 instances +# r5r::stop_r5() +# java garbage collector to free up memory +#rJava::.jgc(R.gc = TRUE) + +# setup r5 +print("Setting up r5...") +r5r_core <- setup_r5(data_path = graph_path, + verbose = TRUE, + overwrite = TRUE) # turn to true once we have elevation + +print("Graph built...") +# ------------------------------------- CALCULATE TRAVEL TIME MATRIX ------------------------------------- # + + +# ---------- 2. route using r5r::expanded_travel_time_matrix. This gives you travel time broken down by journey components + +# --- apply the routing function + +# Option 1: story in memory + +# tt_expanded_results <- tt_matrix_expanded(#scenarios = scenarios, +# scenarios = scenarios[scenarios$scenario != "car", ], +# zone_layer = study_area_r5[1:10,], +# time_window = 5, +# storage_option = "memory") + +# Option 2: save to disk + +tt_matrix_expanded(scenarios = scenarios[scenarios$scenario != "car", ], + zone_layer = study_area_r5, + time_window = 10, + storage_option = "save", + save_format = "parquet", + folder_path = paste0(travel_time_path, geography, "/travel_time_matrix_expanded/temporal")) + + +# --- read in the parquet files +# read in all the files using purrr::map +files <- dir(paste0(travel_time_path, geography, "/travel_time_matrix_expanded/temporal"), full.names = TRUE) +tt_results_expanded <- map(files, arrow::read_parquet) +tt_results_expanded <- bind_rows(tt_results_expanded) + +# summarise the results to get one row per group. Currently we have one row for each minute in a time_window +tt_results_expanded_s <- summarise_ttm_expanded(ttm_expanded_results = tt_results_expanded) + +# save the results +arrow::write_parquet(tt_results_expanded_s, paste0(travel_time_path, geography, "/travel_time_matrix_expanded_temporal.parquet")) +#tt_results_expanded_s <- arrow::read_parquet(paste0(travel_time_path, geography, "/travel_time_matrix_expanded.parquet")) + + + + +# stop r5 +r5r::stop_r5(r5r_core) +# java garbage collector to free up memory +rJava::.jgc(R.gc = TRUE) + From 2096d6a05e7989e92e29e25050987a0eb3fce2f5 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Fri, 8 Nov 2024 18:13:48 +0000 Subject: [PATCH 14/16] temporal maps done --- code/demand_cluster_flows.R | 71 +- code/demand_cluster_flows_maps_temporal.R | 1030 ++++++++++----------- code/demand_cluster_flows_run.R | 251 +++++ code/demand_on_buses.R | 12 +- code/eda_travel_demand_nobus.R | 28 +- code/gtfs_convert_stop_times_to_freq.R | 12 +- code/performance_od.R | 6 +- code/travel_demand_temporal.R | 10 +- 8 files changed, 805 insertions(+), 615 deletions(-) create mode 100644 code/demand_cluster_flows_run.R diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R index 0b6262e..5c4b16d 100644 --- a/code/demand_cluster_flows.R +++ b/code/demand_cluster_flows.R @@ -23,9 +23,9 @@ od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, " plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/") # what combination are we clustering -day_time = "pt_wkday_evening" +# day_time = "pt_wkday_06_30" # sensitivity analysis? -sensitivity = FALSE +# sensitivity = FALSE # ----------- 1. Study area @@ -435,36 +435,43 @@ cluster_dbscan_res <- od_demand_jittered %>% st_write(cluster_dbscan_res, paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson"), delete_dsn = TRUE) +# +# +# +# # --- Compare clusters at different points in the day +# dbscan_sensitivity_res_wkday_06_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_06_30") +# dbscan_sensitivity_res_wkday_09_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_09_30") +# dbscan_sensitivity_res_wkday_12_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_12_30") +# dbscan_sensitivity_res_wkday_15_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_15_30") +# dbscan_sensitivity_res_wkday_18_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_18_30") +# +# dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_morning, +# dbscan_sensitivity_res_wkday_evening) +# +# +# dbscan_sensitivity_res_compare %>% +# filter(cluster != 0) %>% +# group_by(id) %>% +# #mutate(clusters = n()) %>% +# # How many clusters have more than 5 od pairs in them? +# mutate(clusters = sum(size > 25)) %>% +# ungroup() %>% +# filter(clusters > 25) %>% +# ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + +# geom_col() + +# scale_y_continuous(trans='log10') + +# facet_grid(id ~ scenario, scales = "fixed") + +# labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", +# subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", +# x = "Cluster no.", +# y = "No. of od pairs in cluster", +# fill= "No. of commuters") + +# theme_bw() -# --- Compare clusters at different points in the day -dbscan_sensitivity_res_wkday_morning = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% - mutate(scenario = "pt_wkday_morning") -dbscan_sensitivity_res_wkday_afternoon = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_afternoon.parquet")) %>% - mutate(scenario = "pt_wkday_afternoon") -dbscan_sensitivity_res_wkday_evening = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_evening.parquet")) %>% - mutate(scenario = "pt_wkday_evening") - -dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_morning, - dbscan_sensitivity_res_wkday_evening) - - -dbscan_sensitivity_res_compare %>% - filter(cluster != 0) %>% - group_by(id) %>% - #mutate(clusters = n()) %>% - # How many clusters have more than 5 od pairs in them? - mutate(clusters = sum(size > 25)) %>% - ungroup() %>% - filter(clusters > 25) %>% - ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + - geom_col() + - scale_y_continuous(trans='log10') + - facet_grid(id ~ scenario, scales = "fixed") + - labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", - subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", - x = "Cluster no.", - y = "No. of od pairs in cluster", - fill= "No. of commuters") + - theme_bw() diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R index 153f629..3ca40fa 100644 --- a/code/demand_cluster_flows_maps_temporal.R +++ b/code/demand_cluster_flows_maps_temporal.R @@ -21,12 +21,12 @@ clustering <- "equal" distance_threshold <- 50000 # 10000 # save plots? -save = TRUE +save = FALSE # ------------------------ Load in the data -------------------------- # # ----- Clustering results -day_time = "pt_wkday_afternoon" +# day_time = "pt_wkday_06_30" cluster_dbscan_res <- st_read(paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson")) @@ -116,49 +116,49 @@ rows <- round(length(unique(clusters_vis$cluster)) / 3) ##### ---------- MAPS ---------- ##### - -### ---------- Plot 1: Top n Clusters (Facet Plot) ---------- ### - -tm_shape(study_area) + - tm_borders(col = "grey60", - alpha = 0.5) + - tm_shape(study_area) + - tm_fill(col = "grey95", - alpha = 0.5) + - tm_shape(clusters_vis) + - tm_lines(lwd = "commute_all", - col = "cluster", - #col = "darkgreen", - scale = 10, - palette = "Accent", #YlGn - #style = "pretty", - alpha = 1, - title.col = "Cluster", - title.lwd = "No. of commuters", - legend.col.show = FALSE, - # remove "missing from legend - showNA = FALSE) + - tm_facets(by = "cluster", - free.coords = FALSE, - nrow = rows, - showNA = FALSE) + - tm_layout(fontfamily = 'Georgia', - main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), - main.title.size = 1.1, - main.title.color = "azure4", - main.title.position = "left", - legend.outside = TRUE, - legend.outside.position = "bottom", - legend.stack = "horizontal", - # remove panel headers - panel.show = FALSE, - frame = FALSE) -> map_cluster_results - -map_cluster_results - -if(save == TRUE){ - tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", "_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_.png"), width = 12, dpi = 1080, asp = 0) -} +# +# ### ---------- Plot 1: Top n Clusters (Facet Plot) ---------- ### +# +# tm_shape(study_area) + +# tm_borders(col = "grey60", +# alpha = 0.5) + +# tm_shape(study_area) + +# tm_fill(col = "grey95", +# alpha = 0.5) + +# tm_shape(clusters_vis) + +# tm_lines(lwd = "commute_all", +# col = "cluster", +# #col = "darkgreen", +# scale = 10, +# palette = "Accent", #YlGn +# #style = "pretty", +# alpha = 1, +# title.col = "Cluster", +# title.lwd = "No. of commuters", +# legend.col.show = FALSE, +# # remove "missing from legend +# showNA = FALSE) + +# tm_facets(by = "cluster", +# free.coords = FALSE, +# nrow = rows, +# showNA = FALSE) + +# tm_layout(fontfamily = 'Georgia', +# main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), +# main.title.size = 1.1, +# main.title.color = "azure4", +# main.title.position = "left", +# legend.outside = TRUE, +# legend.outside.position = "bottom", +# legend.stack = "horizontal", +# # remove panel headers +# panel.show = FALSE, +# frame = FALSE) -> map_cluster_results +# +# map_cluster_results +# +# if(save == TRUE){ +# tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_.png"), width = 12, dpi = 1080, asp = 0) +# } ### ---------- Plot 2: Compare mode composition of each cluster (desire line level) ---------- ### @@ -180,52 +180,52 @@ clusters_vis_mode_summary <- clusters_vis_mode %>% clusters_vis_mode <- clusters_vis_mode %>% inner_join(clusters_vis_mode_summary %>% select(cluster), by = "cluster") - -tm_shape(study_area) + - tm_borders(col = "grey60", - alpha = 0.5) + - tm_shape(study_area) + - tm_fill(col = "grey95", - alpha = 0.5) + - tm_shape(clusters_vis_mode) + - tm_lines(lwd = "commute_all", - col = "commute_all", - scale = 10, - palette = "RdYlGn", #Accent - #style = "pretty", - alpha = 1, - title.col = "Travel demand", - title.lwd = "No. of commuters", - #legend.col.is.portrait = FALSE, - # remove "missing from legend - showNA = FALSE) + - tm_facets(by = "cluster", - free.coords = FALSE, - nrow = rows, - showNA = FALSE) + - tm_layout(fontfamily = 'Georgia', - main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), - main.title.size = 1.1, - main.title.color = "azure4", - main.title.position = "left", - # legend.outside = TRUE, - # legend.outside.position = "bottom", - # legend.stack = "horizontal", - # remove panel headers - panel.show = FALSE, - frame = FALSE) -> map_cluster_results_bus_frac_grouped - -map_cluster_results_bus_frac_grouped - -if(save == TRUE){ - tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_.png"), width = 12, dpi = 1080, asp = 0) -} +# +# tm_shape(study_area) + +# tm_borders(col = "grey60", +# alpha = 0.5) + +# tm_shape(study_area) + +# tm_fill(col = "grey95", +# alpha = 0.5) + +# tm_shape(clusters_vis_mode) + +# tm_lines(lwd = "commute_all", +# col = "commute_all", +# scale = 10, +# palette = "RdYlGn", #Accent +# #style = "pretty", +# alpha = 1, +# title.col = "Travel demand", +# title.lwd = "No. of commuters", +# #legend.col.is.portrait = FALSE, +# # remove "missing from legend +# showNA = FALSE) + +# tm_facets(by = "cluster", +# free.coords = FALSE, +# nrow = rows, +# showNA = FALSE) + +# tm_layout(fontfamily = 'Georgia', +# main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), +# main.title.size = 1.1, +# main.title.color = "azure4", +# main.title.position = "left", +# # legend.outside = TRUE, +# # legend.outside.position = "bottom", +# # legend.stack = "horizontal", +# # remove panel headers +# panel.show = FALSE, +# frame = FALSE) -> map_cluster_results_bus_frac_grouped +# +# map_cluster_results_bus_frac_grouped +# +# if(save == TRUE){ +# tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_.png"), width = 12, dpi = 1080, asp = 0) +# } # --- Map with clusters ovelayed onto bus routes -gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf.geojson") +gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf_temporal.geojson") # gtfs_bus <- gtfs_bus %>% # st_filter(st_union(study_area), .predicate = st_within) @@ -1058,7 +1058,7 @@ tm_shape(study_area) + alpha = 0.6, jitter = 0.01, size = "commute_all", - scale = 0.8, + scale = 0.5, legend.size.show = FALSE) + tm_facets(by = "cluster", #by = "commute_all", @@ -1077,7 +1077,7 @@ tm_shape(study_area) + alpha = 0.6, jitter = 0.01, size = "commute_all", - scale = 0.8, + scale = 0.5, legend.size.show = FALSE) + tm_facets(by = "cluster", #by = "commute_all", @@ -1196,7 +1196,7 @@ tm_shape(study_area) + alpha = 0.6, jitter = 0.01, size = "commute_all", - scale = 1, + scale = 0.5, legend.size.show = FALSE) + tm_facets(by = "cluster", #by = "commute_all", @@ -1215,7 +1215,7 @@ tm_shape(study_area) + alpha = 0.6, jitter = 0.01, size = "commute_all", - scale = 1, + scale = 0.5, legend.size.show = FALSE) + tm_facets(by = "cluster", #by = "commute_all", @@ -1463,6 +1463,20 @@ if(save == TRUE){ # ---------------------- ONE BIG MAP +# Combine cluster polygons +clusters_vis_mode_poly_filt3_all = clusters_vis_mode_poly_filt3 %>% + mutate(area = st_area(.)) %>% + filter(area > 0.2 * mean(area)) %>% + st_union() %>% + st_make_valid() %>% + st_as_sf() %>% + mutate(scenario = day_time) + +# Save to make temporal comparison plot +st_write(clusters_vis_mode_poly_filt3_all, paste0(plots_path, "polygons_combined/", day_time, ".geojson"), delete_dsn = TRUE) + + + tm_shape(basemap_urban_rural) + tm_fill(col = "RUC11CD_NM", @@ -1470,7 +1484,7 @@ tm_shape(basemap_urban_rural) + alpha = 0.5, title = "Level of \nUrbanisation") + # bus layer - tm_shape(gtfs_bus %>% +tm_shape(gtfs_bus %>% filter(scenario == day_time) %>% mutate(headway_inv = (1/headway_secs) * 3600) %>% filter(headway_secs < 7200)) + @@ -1487,9 +1501,7 @@ tm_shape(basemap_urban_rural) + # ---- clusters # poly fill #tm_shape(st_union(clusters_vis_mode_poly_filt3)) + - tm_shape(st_union(clusters_vis_mode_poly_filt3 %>% - mutate(area = st_area(.)) %>% - filter(area > 0.2 * mean(area)))) + +tm_shape(clusters_vis_mode_poly_filt3_all) + tm_borders(col = "darkgreen", lwd = 3.5, lty = "dashed") + @@ -1544,9 +1556,7 @@ tm_shape(basemap_urban_rural) + # ---- clusters # poly fill #tm_shape(st_union(clusters_vis_mode_poly_filt3)) + - tm_shape(st_union(clusters_vis_mode_poly_filt3 %>% - mutate(area = st_area(.)) %>% - filter(area > 0.2 * mean(area)))) + + tm_shape(clusters_vis_mode_poly_filt3_all) + tm_borders(col = "darkgreen", lwd = 3.5, lty = "dashed") + @@ -1596,9 +1606,7 @@ tm_shape(st_union(study_area)) + # ---- clusters # poly fill #tm_shape(st_union(clusters_vis_mode_poly_filt3)) + - tm_shape(st_union(clusters_vis_mode_poly_filt3 %>% - mutate(area = st_area(.)) %>% - filter(area > 0.2 * mean(area)))) + + tm_shape(clusters_vis_mode_poly_filt3_all) + tm_borders(col = "darkgreen", lwd = 4, lty = "dashed") + @@ -1619,457 +1627,373 @@ if(save == TRUE){ - - - - - - - - - - - -##### ---------- FIGURES ---------- ##### - -### ---------- 1. Get unjittered OD data (code from code/demand_cluster_flows_prep.R) - travel times haven't been affected by jittering - -# is the data disaggregated by mode? -mode <- TRUE -#mode <- FALSE - -# Demand (census) + supply (travel time) data - -if(mode == FALSE){ - # data with "commute_all" only - #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) -} else{ - # data with modes - #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet")) - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet")) -} - -# filter to specific combination -# TODO: get seperate flows for car and pt, and keep two combinations -od_demand <- od_demand %>% - filter(combination == "pt_wkday_morning") - -od_demand <- od_demand %>% - select(-distance_m) - -# --- create desire lines and remove od pairs with very short distance - -# TODO: edit this to avoid clusters of very short flows -# "Density-based clustering for bivariate-flow data" (section 5.2): preprocessing step to avoid -# clusters of very short flows. this involves splitting the data into 3 chunks -# based on length ( -od_demand_filtered = filter_matrix_by_distance(zones = study_area, - od_matrix = od_demand, - dist_threshold = 1000) - -# add unique id for each row -od_demand_filtered <- od_demand_filtered %>% - mutate(od_id = paste0(Origin, "-", Destination, "-", combination)) - - - - -# ----- add the clustering results to the unjittered demand -od_demand_figures <- cluster_dbscan_res %>% - select(Origin, Destination, starts_with("commute_"), od_id, flow_ID, cluster, size, commute_all) %>% - left_join(od_demand_filtered %>% - st_drop_geometry() %>% - distinct(od_id, .keep_all = TRUE) %>% - # travel times before being ruined by od_jitter() - select(od_id, ends_with("_time"), n_rides, starts_with("speed_"), starts_with("ride_time_")), - by = "od_id") - -# match existing clusters +# ##### ---------- FIGURES ---------- ##### +# +# ### ---------- 1. Get unjittered OD data (code from code/demand_cluster_flows_prep.R) - travel times haven't been affected by jittering +# +# # is the data disaggregated by mode? +# mode <- TRUE +# #mode <- FALSE +# +# # Demand (census) + supply (travel time) data +# +# if(mode == FALSE){ +# # data with "commute_all" only +# #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) +# od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) +# } else{ +# # data with modes +# #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet")) +# od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet")) +# } +# +# # filter to specific combination +# # TODO: get seperate flows for car and pt, and keep two combinations +# od_demand <- od_demand %>% +# filter(combination == "pt_wkday_morning") +# +# od_demand <- od_demand %>% +# select(-distance_m) +# +# # --- create desire lines and remove od pairs with very short distance +# +# # TODO: edit this to avoid clusters of very short flows +# # "Density-based clustering for bivariate-flow data" (section 5.2): preprocessing step to avoid +# # clusters of very short flows. this involves splitting the data into 3 chunks +# # based on length ( +# od_demand_filtered = filter_matrix_by_distance(zones = study_area, +# od_matrix = od_demand, +# dist_threshold = 1000) +# +# # add unique id for each row +# od_demand_filtered <- od_demand_filtered %>% +# mutate(od_id = paste0(Origin, "-", Destination, "-", combination)) +# +# +# +# +# # ----- add the clustering results to the unjittered demand +# od_demand_figures <- cluster_dbscan_res %>% +# select(Origin, Destination, starts_with("commute_"), od_id, flow_ID, cluster, size, commute_all) %>% +# left_join(od_demand_filtered %>% +# st_drop_geometry() %>% +# distinct(od_id, .keep_all = TRUE) %>% +# # travel times before being ruined by od_jitter() +# select(od_id, ends_with("_time"), n_rides, starts_with("speed_"), starts_with("ride_time_")), +# by = "od_id") +# +# # match existing clusters +# # od_demand_figures_filt <- od_demand_figures %>% +# # filter(size > 7, size < 5000) %>% +# # filter(commute_all > 200) %>% +# # filter(cluster != 0) +# # od_demand_figures_filt <- od_demand_figures %>% -# filter(size > 7, size < 5000) %>% -# filter(commute_all > 200) %>% -# filter(cluster != 0) - -od_demand_figures_filt <- od_demand_figures %>% - rename(cluster_orig = cluster) %>% - inner_join(clusters_vis %>% - st_drop_geometry() %>% - select(flow_ID, cluster), - by = "flow_ID") %>% - # keep only the clusters after intersection with gtfs - filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) - - - - -# ----- plots - -# scatter plots with x: commute_all, y: fraction of bus commuters -ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = commute_bus)) + - geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + - geom_point() + - ylim(0, 5) + - scale_color_distiller(palette= "RdYlGn", direction = 1) + - labs(x = "total no. of commuters", - y = "Bus / Car commuters (fraction)", - title = "Composition of clusters", - subtitle = "No. of commuters", - color = "Commuters (bus)") + - facet_wrap(facets = vars(cluster)) + - theme_bw() + - theme(legend.position = "bottom") - -ggsave(paste0(plots_path, "figure_scatter_commuters_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) - -# paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0) - - -ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) + - geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + - geom_point() + - ylim(0, 5) + - scale_color_distiller(palette= "RdYlGn", direction = 1) + - labs(x = "total no. of commuters", - y = "Bus / Car commuters (fraction)", - color = "Average speed of \nbus commute (kph)", - title = "Composition of clusters: \nNo. of commuters in each OD pair") + - facet_wrap(facets = vars(cluster)) + - theme_bw() + - theme(legend.position = "bottom") - -ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) - -# same but replacing NA speed for min speed -ggplot(od_demand_figures_filt %>% - st_drop_geometry() %>% - mutate(speed_kph = replace_na(speed_kph, min(speed_kph, na.rm = TRUE))), - aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) + - geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + - geom_point() + - ylim(0, 5) + - scale_color_distiller(palette= "RdYlGn", direction = 1) + - labs(x = "total no. of commuters", - y = "Bus / Car commuters (fraction)", - color = "Average speed of \nbus commute (kph)", - title = "Composition of clusters: \nNo. of commuters in each OD pair") + - facet_wrap(facets = vars(cluster)) + - theme_bw() + - theme(legend.position = "bottom") - -ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, "_no_NA.png"), height = 8, width = 6) - -# ----- Get the spatial coverage of each cluster (what portion is in urban / rural etc?) - -# get intersection -clusters_vis_mode_poly %>% - st_intersection(basemap_urban_rural %>% - st_transform(st_crs(clusters_vis_mode_poly))) %>% - # get area - mutate(area_km = as.numeric(st_area(.) / 1000000)) %>% - # area by cluster - st_drop_geometry() %>% - group_by(cluster, RUC11, RUC11CD_NM_FCT) %>% - summarise(area_km = sum(area_km)) %>% - # keep only the clusters after intersection with gtfs - filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly - -# define custom color paletter -colors_urban_rural <- (c("#01665E", "#35978F", "#80CDC1", "#DFC27D", "#8C510A")) -# order column based on the RUC11CD_NM_FCT column (degree of urbanisation) -clusters_ur_poly$RUC11 <- factor(clusters_ur_poly$RUC11, levels = unique(clusters_ur_poly$RUC11)[order(clusters_ur_poly$RUC11CD_NM_FCT)]) - -ggplot(clusters_ur_poly, aes(x = RUC11, y = area_km, fill = RUC11)) + - geom_col(color = "grey60") + - scale_fill_manual(values = colors_urban_rural, - labels = function(x) str_wrap(x, width = 25)) + - labs(x = "", - y = "Area covered by cluster (km2)", - color = "Rural / Urban Classification", - title = "Composition of clusters: Rural / Urban") + - theme_bw() + - theme(axis.text.x = element_blank(), - axis.ticks.x=element_blank(), - legend.position = "bottom", - legend.title = element_blank()) + - guides(fill = guide_legend(nrow = 3)) + - facet_wrap(facets = vars(cluster)) - -ggsave(paste0(plots_path, "figure_bar_urban_rural_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) - - -# Same but for filtered polygon - -clusters_vis_mode_poly_filt3 %>% - st_intersection(basemap_urban_rural %>% - st_transform(st_crs(clusters_vis_mode_poly))) %>% - # get area - mutate(area_km = as.numeric(st_area(.) / 1000000)) %>% - # area by cluster - st_drop_geometry() %>% - group_by(cluster, RUC11, RUC11CD_NM_FCT) %>% - summarise(area_km = sum(area_km)) %>% - # keep only the clusters after intersection with gtfs - filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly_filt - -clusters_ur_poly_filt$RUC11 <- factor(clusters_ur_poly_filt$RUC11, levels = unique(clusters_ur_poly_filt$RUC11)[order(clusters_ur_poly_filt$RUC11CD_NM_FCT)]) - - -ggplot(clusters_ur_poly_filt, aes(x = RUC11, y = area_km, fill = RUC11)) + - geom_col(color = "grey60") + - scale_fill_manual(values = colors_urban_rural, - #direction = -1, - labels = function(x) str_wrap(x, width = 25)) + - labs(x = "", - y = "Area covered by cluster (km2)", - color = "Rural / Urban Classification", - title = "Composition of clusters: Rural / Urban") + - theme_bw() + - theme(axis.text.x = element_blank(), - axis.ticks.x=element_blank(), - legend.position = "bottom", - legend.title = element_blank()) + - guides(fill = guide_legend(nrow = 3)) + - facet_wrap(facets = vars(cluster)) - -ggsave(paste0(plots_path, "figure_bar_urban_rural_filtered_by_gtfs_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) - - - - -### ---- join the data to plot together -clusters_ur_poly %>% - left_join(clusters_ur_poly_filt %>% - rename(area_km_filt = area_km), - by = c("cluster", "RUC11")) -> clusters_ur_poly_combined - - - -# --- plot both together -ggplot(clusters_ur_poly_combined) + - geom_col(aes(x = RUC11, y = area_km, color = RUC11, alpha = 0.01)) + - geom_col(aes(x = RUC11, y = area_km_filt, fill = RUC11)) + - scale_fill_manual(values = colors_urban_rural, - #direction = -1, - labels = function(x) str_wrap(x, width = 25)) + - scale_color_manual(values = colors_urban_rural, - #direction = -1, - labels = function(x) str_wrap(x, width = 25)) + - labs(x = "", - y = "Area covered by cluster (km2)", - fill = "Rural / Urban Classification", - title = "Composition of clusters: Rural / Urban", - subtitle = "Before and after intersecting with bus network", - caption = "border: area covered by entire cluster \nfill: area covered by cluster after intersection with bus network ") + - guides(color = "none", alpha = "none") + - scale_alpha_identity() + # Maintain alpha value - theme_bw() + - theme(axis.text.x = element_blank(), - axis.ticks.x=element_blank(), - legend.position = "bottom", - legend.title = element_blank(), - plot.caption = element_text(hjust = 0)) + - guides(fill = guide_legend(nrow = 3)) + - facet_wrap(facets = vars(cluster)) - -ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) - - - - - -# ---------- PLots of line bearings in each cluster - -# --- calculate bearings - -od_demand_figures_bearings <- od_demand_figures_filt %>% - mutate(bearing = stplanr::line_bearing(.), - bearing_adjusted = case_when(bearing < 0 ~ bearing + 360, - .default = bearing), - bearing_180 = case_when(bearing < 0 ~ bearing * -1, - .default = bearing)) - - - -# add length column -od_demand_figures_bearings <- od_demand_figures_bearings %>% - mutate(distance_m = units::drop_units(sf::st_length(.))) - -# --- get columns for distance and bearing "groups" - for facet plots cut distance angle groups for plots - - -# Define the breaks for the buckets -#breaks_angle <- seq(-10, 370, by = 20) -breaks_angle <- seq(0, 360, by = 30) -#breaks_angle <- seq(0, 180, by = 15) -breaks_distance <- seq(0, 50, by = 10) - -od_demand_figures_bearings <- od_demand_figures_bearings %>% - mutate(bucket = cut(bearing_adjusted, breaks = breaks_angle, right = FALSE, include.lowest = TRUE), - # mutate(bucket = cut(bearing_180, breaks = breaks_angle, right = FALSE, include.lowest = TRUE), - bucket_distance = cut(distance_m / 1000, breaks = breaks_distance, right = FALSE, include.lowest = TRUE)) %>% - mutate(bucket_distance = fct_rev(bucket_distance)) - -# Ensure the bucket factor levels are ordered correctly -#bearings_cat$bucket <- factor(bearings_cat$bucket, levels = unique(bearings_cat$bucket), ordered = TRUE) - - -# --- Plot with no of lines only (geom_bar()) - - -# bar -ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) + - geom_bar() + - facet_wrap(facets = "cluster") + - labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") + - scale_x_discrete(labels = breaks_angle) + - theme(axis.text.x = element_text(angle = 45, hjust = 1), - legend.position = "bottom") - -ggsave(paste0(plots_path, "figure_bar_bearing_y_ods_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8) - - -# circle -ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) + - geom_bar() + - coord_polar(start = -0.1) + - facet_wrap(facets = "cluster") + - labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") + - #scale_x_discrete(labels = breaks_angle) + - scale_x_discrete(labels = breaks_angle) + - theme(axis.text.x = element_text(angle = 45, hjust = 1)) - - - -# --- Plot with total commuters geom_col() - - -# bar -ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + - geom_col() + - #coord_polar() + - facet_wrap(facets = "cluster") + - labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") + - scale_x_discrete(labels = breaks_angle) + - theme(axis.text.x = element_text(angle = 45, hjust = 1), - legend.position = "bottom") - -ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8) - -# circle -ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + - geom_col() + - coord_radial(rotate.angle = TRUE) + - facet_wrap(facets = "cluster") + - labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") + - #scale_x_discrete(labels = breaks_angle) + - scale_x_discrete(labels = breaks_angle) + - theme(axis.text.x = element_text(angle = 45, hjust = 1)) - - - - -# TESTING FACET GRID - -# Select labels to print (one in every three) -labels_to_print <- levels(od_demand_figures_bearings$bucket)[seq(1, length(levels(od_demand_figures_bearings$bucket)), by = 3)] - - -ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + - geom_col() + - #coord_polar() + - facet_grid(bucket_distance ~ cluster, - drop = TRUE) + - labs(x = "Bearing", y = "Total Commuters", fill = "OD Pair Length") + - # scale_x_discrete(labels = breaks_angle) + - scale_x_discrete(labels = function(x) ifelse(x %in% labels_to_print, x, "")) + # Print one in every three labels # theme_minimal() + - theme_bw() + - theme(axis.text.x = element_text(angle = 60, hjust = 1), - legend.position = "bottom") - - -ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_grid_cluster_distance_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 10) - - - - - - - - - - - - - -clusters_vis_sp = clusters_vis %>% - mutate(geometry = st_startpoint(.)) - -clusters_vis_ep = clusters_vis %>% - mutate(geometry = st_endpoint(.)) - -tm_shape(study_area) + - tm_borders(col = "grey60", - alpha = 0.5) + - tm_shape(study_area) + - tm_fill(col = "grey95", - alpha = 0.5) + - tm_shape(clusters_vis) + - tm_lines(lwd = "commute_all", - col = "cluster", - #col = "darkgreen", - scale = 10, - palette = "Accent", #YlGn - #style = "pretty", - alpha = 1, - title.col = "Cluster", - title.lwd = "No. of commuters", - legend.col.show = FALSE, - # remove "missing from legend - showNA = FALSE) + - tm_facets(by = "cluster", - free.coords = FALSE, - nrow = rows, - showNA = FALSE) + - tm_shape(clusters_vis_sp) + - tm_dots(col = "black", - #col = "darkgreen", - shape = 0, - scale = 2, - palette = "Accent", #YlGn - #style = "pretty", - alpha = 1, - # remove "missing from legend - showNA = FALSE) + - tm_facets(by = "cluster", - free.coords = FALSE, - nrow = rows, - showNA = FALSE) + - tm_shape(clusters_vis_ep) + - tm_dots(col = "darkred", - #col = "darkgreen", - shape = 6, - scale = 2, - palette = "Accent", #YlGn - #style = "pretty", - alpha = 1, - # remove "missing from legend - showNA = FALSE) + - tm_facets(by = "cluster", - free.coords = FALSE, - nrow = rows, - showNA = FALSE) + - - tm_layout(fontfamily = 'Georgia', - main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), - main.title.size = 1.1, - main.title.color = "azure4", - main.title.position = "left", - legend.outside = TRUE, - legend.outside.position = "bottom", - legend.stack = "horizontal", - # remove panel headers - panel.show = FALSE, - frame = FALSE) -> map_cluster_results - -map_cluster_results +# rename(cluster_orig = cluster) %>% +# inner_join(clusters_vis %>% +# st_drop_geometry() %>% +# select(flow_ID, cluster), +# by = "flow_ID") %>% +# # keep only the clusters after intersection with gtfs +# filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) +# + + +# +# # ----- plots +# +# # scatter plots with x: commute_all, y: fraction of bus commuters +# ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = commute_bus)) + +# geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + +# geom_point() + +# ylim(0, 5) + +# scale_color_distiller(palette= "RdYlGn", direction = 1) + +# labs(x = "total no. of commuters", +# y = "Bus / Car commuters (fraction)", +# title = "Composition of clusters", +# subtitle = "No. of commuters", +# color = "Commuters (bus)") + +# facet_wrap(facets = vars(cluster)) + +# theme_bw() + +# theme(legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_scatter_commuters_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# # paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0) +# +# +# ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) + +# geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + +# geom_point() + +# ylim(0, 5) + +# scale_color_distiller(palette= "RdYlGn", direction = 1) + +# labs(x = "total no. of commuters", +# y = "Bus / Car commuters (fraction)", +# color = "Average speed of \nbus commute (kph)", +# title = "Composition of clusters: \nNo. of commuters in each OD pair") + +# facet_wrap(facets = vars(cluster)) + +# theme_bw() + +# theme(legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# # same but replacing NA speed for min speed +# ggplot(od_demand_figures_filt %>% +# st_drop_geometry() %>% +# mutate(speed_kph = replace_na(speed_kph, min(speed_kph, na.rm = TRUE))), +# aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) + +# geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + +# geom_point() + +# ylim(0, 5) + +# scale_color_distiller(palette= "RdYlGn", direction = 1) + +# labs(x = "total no. of commuters", +# y = "Bus / Car commuters (fraction)", +# color = "Average speed of \nbus commute (kph)", +# title = "Composition of clusters: \nNo. of commuters in each OD pair") + +# facet_wrap(facets = vars(cluster)) + +# theme_bw() + +# theme(legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, "_no_NA.png"), height = 8, width = 6) +# +# # ----- Get the spatial coverage of each cluster (what portion is in urban / rural etc?) +# +# # get intersection +# clusters_vis_mode_poly %>% +# st_intersection(basemap_urban_rural %>% +# st_transform(st_crs(clusters_vis_mode_poly))) %>% +# # get area +# mutate(area_km = as.numeric(st_area(.) / 1000000)) %>% +# # area by cluster +# st_drop_geometry() %>% +# group_by(cluster, RUC11, RUC11CD_NM_FCT) %>% +# summarise(area_km = sum(area_km)) %>% +# # keep only the clusters after intersection with gtfs +# filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly +# +# # define custom color paletter +# colors_urban_rural <- (c("#01665E", "#35978F", "#80CDC1", "#DFC27D", "#8C510A")) +# # order column based on the RUC11CD_NM_FCT column (degree of urbanisation) +# clusters_ur_poly$RUC11 <- factor(clusters_ur_poly$RUC11, levels = unique(clusters_ur_poly$RUC11)[order(clusters_ur_poly$RUC11CD_NM_FCT)]) +# +# ggplot(clusters_ur_poly, aes(x = RUC11, y = area_km, fill = RUC11)) + +# geom_col(color = "grey60") + +# scale_fill_manual(values = colors_urban_rural, +# labels = function(x) str_wrap(x, width = 25)) + +# labs(x = "", +# y = "Area covered by cluster (km2)", +# color = "Rural / Urban Classification", +# title = "Composition of clusters: Rural / Urban") + +# theme_bw() + +# theme(axis.text.x = element_blank(), +# axis.ticks.x=element_blank(), +# legend.position = "bottom", +# legend.title = element_blank()) + +# guides(fill = guide_legend(nrow = 3)) + +# facet_wrap(facets = vars(cluster)) +# +# ggsave(paste0(plots_path, "figure_bar_urban_rural_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# +# # Same but for filtered polygon +# +# clusters_vis_mode_poly_filt3 %>% +# st_intersection(basemap_urban_rural %>% +# st_transform(st_crs(clusters_vis_mode_poly))) %>% +# # get area +# mutate(area_km = as.numeric(st_area(.) / 1000000)) %>% +# # area by cluster +# st_drop_geometry() %>% +# group_by(cluster, RUC11, RUC11CD_NM_FCT) %>% +# summarise(area_km = sum(area_km)) %>% +# # keep only the clusters after intersection with gtfs +# filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly_filt +# +# clusters_ur_poly_filt$RUC11 <- factor(clusters_ur_poly_filt$RUC11, levels = unique(clusters_ur_poly_filt$RUC11)[order(clusters_ur_poly_filt$RUC11CD_NM_FCT)]) +# +# +# ggplot(clusters_ur_poly_filt, aes(x = RUC11, y = area_km, fill = RUC11)) + +# geom_col(color = "grey60") + +# scale_fill_manual(values = colors_urban_rural, +# #direction = -1, +# labels = function(x) str_wrap(x, width = 25)) + +# labs(x = "", +# y = "Area covered by cluster (km2)", +# color = "Rural / Urban Classification", +# title = "Composition of clusters: Rural / Urban") + +# theme_bw() + +# theme(axis.text.x = element_blank(), +# axis.ticks.x=element_blank(), +# legend.position = "bottom", +# legend.title = element_blank()) + +# guides(fill = guide_legend(nrow = 3)) + +# facet_wrap(facets = vars(cluster)) +# +# ggsave(paste0(plots_path, "figure_bar_urban_rural_filtered_by_gtfs_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# +# +# +# ### ---- join the data to plot together +# clusters_ur_poly %>% +# left_join(clusters_ur_poly_filt %>% +# rename(area_km_filt = area_km), +# by = c("cluster", "RUC11")) -> clusters_ur_poly_combined +# +# +# +# # --- plot both together +# ggplot(clusters_ur_poly_combined) + +# geom_col(aes(x = RUC11, y = area_km, color = RUC11, alpha = 0.01)) + +# geom_col(aes(x = RUC11, y = area_km_filt, fill = RUC11)) + +# scale_fill_manual(values = colors_urban_rural, +# #direction = -1, +# labels = function(x) str_wrap(x, width = 25)) + +# scale_color_manual(values = colors_urban_rural, +# #direction = -1, +# labels = function(x) str_wrap(x, width = 25)) + +# labs(x = "", +# y = "Area covered by cluster (km2)", +# fill = "Rural / Urban Classification", +# title = "Composition of clusters: Rural / Urban", +# subtitle = "Before and after intersecting with bus network", +# caption = "border: area covered by entire cluster \nfill: area covered by cluster after intersection with bus network ") + +# guides(color = "none", alpha = "none") + +# scale_alpha_identity() + # Maintain alpha value +# theme_bw() + +# theme(axis.text.x = element_blank(), +# axis.ticks.x=element_blank(), +# legend.position = "bottom", +# legend.title = element_blank(), +# plot.caption = element_text(hjust = 0)) + +# guides(fill = guide_legend(nrow = 3)) + +# facet_wrap(facets = vars(cluster)) +# +# ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# +# +# +# +# # ---------- PLots of line bearings in each cluster +# +# # --- calculate bearings +# +# od_demand_figures_bearings <- od_demand_figures_filt %>% +# mutate(bearing = stplanr::line_bearing(.), +# bearing_adjusted = case_when(bearing < 0 ~ bearing + 360, +# .default = bearing), +# bearing_180 = case_when(bearing < 0 ~ bearing * -1, +# .default = bearing)) +# +# +# +# # add length column +# od_demand_figures_bearings <- od_demand_figures_bearings %>% +# mutate(distance_m = units::drop_units(sf::st_length(.))) +# +# # --- get columns for distance and bearing "groups" - for facet plots cut distance angle groups for plots +# +# +# # Define the breaks for the buckets +# #breaks_angle <- seq(-10, 370, by = 20) +# breaks_angle <- seq(0, 360, by = 30) +# #breaks_angle <- seq(0, 180, by = 15) +# breaks_distance <- seq(0, 50, by = 10) +# +# od_demand_figures_bearings <- od_demand_figures_bearings %>% +# mutate(bucket = cut(bearing_adjusted, breaks = breaks_angle, right = FALSE, include.lowest = TRUE), +# # mutate(bucket = cut(bearing_180, breaks = breaks_angle, right = FALSE, include.lowest = TRUE), +# bucket_distance = cut(distance_m / 1000, breaks = breaks_distance, right = FALSE, include.lowest = TRUE)) %>% +# mutate(bucket_distance = fct_rev(bucket_distance)) +# +# # Ensure the bucket factor levels are ordered correctly +# #bearings_cat$bucket <- factor(bearings_cat$bucket, levels = unique(bearings_cat$bucket), ordered = TRUE) +# +# +# # --- Plot with no of lines only (geom_bar()) +# +# +# # bar +# ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) + +# geom_bar() + +# facet_wrap(facets = "cluster") + +# labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") + +# scale_x_discrete(labels = breaks_angle) + +# theme(axis.text.x = element_text(angle = 45, hjust = 1), +# legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_bar_bearing_y_ods_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8) +# +# +# # circle +# ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) + +# geom_bar() + +# coord_polar(start = -0.1) + +# facet_wrap(facets = "cluster") + +# labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") + +# #scale_x_discrete(labels = breaks_angle) + +# scale_x_discrete(labels = breaks_angle) + +# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +# +# +# +# # --- Plot with total commuters geom_col() +# +# +# # bar +# ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + +# geom_col() + +# #coord_polar() + +# facet_wrap(facets = "cluster") + +# labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") + +# scale_x_discrete(labels = breaks_angle) + +# theme(axis.text.x = element_text(angle = 45, hjust = 1), +# legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8) +# +# # circle +# ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + +# geom_col() + +# coord_radial(rotate.angle = TRUE) + +# facet_wrap(facets = "cluster") + +# labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") + +# #scale_x_discrete(labels = breaks_angle) + +# scale_x_discrete(labels = breaks_angle) + +# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +# +# +# +# +# # TESTING FACET GRID +# +# # Select labels to print (one in every three) +# labels_to_print <- levels(od_demand_figures_bearings$bucket)[seq(1, length(levels(od_demand_figures_bearings$bucket)), by = 3)] +# +# +# ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + +# geom_col() + +# #coord_polar() + +# facet_grid(bucket_distance ~ cluster, +# drop = TRUE) + +# labs(x = "Bearing", y = "Total Commuters", fill = "OD Pair Length") + +# # scale_x_discrete(labels = breaks_angle) + +# scale_x_discrete(labels = function(x) ifelse(x %in% labels_to_print, x, "")) + # Print one in every three labels # theme_minimal() + +# theme_bw() + +# theme(axis.text.x = element_text(angle = 60, hjust = 1), +# legend.position = "bottom") +# +# +# ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_grid_cluster_distance_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 10) +# +# +# +# +# +# +# +# +# +# +# diff --git a/code/demand_cluster_flows_run.R b/code/demand_cluster_flows_run.R new file mode 100644 index 0000000..6e97183 --- /dev/null +++ b/code/demand_cluster_flows_run.R @@ -0,0 +1,251 @@ +sensitivity = TRUE +day_time = "pt_wkday_06_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +sensitivity = TRUE +day_time = "pt_wkday_09_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +sensitivity = TRUE +day_time = "pt_wkday_12_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +sensitivity = TRUE +day_time = "pt_wkday_15_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +sensitivity = TRUE +day_time = "pt_wkday_18_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +geography = "MSOA" + +# --- Compare clusters at different points in the day +dbscan_sensitivity_res_wkday_06_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_06_30") +dbscan_sensitivity_res_wkday_09_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_09_30") +dbscan_sensitivity_res_wkday_12_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_12_30") +dbscan_sensitivity_res_wkday_15_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_15_30") +dbscan_sensitivity_res_wkday_18_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_18_30") + + +dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_06_30, + dbscan_sensitivity_res_wkday_09_30, + dbscan_sensitivity_res_wkday_12_30, + dbscan_sensitivity_res_wkday_15_30, + dbscan_sensitivity_res_wkday_18_30 + ) + + +dbscan_sensitivity_res_compare %>% + filter(cluster != 0) %>% + group_by(id) %>% + #mutate(clusters = n()) %>% + # How many clusters have more than 5 od pairs in them? + mutate(clusters = sum(size > 30)) %>% + ungroup() %>% + filter(clusters > 25) %>% + ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + + geom_col() + + scale_y_continuous(trans='log10') + + facet_grid(id ~ scenario, scales = "fixed") + + labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", + subtitle = "Parameter combinations with > 25 clusters having at least 30 od pairs each", + x = "Cluster no.", + y = "No. of od pairs in cluster", + fill= "No. of commuters") + + theme_bw() + + +ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_compare.png"), width = 14, height = 10) + + + + +# -------------- Plots +day_time = "pt_wkday_06_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + + +day_time = "pt_wkday_09_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + + +day_time = "pt_wkday_12_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + + +day_time = "pt_wkday_15_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + + +day_time = "pt_wkday_18_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + + +# ----- Facet map with different times of day + +source("R/study_area_geographies.R") +source("R/filter_od_matrix.R") +# ---------------------- Load in data + +# -------- 1. administrative boundaries +study_area <- st_read("data/interim/study_area_boundary.geojson") +# convert to desired resolution +geography = "MSOA" +study_area = study_area_geographies(study_area = study_area, + geography = geography) + +study_area <- study_area %>% + st_cast("MULTIPOLYGON") + +# move the geographic ID to the first column. od::points_to_od() only keeps the first column as ID +geoid_col = paste0(geography, "21CD") + +study_area <- study_area %>% + relocate(all_of(geoid_col), .before = everything()) + + +# -------- 2. gtfs +gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf_temporal.geojson") + + +# -------- 3. Pop density base layer + +# --- pop density 1km grid +oa_pop_density <- stars::read_stars("data/external/population_density_grid_uk/gbr_pd_2020_1km_UNadj.tif") +# crop to study area +oa_pop_density_crop <- st_crop(oa_pop_density, study_area) + +# -------- 4. DRT operating zone polygons + +# Specify the directory containing the GeoJSON files +directory_path <- "data/processed/plots/eda/od_clustering/MSOA/temporal/polygons_combined" + +# Get the list of all GeoJSON files of DRT boundaries (1 file per scenario) +geojson_files <- list.files(directory_path, pattern = "\\.geojson$", full.names = TRUE) + +# Read each GeoJSON file into a list of sf objects +geojson_list <- purrr::map(geojson_files, st_read) +# prepare for plotting +combined_geojson <- do.call(rbind, geojson_list) + +combined_geojson <- combined_geojson %>% + st_transform(st_crs(study_area)) %>% + st_make_valid() + +# plot +tm_shape(st_union(study_area)) + + tm_borders(lwd =2, + col = "grey15") + + tm_shape(oa_pop_density_crop) + + tm_raster(title = "People / Km2", + palette = "Blues", + alpha = 0.5, + style = "log10_pretty") + + # bus layer + tm_shape(gtfs_bus %>% + filter(startsWith(scenario, "pt_wkday")) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 5.5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + tm_facets(by = "scenario", + #by = "commute_all", + free.coords = FALSE, + nrow = 2, + showNA = FALSE) + + # ---- clusters + # poly border + tm_shape(combined_geojson %>% + st_buffer(1000)) + + tm_borders(col = "darkgreen", + lwd = 3.5, + lty = "dashed") + + tm_facets(by = "scenario", + #by = "commute_all", + free.coords = FALSE, + nrow = 2, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Potential DRT Operating Zones (Temporal Variation)"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2) -> map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES + +map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES + +plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/") + +tmap_save(tm = map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES, filename = paste0(plots_path, "map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES.png"), width = 12, dpi = 1080, asp = 0) + + + + + + + + diff --git a/code/demand_on_buses.R b/code/demand_on_buses.R index 01b8806..a2ee745 100644 --- a/code/demand_on_buses.R +++ b/code/demand_on_buses.R @@ -51,7 +51,7 @@ gtfs_paths = paste0(gtfs_dir, gtfs_names) # --- read in the feeds -gtfs_bus <- gtfstools::read_gtfs(gtfs_paths[grepl("bus", gtfs_paths)]) +gtfs_bus <- gtfstools::read_gtfs(gtfs_paths[grepl("bus_temporal", gtfs_paths)]) #gtfs_rail <- gtfstools::read_gtfs(gtfs_paths[grepl("rail", gtfs_paths)]) # # --- filter the feeds to a specific point in time @@ -241,7 +241,7 @@ plots_path <- "data/processed/plots/eda/speed_demand_cutoffs/temporal/" # TRIP LEVEL trips_sd = trips_sd %>% - mutate(combination = factor(combination, levels = c("pt_wkday_morning", "pt_wkday_afternoon", "pt_wkday_evening"))) + mutate(combination = factor(combination, levels = c("pt_wkday_06_30", "pt_wkday_09_30", "pt_wkday_12_30", "pt_wkday_15_30", "pt_wkday_18_30"))) # equal split @@ -314,7 +314,7 @@ ggsave(filename = paste0(plots_path, "plot_hist_potential_demand_routes_two_meth # ----- OD LEVEL od_trips_sd = od_trips_sd %>% - mutate(combination = factor(combination, levels = c("pt_wkday_morning", "pt_wkday_afternoon", "pt_wkday_evening"))) + mutate(combination = factor(combination, levels = c("pt_wkday_06_30", "pt_wkday_09_30", "pt_wkday_12_30", "pt_wkday_15_30", "pt_wkday_18_30"))) # equal split @@ -331,7 +331,7 @@ od_trips_sd %>% ungroup() %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + - geom_histogram(binwidth = 0.5, alpha = 0.8) + + geom_histogram(binwidth = 0.2, alpha = 0.8) + facet_grid(.~combination) + #geom_density()+ #scale_y_log10() + @@ -366,7 +366,7 @@ od_trips_sd %>% ungroup() %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + - geom_histogram(binwidth = 0.5, alpha = 0.8) + + geom_histogram(binwidth = 0.2, alpha = 0.8) + #scale_y_log10() + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", @@ -396,7 +396,7 @@ od_trips_sd %>% ungroup() %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + - geom_histogram(binwidth = 0.5, alpha = 0.8) + + geom_histogram(binwidth = 0.2, alpha = 0.8) + #scale_y_log10() + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", diff --git a/code/eda_travel_demand_nobus.R b/code/eda_travel_demand_nobus.R index 33020a2..11441e7 100644 --- a/code/eda_travel_demand_nobus.R +++ b/code/eda_travel_demand_nobus.R @@ -55,8 +55,8 @@ plots_path <- paste0("data/processed/plots/eda/flows_no_direct/", geography, "/" # ---------- 2. Filter GTFS by time of day (one feed for each combination) # feeds -gtfs_bus <- gtfs_feeds$study_area_gtfs_bus_f.zip -gtfs_rail <- gtfs_feeds$study_area_gtfs_rail_f.zip +gtfs_bus <- gtfs_feeds$study_area_gtfs_bus_temporal_f.zip +gtfs_rail <- gtfs_feeds$study_area_gtfs_rail_temporal_f.zip # add shapes.txt to the rail feed gtfs_rail <- UK2GTFS::ATOC_shapes(gtfs_rail) @@ -65,15 +65,21 @@ gtfs_rail <- UK2GTFS::ATOC_shapes(gtfs_rail) scenarios <- tribble( ~scenario, ~date, ~min_departure_time, ~max_arrival_time, # public transport at different times of day / week - "pt_wkday_morning", "2023-08-14", "06:00:00", "09:00:00", - "pt_wkday_afternoon", "2023-08-14", "11:00:00", "14:00:00", - "pt_wkday_evening", "2023-08-14", "17:00:00", "20:00:00", - "pt_wkday_night", "2023-08-14", "21:30:00", "23:59:00:00", - "pt_wkend_morning", "2023-08-13", "06:00:00", "09:00:00", - "pt_wkend_evening", "2023-08-13", "17:00:00", "20:00:00", + "pt_wkday_06_30", "2023-08-14", "05:00:00", "08:00:00", + "pt_wkday_09_30", "2023-08-14", "08:00:00", "11:00:00", + "pt_wkday_12_30", "2023-08-14", "11:00:00", "14:00:00", + "pt_wkday_15_30", "2023-08-14", "14:00:00", "17:00:00", + "pt_wkday_18_30", "2023-08-14", "17:00:00", "20:00:00", + "pt_wkend_06_30", "2023-08-14", "05:00:00", "08:00:00", + "pt_wkend_09_30", "2023-08-14", "08:00:00", "11:00:00", + "pt_wkend_12_30", "2023-08-14", "11:00:00", "14:00:00", + "pt_wkend_15_30", "2023-08-14", "14:00:00", "17:00:00", + "pt_wkend_18_30", "2023-08-14", "17:00:00", "20:00:00", ) + + # Bus gtfs_bus_filtered <- filter_gtfs_feed_internal(gtfs_feed = gtfs_bus, scenarios = scenarios, @@ -97,8 +103,8 @@ gtfs_bus_filtered_df <- bind_rows(gtfs_bus_filtered) gtfs_rail_filtered_df <- bind_rows(gtfs_rail_filtered) # save the output -st_write(gtfs_bus_filtered_df, "data/interim/gtfs_freq/gtfs_bus_sf.geojson", delete_dsn =TRUE) -st_write(gtfs_rail_filtered_df, "data/interim/gtfs_freq/gtfs_rail_sf.geojson", delete_dsn =TRUE) +st_write(gtfs_bus_filtered_df, "data/interim/gtfs_freq/gtfs_bus_sf_temporal.geojson", delete_dsn =TRUE) +st_write(gtfs_rail_filtered_df, "data/interim/gtfs_freq/gtfs_rail_sf_temporal.geojson", delete_dsn =TRUE) # Bus headway (morning peak) @@ -110,7 +116,7 @@ tm_shape(study_area) + alpha = 0.5) + tm_shape(gtfs_bus_filtered_df %>% mutate(headway_inv = 1/headway_secs) %>% - filter(headway_secs < 3600, scenario == "pt_wkday_morning")) + + filter(headway_secs < 3600, scenario == "pt_wkday_06_30")) + tm_lines(col = "headway_secs", lwd = "headway_inv", scale = 5, diff --git a/code/gtfs_convert_stop_times_to_freq.R b/code/gtfs_convert_stop_times_to_freq.R index 5089463..4acf1a1 100644 --- a/code/gtfs_convert_stop_times_to_freq.R +++ b/code/gtfs_convert_stop_times_to_freq.R @@ -21,16 +21,16 @@ gtfs_rail <- tidytransit::read_gtfs("data/interim/study_area_gtfs_rail.zip") # apply the function gtfs_freq_bus = stop_times_to_frequencies(gtfs = gtfs_bus, - time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00"), - end_time = c("05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00", "23:59:00"))) + time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "06:30:00", "09:30:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00"), + end_time = c("05:00:00", "06:30:00", "09:00:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00", "23:59:00"))) # apply the function gtfs_freq_rail = stop_times_to_frequencies(gtfs = gtfs_rail, - time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00"), - end_time = c("05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00", "23:59:00"))) + time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "06:30:00", "09:30:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00"), + end_time = c("05:00:00", "06:30:00", "09:00:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00", "23:59:00"))) # save the new feed -tidytransit::write_gtfs(gtfs_freq_bus, "data/interim/gtfs_freq/study_area_gtfs_bus_f.zip") -tidytransit::write_gtfs(gtfs_freq_rail, "data/interim/gtfs_freq/study_area_gtfs_rail_f.zip") +tidytransit::write_gtfs(gtfs_freq_bus, "data/interim/gtfs_freq/study_area_gtfs_bus_temporal_f.zip") +tidytransit::write_gtfs(gtfs_freq_rail, "data/interim/gtfs_freq/study_area_gtfs_rail_temporal_f.zip") diff --git a/code/performance_od.R b/code/performance_od.R index d6d806b..9f8b6c0 100644 --- a/code/performance_od.R +++ b/code/performance_od.R @@ -123,7 +123,7 @@ tm_shape(study_area) + tm_borders(col = "grey60", alpha = 0.5) + tm_shape(od_demand_sf_rank %>% - filter(combination == "pt_wkday_morning") %>% + filter(combination == "pt_wkday_06_30") %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", lwd = "total_flow", @@ -166,7 +166,7 @@ tm_shape(study_area) + tm_borders(col = "grey60", alpha = 0.5) + tm_shape(od_demand_sf_rank %>% - filter(combination == "pt_wkday_morning", n_rides == 1)) + + filter(combination == "pt_wkday_06_30", n_rides == 1)) + tm_lines(col = "total_flow", lwd = "total_flow", legend.lwd.show = FALSE, @@ -207,7 +207,7 @@ tm_shape(study_area) + tm_fill(col = "grey80", alpha = 0.5) + tm_shape(od_demand_sf_rank %>% - filter(combination == "pt_wkday_morning", speed_percentile <= 0.25)) + + filter(combination == "pt_wkday_06_30", speed_percentile <= 0.25)) + tm_lines(col = "total_flow", lwd = "total_flow", scale = 10, diff --git a/code/travel_demand_temporal.R b/code/travel_demand_temporal.R index 97d6857..37ea7f9 100644 --- a/code/travel_demand_temporal.R +++ b/code/travel_demand_temporal.R @@ -122,7 +122,7 @@ cpc_matrices_all_internal <- cpc_matrices_all_internal %>% # ---------- MSOA level # read in the data -tt_matrix_msoa <- arrow::read_parquet("data/processed/travel_times/MSOA/travel_time_matrix_expanded.parquet") +tt_matrix_msoa <- arrow::read_parquet("data/processed/travel_times/MSOA/travel_time_matrix_expanded_temporal.parquet") # some OD pairs don't have travel time data (for some combinations). Expand grid so that we explitly mention these OD pairs tt_matrix_msoa_exp <- tidyr::crossing(from_id = tt_matrix_msoa$from_id, to_id = tt_matrix_msoa$to_id, combination = tt_matrix_msoa$combination) @@ -157,9 +157,11 @@ tt_matrix_msoa <- tt_matrix_msoa %>% # add combination column to cpc data cpc_matrices_all_internal <- cpc_matrices_all_internal %>% mutate(combination = case_when( - source %in% c("05-06", "06-07", "07-08", "08-09", "09-10", "10-11", "11-12") ~ "pt_wkday_morning", - source %in% c("12-13", "13-14", "14-15", "15-16", "16-17") ~ "pt_wkday_afternoon", - source %in% c("17-18", "18-19", "19-20") ~ "pt_wkday_evening") + source %in% c("05-06", "06-07", "07-08") ~ "pt_wkday_06_30", + source %in% c("08-09", "09-10", "10-11") ~ "pt_wkday_09_30", + source %in% c("11-12", "12-13", "13-14") ~ "pt_wkday_12_30", + source %in% c("14-15", "15-16", "16-17") ~ "pt_wkday_15_30", + source %in% c("17-18", "18-19", "19-20") ~ "pt_wkday_18_30") ) # group by combination column From f9bd6b427ea501eb6644fe3b97d4b7d141c07d17 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Mon, 18 Nov 2024 18:49:33 +0100 Subject: [PATCH 15/16] updates --- code/demand_cluster_flows_maps_temporal.R | 36 ++++++++------ code/demand_cluster_flows_prep.R | 1 - code/demand_cluster_flows_run.R | 57 +++++++++++++++++++---- 3 files changed, 71 insertions(+), 23 deletions(-) diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R index 3ca40fa..b85849b 100644 --- a/code/demand_cluster_flows_maps_temporal.R +++ b/code/demand_cluster_flows_maps_temporal.R @@ -21,7 +21,10 @@ clustering <- "equal" distance_threshold <- 50000 # 10000 # save plots? -save = FALSE +save = TRUE + +# # minimum number of commuters in a cluster for it to be part of our analysis +# commuters_sum_minimum = 150 # ------------------------ Load in the data -------------------------- # @@ -53,8 +56,15 @@ study_area <- study_area %>% relocate(all_of(geoid_col), .before = everything()) -plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/") - +polygons_path <-paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/polygons_combined/", "min_commuters_", commuters_sum_minimum, "/") +if (!dir.exists(polygons_path)) { + dir.create(polygons_path, recursive = TRUE) +} +plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/", "min_commuters_", commuters_sum_minimum, "/") +# Check if the directory exists, and if not, create it +if (!dir.exists(plots_path)) { + dir.create(plots_path, recursive = TRUE) +} # ------------------------- VISUALISE RESULTS ------------------------- # @@ -85,7 +95,7 @@ plot(cluster_dbscan_res["cluster"]) # get clusters to map cluster_dbscan_res %>% filter(size > 15, size < 5000) %>% - filter(commuters_sum > 200) %>% + filter(commuters_sum > commuters_sum_minimum) %>% filter(cluster != 0) -> clusters_vis @@ -295,7 +305,7 @@ if(save == TRUE){ clusters_vis_mode_poly <- clusters_vis_mode %>% # filter(size > 7, size < 5000) %>% - # filter(commuters_sum > 200) %>% + # filter(commuters_sum > commuters_sum_minimum) %>% filter(cluster != 0) %>% mutate(cluster = as.factor(cluster)) %>% #head(1000) %>% @@ -919,7 +929,7 @@ tm_shape(study_area) + # lines tm_shape(clusters_vis_mode %>% filter(size > 7, size < 5000) %>% - filter(commuters_sum > 200) %>% + filter(commuters_sum > commuters_sum_minimum) %>% filter(cluster != 0) %>% filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% mutate(cluster = as.factor(cluster)) %>% @@ -1023,7 +1033,7 @@ tm_shape(study_area) + # lines tm_shape(clusters_vis_mode %>% filter(size > 7, size < 5000) %>% - filter(commuters_sum > 200) %>% + filter(commuters_sum > commuters_sum_minimum) %>% filter(cluster != 0) %>% filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% mutate(cluster = as.factor(cluster)) %>% @@ -1048,7 +1058,7 @@ tm_shape(study_area) + # START AND ENDPOINTS tm_shape(clusters_vis_mode %>% filter(size > 7, size < 5000) %>% - filter(commuters_sum > 200) %>% + filter(commuters_sum > commuters_sum_minimum) %>% filter(cluster != 0) %>% filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% mutate(cluster = as.factor(cluster)) %>% @@ -1067,7 +1077,7 @@ tm_shape(study_area) + showNA = FALSE) + tm_shape(clusters_vis_mode %>% filter(size > 7, size < 5000) %>% - filter(commuters_sum > 200) %>% + filter(commuters_sum > commuters_sum_minimum) %>% filter(cluster != 0) %>% filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% mutate(cluster = as.factor(cluster)) %>% @@ -1161,7 +1171,7 @@ tm_shape(study_area) + # lines tm_shape(clusters_vis_mode %>% filter(size > 7, size < 5000) %>% - filter(commuters_sum > 200) %>% + filter(commuters_sum > commuters_sum_minimum) %>% filter(cluster != 0) %>% filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% mutate(cluster = as.factor(cluster)) %>% @@ -1186,7 +1196,7 @@ tm_shape(study_area) + # START AND ENDPOINTS tm_shape(clusters_vis_mode %>% filter(size > 7, size < 5000) %>% - filter(commuters_sum > 200) %>% + filter(commuters_sum > commuters_sum_minimum) %>% filter(cluster != 0) %>% filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% mutate(cluster = as.factor(cluster)) %>% @@ -1205,7 +1215,7 @@ tm_shape(study_area) + showNA = FALSE) + tm_shape(clusters_vis_mode %>% filter(size > 7, size < 5000) %>% - filter(commuters_sum > 200) %>% + filter(commuters_sum > commuters_sum_minimum) %>% filter(cluster != 0) %>% filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% mutate(cluster = as.factor(cluster)) %>% @@ -1473,7 +1483,7 @@ clusters_vis_mode_poly_filt3_all = clusters_vis_mode_poly_filt3 %>% mutate(scenario = day_time) # Save to make temporal comparison plot -st_write(clusters_vis_mode_poly_filt3_all, paste0(plots_path, "polygons_combined/", day_time, ".geojson"), delete_dsn = TRUE) +st_write(clusters_vis_mode_poly_filt3_all, paste0(polygons_path, day_time, ".geojson"), delete_dsn = TRUE) diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R index 75491c3..de523f8 100644 --- a/code/demand_cluster_flows_prep.R +++ b/code/demand_cluster_flows_prep.R @@ -305,7 +305,6 @@ plot(st_geometry(od_demand_jittered %>% # ----- Option 1: All OD pairs od_demand_1 <- od_demand_filtered - # ----- Option 2: OD pairs with poor PT supply (many transfers or low travel speed) od_demand_2 <- od_demand_filtered %>% diff --git a/code/demand_cluster_flows_run.R b/code/demand_cluster_flows_run.R index 6e97183..556bc2b 100644 --- a/code/demand_cluster_flows_run.R +++ b/code/demand_cluster_flows_run.R @@ -1,3 +1,8 @@ + + +# -------------------- Clusteting + + sensitivity = TRUE day_time = "pt_wkday_06_30" source("code/demand_cluster_flows.R") @@ -83,15 +88,15 @@ dbscan_sensitivity_res_compare %>% ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_compare.png"), width = 14, height = 10) - - +# minimum number of commuters in a cluster for it to be part of our analysis +commuters_sum_minimum = 150 # -------------- Plots day_time = "pt_wkday_06_30" source("code/demand_cluster_flows_maps_temporal.R") print(paste0("finished scenario: ", day_time)) # clear environment -rm(list = ls()) +rm(list = setdiff(ls(), "commuters_sum_minimum")) gc() @@ -99,7 +104,7 @@ day_time = "pt_wkday_09_30" source("code/demand_cluster_flows_maps_temporal.R") print(paste0("finished scenario: ", day_time)) # clear environment -rm(list = ls()) +rm(list = setdiff(ls(), "commuters_sum_minimum")) gc() @@ -107,7 +112,7 @@ day_time = "pt_wkday_12_30" source("code/demand_cluster_flows_maps_temporal.R") print(paste0("finished scenario: ", day_time)) # clear environment -rm(list = ls()) +rm(list = setdiff(ls(), "commuters_sum_minimum")) gc() @@ -115,7 +120,7 @@ day_time = "pt_wkday_15_30" source("code/demand_cluster_flows_maps_temporal.R") print(paste0("finished scenario: ", day_time)) # clear environment -rm(list = ls()) +rm(list = setdiff(ls(), "commuters_sum_minimum")) gc() @@ -123,10 +128,11 @@ day_time = "pt_wkday_18_30" source("code/demand_cluster_flows_maps_temporal.R") print(paste0("finished scenario: ", day_time)) # clear environment -rm(list = ls()) +rm(list = setdiff(ls(), "commuters_sum_minimum")) gc() + # ----- Facet map with different times of day source("R/study_area_geographies.R") @@ -164,7 +170,7 @@ oa_pop_density_crop <- st_crop(oa_pop_density, study_area) # -------- 4. DRT operating zone polygons # Specify the directory containing the GeoJSON files -directory_path <- "data/processed/plots/eda/od_clustering/MSOA/temporal/polygons_combined" +directory_path <- paste0("data/processed/plots/eda/od_clustering/MSOA/temporal/polygons_combined/", commuters_sum_minimum, "/") # Get the list of all GeoJSON files of DRT boundaries (1 file per scenario) geojson_files <- list.files(directory_path, pattern = "\\.geojson$", full.names = TRUE) @@ -238,7 +244,8 @@ tm_shape(st_union(study_area)) + map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES -plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/") +plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/", "min_commuters_", commuters_sum_minimum) + tmap_save(tm = map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES, filename = paste0(plots_path, "map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES.png"), width = 12, dpi = 1080, asp = 0) @@ -249,3 +256,35 @@ tmap_save(tm = map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet + + + + + + + + + + + + + + + + + + + +# ------------------------ Test clustering input -------------------------- # + + +od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson")) + + +od_demand_jittered %>% + st_drop_geometry() %>% + pivot_longer(cols = c(scenario_1, scenario_2, scenario_3), + names_to = "scenario") %>% + group_by(combination, scenario) %>% + summarise(number_of_ods = sum(value == 1)) + From f92199c92098bb0292d08f2405061e34c5daeae7 Mon Sep 17 00:00:00 2001 From: Hussein Mahfouz <45176416+Hussein-Mahfouz@users.noreply.github.com> Date: Mon, 9 Dec 2024 22:06:52 +0100 Subject: [PATCH 16/16] tmap lines legend --- code/demand_cluster_flows_maps_temporal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R index b85849b..4a34e0c 100644 --- a/code/demand_cluster_flows_maps_temporal.R +++ b/code/demand_cluster_flows_maps_temporal.R @@ -1184,7 +1184,7 @@ tm_shape(study_area) + alpha = 0.4, title.col = "Travel demand", #title.lwd = "No. of commuters", - legend.col.show = FALSE, + #legend.col.show = FALSE, legend.lwd.show = FALSE, # remove "missing from legend showNA = FALSE) +