diff --git a/R/dbscan_sensitivity.R b/R/dbscan_sensitivity.R index 1f813fa..0d5866d 100644 --- a/R/dbscan_sensitivity.R +++ b/R/dbscan_sensitivity.R @@ -5,13 +5,13 @@ #' @param options_minpts a vector of options for the minPts paramter #' @param weights a vector where each value represents the number of people going between an OD pair (to pass to the weights argument of dbscan) #' @param flows the original od matrix with a column that represents flows between od pairs -#' +#' @param flow_column the column that represents the number of people travelling between an OD pair #' @return a df with columns {id} (to identify the eps and minpts used), {cluster}, {size} (no of desire lines in cluster), commuters_sum (total no. of commuters in cluster) #' #' @examples #' #' @export -dbscan_sensitivity = function(distance_matrix, options_epsilon, options_minpts, weights, flows){ +dbscan_sensitivity = function(distance_matrix, options_epsilon, options_minpts, weights, flows, flow_column){ # dataframe with all combinations of eps and minpts options_parameters <- tidyr::expand_grid(eps = options_epsilon, minpts = options_minpts) @@ -53,7 +53,7 @@ dbscan_sensitivity = function(distance_matrix, options_epsilon, options_minpts, cluster_res_i <- cluster_dbscan_res_i %>% st_drop_geometry() %>% group_by(cluster) %>% - summarise(size = n(), commuters_sum = sum(commute_all)) %>% + summarise(size = n(), commuters_sum = sum(.data[[flow_column]])) %>% arrange(desc(size)) %>% ungroup() diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R index 5400e80..5c4b16d 100644 --- a/code/demand_cluster_flows.R +++ b/code/demand_cluster_flows.R @@ -16,12 +16,17 @@ source("R/dbscan_sensitivity.R") ########## ----------------------- Read in the data ----------------------- ########## geography <- "MSOA" +od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson")) # od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios.geojson")) -od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_mode.geojson")) # path tp save plots plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/") +# what combination are we clustering +# day_time = "pt_wkday_06_30" +# sensitivity analysis? +# sensitivity = FALSE + # ----------- 1. Study area # --- administrative boundaries @@ -42,6 +47,21 @@ geoid_col = paste0(geography, "21CD") study_area <- study_area %>% relocate(all_of(geoid_col), .before = everything()) +# add distance_m column + +crs_orig = st_crs(od_demand_jittered) +crs_metric = 3857 + +od_demand_jittered = od_demand_jittered %>% + st_transform(crs_metric) %>% + mutate(distance_m = units::drop_units(sf::st_length(.))) %>% + st_transform(crs_orig) %>% + filter(distance_m >= 500) + + + +od_demand_jittered = od_demand_jittered %>% + filter(combination == day_time) ########## ----------------------- Different parameter combinations ----------------------- ########## # # 1) All flows + equal weight to origins and destinations (for flow distance) @@ -57,7 +77,7 @@ study_area <- study_area %>% # # 3) Flows with poor PT supply and low potential demand + and equal weight to origins and destinations (for flow distance) scenario <- 3 clustering <- "equal" -distance_threshold <- round(max(od_demand_jittered$distance_m), -3) +distance_threshold <- 50000 # # # 2) Focusing on shorter distances # scenario <- 3 @@ -138,10 +158,10 @@ od_demand_xyuv <- od_demand_jittered %>% # assign unique id to each point od_demand_xyuv <- od_demand_xyuv %>% group_by(Origin, x, y) %>% - mutate(O_ID = cur_group_id()) %>% + mutate(O_ID = paste0(cur_group_id(), "_", row_number())) %>% ungroup() %>% group_by(Destination, u, v) %>% - mutate(D_ID = cur_group_id()) %>% + mutate(D_ID = paste0(cur_group_id(), "_", row_number())) %>% ungroup() # assign unique id to each OD pair @@ -261,11 +281,11 @@ w <- dist_mat %>% select(flow_ID) %>% # add commuting data to inner_join(od_demand_jittered %>% - select(flow_ID, commute_all), + select(flow_ID, total_flow), by = "flow_ID") # weight vector -w_vec <- as.vector(w$commute_all) +w_vec <- as.vector(w$total_flow) @@ -307,56 +327,60 @@ hist(distances$fds, breaks = 100) # # # ----- Sensitivity analysis(for different epsilon and minpts combinaitons) # -# # function to get clustering results for many combinations -# dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat, -# options_epsilon <- c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 5, 6, 7, 7.5, 8, 9), -# options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 700, 1000, 1500, 3000, 5000, 10000), -# weights = w_vec, -# flows = st_drop_geometry(od_demand_jittered) -# ) -# -# -# arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet")) -# # dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet")) -# -# -# # All results plotted together -# dbscan_sensitivity_res %>% -# filter(cluster != 0) %>% -# ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + -# geom_col() + -# scale_y_continuous(trans='log10') + -# facet_wrap(~id, scales = "fixed") + -# labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", -# subtitle = "Parameter combinations that returned more than 1 cluster", -# x = "Cluster no.", -# y = "No. of od pairs in cluster", -# fill= "No. of commuters") + -# theme_minimal() -# -# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_all.png", width = 14, height = 10) -# -# dbscan_sensitivity_res %>% -# filter(cluster != 0) %>% -# group_by(id) %>% -# #mutate(clusters = n()) %>% -# # How many clusters have more than 5 od pairs in them? -# mutate(clusters = sum(size > 5)) %>% -# ungroup() %>% -# filter(clusters > 5) %>% -# ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + -# geom_col() + -# scale_y_continuous(trans='log10') + -# facet_wrap(~id, scales = "fixed") + -# labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", -# subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", -# x = "Cluster no.", -# y = "No. of od pairs in cluster", -# fill= "No. of commuters") + -# theme_minimal() -# -# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_filtered.png", width = 14, height = 10) -# +# function to get clustering results for many combinations +if(sensitivity == TRUE){ + dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat, + options_epsilon <- c(1, 1.5, 2, 3, 4, 5, 6, 7, 8, 9), + options_minpts <- c(50, 75, 100, 150, 200, 300, 500, 1000, 2500), + weights = w_vec, + flows = st_drop_geometry(od_demand_jittered), + flow_column = "total_flow" + ) + + + arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_", day_time, ".parquet")) + # dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet")) + + + # All results plotted together + dbscan_sensitivity_res %>% + filter(cluster != 0) %>% + ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + + geom_col() + + scale_y_continuous(trans='log10') + + facet_wrap(~id, scales = "fixed") + + labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", + subtitle = "Parameter combinations that returned more than 1 cluster", + x = "Cluster no.", + y = "No. of od pairs in cluster", + fill= "No. of commuters") + + theme_bw() + + ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_all_", day_time, ".png"), width = 14, height = 10) + + dbscan_sensitivity_res %>% + filter(cluster != 0) %>% + group_by(id) %>% + #mutate(clusters = n()) %>% + # How many clusters have more than 5 od pairs in them? + mutate(clusters = sum(size > 5)) %>% + ungroup() %>% + filter(clusters > 5) %>% + ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + + geom_col() + + scale_y_continuous(trans='log10') + + facet_wrap(~id, scales = "fixed") + + labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", + subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", + x = "Cluster no.", + y = "No. of od pairs in cluster", + fill= "No. of commuters") + + theme_bw() + + ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_", day_time, ".png"), width = 14, height = 10) + +} + ### -------------------- STEP 3: Cluster -------------------- ### @@ -364,8 +388,8 @@ hist(distances$fds, breaks = 100) # cluster option 1: border points assigned to cluster cluster_dbscan = dbscan::dbscan(dist_mat, - minPts = 70, # 125 - eps = 8, # 9.5 + minPts = 50, # evening: 50 # 125 + eps = 7, # evening: 7 # 9.5 #borderPoints = FALSE, weights = w_vec) # # for splitted distance @@ -408,10 +432,46 @@ cluster_dbscan_res <- od_demand_jittered %>% # save -st_write(cluster_dbscan_res, paste0("data/processed/clustering/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, ".geojson"), delete_dsn = TRUE) - +st_write(cluster_dbscan_res, paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson"), delete_dsn = TRUE) +# +# +# +# # --- Compare clusters at different points in the day +# dbscan_sensitivity_res_wkday_06_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_06_30") +# dbscan_sensitivity_res_wkday_09_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_09_30") +# dbscan_sensitivity_res_wkday_12_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_12_30") +# dbscan_sensitivity_res_wkday_15_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_15_30") +# dbscan_sensitivity_res_wkday_18_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% +# mutate(scenario = "pt_wkday_18_30") +# +# dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_morning, +# dbscan_sensitivity_res_wkday_evening) +# +# +# dbscan_sensitivity_res_compare %>% +# filter(cluster != 0) %>% +# group_by(id) %>% +# #mutate(clusters = n()) %>% +# # How many clusters have more than 5 od pairs in them? +# mutate(clusters = sum(size > 25)) %>% +# ungroup() %>% +# filter(clusters > 25) %>% +# ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + +# geom_col() + +# scale_y_continuous(trans='log10') + +# facet_grid(id ~ scenario, scales = "fixed") + +# labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", +# subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each", +# x = "Cluster no.", +# y = "No. of od pairs in cluster", +# fill= "No. of commuters") + +# theme_bw() diff --git a/code/demand_cluster_flows_maps.R b/code/demand_cluster_flows_maps.R index 5aafdd1..38edb05 100644 --- a/code/demand_cluster_flows_maps.R +++ b/code/demand_cluster_flows_maps.R @@ -377,7 +377,7 @@ tm_shape(study_area) + #panel.show = FALSE, panel.label.size = 1, panel.label.bg.color = NA, - # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)), + # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)), frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly map_cluster_results_bus_frac_grouped_gtfs_poly @@ -742,7 +742,7 @@ tm_shape(study_area) + breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), palette = "RdYlGn", #Accent alpha = 0.4, - # title.col = "Fraction of Bus to Car users", + # title.col = "Fraction of Bus to Car users", #title.lwd = "No. of commuters", legend.col.show = FALSE, legend.lwd.show = FALSE, @@ -1143,7 +1143,7 @@ tm_shape(basemap_urban_rural) + ) + # ---- clusters # poly border -tm_shape(clusters_vis_mode_poly %>% + tm_shape(clusters_vis_mode_poly %>% filter(cluster %in% clusters_vis_mode_poly_filt$cluster)) + tm_borders(col = "black", lwd = 3, @@ -1154,9 +1154,9 @@ tm_shape(clusters_vis_mode_poly %>% nrow = rows, showNA = FALSE) + # poly fill -# tm_shape(cluster_dbscan_res_mode_poly_filt_max %>% -# filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>% -tm_shape(clusters_vis_mode_poly_filt %>% + # tm_shape(cluster_dbscan_res_mode_poly_filt_max %>% + # filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>% + tm_shape(clusters_vis_mode_poly_filt %>% st_buffer(1000)) + tm_borders(col = "darkgreen", lwd = 2) + @@ -1221,9 +1221,9 @@ tm_shape(basemap_urban_rural) + tm_shape(st_union(clusters_vis_mode_poly_filt %>% mutate(area = st_area(.)) %>% filter(area > 0.2 * mean(area)))) + - tm_borders(col = "darkgreen", - lwd = 3.5, - lty = "dashed") + + tm_borders(col = "darkgreen", + lwd = 3.5, + lty = "dashed") + tm_layout(fontfamily = 'Georgia', main.title = paste0("Potential DRT Service Areas"), main.title.size = 1.1, @@ -1487,8 +1487,8 @@ ggplot(clusters_ur_poly_combined) + #direction = -1, labels = function(x) str_wrap(x, width = 25)) + scale_color_manual(values = colors_urban_rural, - #direction = -1, - labels = function(x) str_wrap(x, width = 25)) + + #direction = -1, + labels = function(x) str_wrap(x, width = 25)) + labs(x = "", y = "Area covered by cluster (km2)", fill = "Rural / Urban Classification", @@ -1507,4 +1507,3 @@ ggplot(clusters_ur_poly_combined) + facet_wrap(facets = vars(cluster)) ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) - diff --git a/code/demand_cluster_flows_maps_temporal.R b/code/demand_cluster_flows_maps_temporal.R new file mode 100644 index 0000000..4a34e0c --- /dev/null +++ b/code/demand_cluster_flows_maps_temporal.R @@ -0,0 +1,2009 @@ +########################################################################################## +### The purpose of this script is to visualize the results from demand_cluster_flows.R ### +########################################################################################## + +library(tidyverse) +library(sf) +library(lwgeom) +library(geos) +# library(spatstat) +library(tmap) + +source("R/study_area_geographies.R") +source("R/filter_od_matrix.R") + + + +# ------------------------- Define the scenario ------------------------- # + +scenario <- 3 # 3, 2 +clustering <- "equal" +distance_threshold <- 50000 # 10000 + +# save plots? +save = TRUE + +# # minimum number of commuters in a cluster for it to be part of our analysis +# commuters_sum_minimum = 150 + +# ------------------------ Load in the data -------------------------- # + +# ----- Clustering results +# day_time = "pt_wkday_06_30" +cluster_dbscan_res <- st_read(paste0("data/processed/clustering/temporal/scenario_", scenario, "_distance_", distance_threshold, "_", clustering, "_", day_time, ".geojson")) + + +# ----- Study area + +# ----------- 1. Study area + +# --- administrative boundaries +study_area <- st_read("data/interim/study_area_boundary.geojson") + +# convert to desired resolution +geography = "MSOA" +study_area = study_area_geographies(study_area = study_area, + geography = geography) + +study_area <- study_area %>% + st_cast("MULTIPOLYGON") + +# move the geographic ID to the first column. od::points_to_od() only keeps the first column as ID + +geoid_col = paste0(geography, "21CD") + +study_area <- study_area %>% + relocate(all_of(geoid_col), .before = everything()) + + +polygons_path <-paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/polygons_combined/", "min_commuters_", commuters_sum_minimum, "/") +if (!dir.exists(polygons_path)) { + dir.create(polygons_path, recursive = TRUE) +} +plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/", "min_commuters_", commuters_sum_minimum, "/") +# Check if the directory exists, and if not, create it +if (!dir.exists(plots_path)) { + dir.create(plots_path, recursive = TRUE) +} + +# ------------------------- VISUALISE RESULTS ------------------------- # + +cluster_dbscan_res = cluster_dbscan_res %>% + rename(commute_all = total_flow) + + +# check size per cluster and total commuters per cluster +cluster_dbscan_res %>% + st_drop_geometry() %>% + group_by(cluster) %>% + summarise(size = n(), commuters_sum = sum(commute_all)) %>% + arrange(desc(size)) %>% + head(15) + +# add size and total commuters columns +cluster_dbscan_res <- cluster_dbscan_res %>% + group_by(cluster) %>% + mutate(size = n(), + commuters_sum = sum(commute_all)) %>% + ungroup() + +# plot +plot(cluster_dbscan_res["cluster"]) + +# ----- assign number of rows dynamically for facet plot + +# get clusters to map +cluster_dbscan_res %>% + filter(size > 15, size < 5000) %>% + filter(commuters_sum > commuters_sum_minimum) %>% + filter(cluster != 0) -> clusters_vis + + +# add alternative cluster labels +clusters_vis <- clusters_vis %>% + rename(cluster_orig = cluster) %>% + arrange(commuters_sum) %>% + group_by(cluster_orig) %>% + mutate(cluster = cur_group_id()) %>% + ungroup() + + +# Keep only the 12 largest clusters by commuters_sum if there are more than 12 +if (n_distinct(clusters_vis$cluster) > 12) { + top_clusters <- clusters_vis %>% + group_by(cluster) %>% + summarise(total_commuters = first(commuters_sum)) %>% + arrange(desc(total_commuters)) %>% + slice_head(n = 12) %>% + pull(cluster) + + clusters_vis <- clusters_vis %>% + filter(cluster %in% top_clusters) +} + +# we want maximum 4 maps per row +rows <- round(length(unique(clusters_vis$cluster)) / 3) + + +##### ---------- MAPS ---------- ##### +# +# ### ---------- Plot 1: Top n Clusters (Facet Plot) ---------- ### +# +# tm_shape(study_area) + +# tm_borders(col = "grey60", +# alpha = 0.5) + +# tm_shape(study_area) + +# tm_fill(col = "grey95", +# alpha = 0.5) + +# tm_shape(clusters_vis) + +# tm_lines(lwd = "commute_all", +# col = "cluster", +# #col = "darkgreen", +# scale = 10, +# palette = "Accent", #YlGn +# #style = "pretty", +# alpha = 1, +# title.col = "Cluster", +# title.lwd = "No. of commuters", +# legend.col.show = FALSE, +# # remove "missing from legend +# showNA = FALSE) + +# tm_facets(by = "cluster", +# free.coords = FALSE, +# nrow = rows, +# showNA = FALSE) + +# tm_layout(fontfamily = 'Georgia', +# main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), +# main.title.size = 1.1, +# main.title.color = "azure4", +# main.title.position = "left", +# legend.outside = TRUE, +# legend.outside.position = "bottom", +# legend.stack = "horizontal", +# # remove panel headers +# panel.show = FALSE, +# frame = FALSE) -> map_cluster_results +# +# map_cluster_results +# +# if(save == TRUE){ +# tmap_save(tm = map_cluster_results, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_.png"), width = 12, dpi = 1080, asp = 0) +# } + + +### ---------- Plot 2: Compare mode composition of each cluster (desire line level) ---------- ### + +clusters_vis_mode <- clusters_vis + +### ---------- Plot 3: Compare mode composition of each cluster (cluster level) ---------- ### + + +# Get mode composition of entire cluster + +clusters_vis_mode_summary <- clusters_vis_mode %>% + st_drop_geometry() %>% + group_by(cluster, size) %>% + summarise(across(starts_with("commute_"), sum)) %>% + ungroup() + +# add commuting fraction +clusters_vis_mode <- clusters_vis_mode %>% + inner_join(clusters_vis_mode_summary %>% select(cluster), by = "cluster") + +# +# tm_shape(study_area) + +# tm_borders(col = "grey60", +# alpha = 0.5) + +# tm_shape(study_area) + +# tm_fill(col = "grey95", +# alpha = 0.5) + +# tm_shape(clusters_vis_mode) + +# tm_lines(lwd = "commute_all", +# col = "commute_all", +# scale = 10, +# palette = "RdYlGn", #Accent +# #style = "pretty", +# alpha = 1, +# title.col = "Travel demand", +# title.lwd = "No. of commuters", +# #legend.col.is.portrait = FALSE, +# # remove "missing from legend +# showNA = FALSE) + +# tm_facets(by = "cluster", +# free.coords = FALSE, +# nrow = rows, +# showNA = FALSE) + +# tm_layout(fontfamily = 'Georgia', +# main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), +# main.title.size = 1.1, +# main.title.color = "azure4", +# main.title.position = "left", +# # legend.outside = TRUE, +# # legend.outside.position = "bottom", +# # legend.stack = "horizontal", +# # remove panel headers +# panel.show = FALSE, +# frame = FALSE) -> map_cluster_results_bus_frac_grouped +# +# map_cluster_results_bus_frac_grouped +# +# if(save == TRUE){ +# tmap_save(tm = map_cluster_results_bus_frac_grouped, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_.png"), width = 12, dpi = 1080, asp = 0) +# } + +# --- Map with clusters ovelayed onto bus routes + + + +gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf_temporal.geojson") + +# gtfs_bus <- gtfs_bus %>% +# st_filter(st_union(study_area), .predicate = st_within) + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey75", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # clusters + tm_shape(clusters_vis_mode) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 10, + palette = "RdYlGn", #Accent + #style = "pretty", + alpha = 1, + title.col = "Travel demand", + title.lwd = "No. of commuters", + #legend.col.is.portrait = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + panel.show = FALSE, + frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs + +map_cluster_results_bus_frac_grouped_gtfs + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_.png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# --- Map with clusters as polygons (convex_hull()) + +# turn clusters into polygons using convex hull + +clusters_vis_mode_poly <- clusters_vis_mode %>% + # filter(size > 7, size < 5000) %>% + # filter(commuters_sum > commuters_sum_minimum) %>% + filter(cluster != 0) %>% + mutate(cluster = as.factor(cluster)) %>% + #head(1000) %>% + group_by(cluster) %>% + summarize(commuters_sum = first(commuters_sum), + geometry = st_convex_hull(st_union(geometry))) %>% + ungroup() + + +# plot + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # clusters + tm_shape(clusters_vis_mode_poly) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + #style = "pretty", + alpha = 0.3, + title = "Travel demand", + #legend.col.is.portrait = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + #panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)), + frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly + +map_cluster_results_bus_frac_grouped_gtfs_poly + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly.png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# --- Map with clusters as polygons (convex_hull()) + lines in background + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + #legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly +tm_shape(clusters_vis_mode_poly) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Fraction of Bus \nto Car users", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)), + #panel.show = FALSE, + frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines.png"), width = 12, dpi = 1080, asp = 0) +} + + +# ------ Map with clusters as polygons (convex_hull()) + lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS + + +# GTFS: Approach 1 - st_union() +# get high frequency buses +gtfs_bus_freq <- gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 3600) %>% + # this is necessary so that geos::geos_concave_hull gives useful results later + st_filter(st_union(study_area), .predicate = st_within) %>% + st_make_valid() + +# convert to one geom in order to intersect by +gtfs_bus_freq1 <- gtfs_bus_freq %>% + st_transform(3857) %>% + st_buffer(1000) %>% + st_union() + +clusters_vis_mode_poly_filt <- st_difference(clusters_vis_mode_poly, gtfs_bus_freq1) + + + +# convert from MULTIPOLYGON to POLYGON and retain largest geom only for each multipolygon +clusters_vis_mode_poly_filt <- clusters_vis_mode_poly_filt %>% + st_cast("MULTIPOLYGON") %>% + st_cast("POLYGON") %>% + st_make_valid() + +# retain largest poly in each cluster +clusters_vis_mode_poly_filt_max <- clusters_vis_mode_poly_filt %>% + mutate(area = st_area(.)) %>% + group_by(cluster) %>% + filter(area == max(area)) + +# convex hull for aesthetic +clusters_vis_mode_poly_filt_max <- st_convex_hull(clusters_vis_mode_poly_filt_max) + + + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(cluster %in% clusters_vis_mode_poly_filt_max$cluster)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + #legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt_max$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt_max %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt_max$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed")-> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff.png"), width = 12, dpi = 1080, asp = 0) +} + + + +# --- Map with clusters as polygons (convex_hull()) + WITHOUT lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt_max$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt_max %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt_max$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff.png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# --- Map with clusters as polygons (convex_hull()) + lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS + + +# GTFS: APPROACH 2a: concave_hull() AFTER st_union() +gtfs_bus_freq2 <- gtfs_bus_freq %>% + st_transform(3857) %>% + #st_union() %>% + geos::geos_concave_hull(ratio = 0.5, allow_holes = FALSE) %>% + st_as_sf() %>% + st_union() %>% + st_set_crs(3857) %>% + st_buffer(250) + + +# st_difference to get non overlapping geoms +clusters_vis_mode_poly_filt2 <- st_difference(clusters_vis_mode_poly, gtfs_bus_freq2) + + + +# convert from MULTIPOLYGON to POLYGON and retain largest geom only for each multipolygon +clusters_vis_mode_poly_filt2 <- clusters_vis_mode_poly_filt2 %>% + st_cast("MULTIPOLYGON") %>% + st_cast("POLYGON") %>% + st_make_valid() + +# union geoms per cluster +clusters_vis_mode_poly_filt2 <- clusters_vis_mode_poly_filt2 %>% + group_by(cluster, commuters_sum) %>% + summarise(geometry = st_union(geometry)) %>% + ungroup() + + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(cluster %in% clusters_vis_mode_poly_filt2$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + arrange(commute_all)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + #legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt2$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt2 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows (OD", scenario, ") - ", day_time), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt2$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave.png"), width = 12, dpi = 1080, asp = 0) +} + + + + + + +# --- Map with clusters as polygons (convex_hull()) + WITHOUT lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS ----- CONCAVE HULL + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt2$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt2 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt2$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave.png"), width = 12, dpi = 1080, asp = 0) +} + + + +# GTFS: APPROACH 2b: concave_hull() AFTER st_union() +gtfs_bus_freq3 <- gtfs_bus_freq %>% + st_transform(3857) %>% + st_buffer(100) %>% + st_union() %>% + geos::geos_concave_hull(ratio = 0.25, allow_holes = TRUE) %>% + st_as_sf() %>% + st_set_crs(3857) %>% + st_make_valid() + + +# st_difference to get non overlapping geoms +clusters_vis_mode_poly_filt3 <- st_difference(clusters_vis_mode_poly, gtfs_bus_freq3) + + + +# convert from MULTIPOLYGON to POLYGON and retain largest geom only for each multipolygon +clusters_vis_mode_poly_filt3 <- clusters_vis_mode_poly_filt3 %>% + st_cast("MULTIPOLYGON") %>% + st_cast("POLYGON") %>% + st_make_valid() + + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus_freq %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > commuters_sum_minimum) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + arrange(commuters_sum)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt3 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2 + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2 + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2.png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# --- Same as above but with startpoint and endpoint labelled + + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus_freq %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > commuters_sum_minimum) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + arrange(commuters_sum)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + #legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # START AND ENDPOINTS + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > commuters_sum_minimum) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + mutate(geometry = st_startpoint(.)) %>% + arrange(commuters_sum)) + + tm_dots(col = "darkgreen", + alpha = 0.6, + jitter = 0.01, + size = "commute_all", + scale = 0.5, + legend.size.show = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > commuters_sum_minimum) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + mutate(geometry = st_endpoint(.)) %>% + arrange(commuters_sum)) + + tm_dots(col = "darkred", + alpha = 0.6, + jitter = 0.01, + size = "commute_all", + scale = 0.5, + legend.size.show = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt3 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + panel.label.size = 1, + panel.label.bg.color = NA, + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") + + tm_add_legend(type = "symbol", labels = 'OD Start', col = 'darkgreen') + + tm_add_legend(type = "symbol", labels = 'OD End', col = 'darkred') -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints.png"), width = 12, dpi = 1080, asp = 0) +} + + +# Presentation Style - 2 Rows Only + +rows_ppt = 3 + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # lines + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > commuters_sum_minimum) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + arrange(commuters_sum)) + + tm_lines(lwd = "commute_all", + col = "commute_all", + scale = 5, + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + palette = "RdYlGn", #Accent + alpha = 0.4, + title.col = "Travel demand", + #title.lwd = "No. of commuters", + #legend.col.show = FALSE, + legend.lwd.show = FALSE, + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + # START AND ENDPOINTS + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > commuters_sum_minimum) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + mutate(geometry = st_startpoint(.)) %>% + arrange(commuters_sum)) + + tm_dots(col = "darkgreen", + alpha = 0.6, + jitter = 0.01, + size = "commute_all", + scale = 0.5, + legend.size.show = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + tm_shape(clusters_vis_mode %>% + filter(size > 7, size < 5000) %>% + filter(commuters_sum > commuters_sum_minimum) %>% + filter(cluster != 0) %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + mutate(cluster = as.factor(cluster)) %>% + mutate(geometry = st_endpoint(.)) %>% + arrange(commuters_sum)) + + tm_dots(col = "darkred", + alpha = 0.6, + jitter = 0.01, + size = "commute_all", + scale = 0.5, + legend.size.show = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt3 %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows_ppt, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + panel.label.size = 1, + panel.label.bg.color = NA, + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") + + tm_add_legend(type = "symbol", labels = 'OD Start', col = 'darkgreen') + + tm_add_legend(type = "symbol", labels = 'OD End', col = 'darkred') -> map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt + +map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_lines_bus_diff_concave2_endpoints_ppt.png"), width = 12, dpi = 1080, asp = 0) +} + + + +# --- Map with clusters as polygons (convex_hull()) + WITHOUT lines in background - CROP TO AREAS NOT OVERLAPPING GTFS BUS ----- CONCAVE HULL + + +tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(study_area) + + tm_fill(col = "grey95", + alpha = 0.5) + + # bus layer + tm_shape(gtfs_bus_freq %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "grey70", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 2, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + tm_shape(clusters_vis_mode_poly_filt3 %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) %>% + st_buffer(1000)) + + tm_polygons(col = "commuters_sum", + palette = "RdYlGn", #Accent + # # breaks = c(0, 0.25, 0.5, 0.75, 1, Inf), + #style = "pretty", + alpha = 0.2, + title = "Travel demand \n(in potential \nDRT zone)", + # remove "missing from legend + showNA = FALSE) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2 + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2 + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave2, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave2.png"), width = 12, dpi = 1080, asp = 0) +} + + + + +########## - ADD RURAL URBAN AS BASE MAP + +# --- 2011 map and urban rural classification +basemap <- st_read("data/external/oa_england_2011/OA_2011_EW_BGC_V2.shp") %>% + st_transform(st_crs(study_area)) %>% + st_make_valid() + +urban_rural_csv <- read_csv("data/external/census_2011_rural_urban.csv") + + +# add numeric columns for coloring +urban_rural_csv <- urban_rural_csv %>% + mutate(RUC11CD_NM = case_when(RUC11 == "Urban major conurbation" ~ 10, + RUC11 == "Urban minor conurbation" ~ 9, + RUC11 == "Urban city and town" ~ 8, + RUC11 == "Urban city and town in a sparse setting" ~ 7, + RUC11 == "Rural town and fringe" ~ 6, + RUC11 == "Rural town and fringe in a sparse setting" ~ 5, + RUC11 == "Rural village" ~ 4, + RUC11 == "Rural village in a sparse setting" ~ 3, + RUC11 == "Rural hamlets and isolated dwellings" ~ 2, + RUC11 == "Rural hamlets and isolated dwellings in a sparse setting" ~ 1)) %>% + arrange(RUC11CD_NM) %>% + # turn column to factor + mutate(RUC11CD_NM_FCT = as.factor(RUC11CD_NM)) +# join +basemap_urban_rural <- basemap %>% + left_join(urban_rural_csv, by = "OA11CD") + + +# --- filter to study area +basemap_urban_rural <- basemap_urban_rural %>% + st_filter(st_union(study_area) %>% + st_make_valid(), + .predicate = st_intersects) + + + + + + +tm_shape(basemap_urban_rural) + + tm_fill(col = "RUC11CD_NM", + palette = "Greys", + alpha = 0.5, + title = "Level of \nUrbanisation") + + # bus layer + tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly border + tm_shape(clusters_vis_mode_poly %>% + filter(cluster %in% clusters_vis_mode_poly_filt3$cluster)) + + tm_borders(col = "black", + lwd = 3, + lty = "dashed") + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + # poly fill + # tm_shape(cluster_dbscan_res_mode_poly_filt_max %>% + # filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>% + tm_shape(clusters_vis_mode_poly_filt3 %>% + st_buffer(1000)) + + tm_borders(col = "darkgreen", + lwd = 2) + + tm_facets(by = "cluster", + #by = "commute_all", + free.coords = FALSE, + nrow = rows, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Clustered flows"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Cluster', col = 'black', lwd = 2, lty = "dashed") + + tm_add_legend(type = "line", labels = 'Potential DRT\nservice area', col = 'darkgreen', lwd = 2) -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# ---------------------- ONE BIG MAP + +# Combine cluster polygons +clusters_vis_mode_poly_filt3_all = clusters_vis_mode_poly_filt3 %>% + mutate(area = st_area(.)) %>% + filter(area > 0.2 * mean(area)) %>% + st_union() %>% + st_make_valid() %>% + st_as_sf() %>% + mutate(scenario = day_time) + +# Save to make temporal comparison plot +st_write(clusters_vis_mode_poly_filt3_all, paste0(polygons_path, day_time, ".geojson"), delete_dsn = TRUE) + + + + +tm_shape(basemap_urban_rural) + + tm_fill(col = "RUC11CD_NM", + palette = "Greys", + alpha = 0.5, + title = "Level of \nUrbanisation") + + # bus layer +tm_shape(gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly fill + #tm_shape(st_union(clusters_vis_mode_poly_filt3)) + +tm_shape(clusters_vis_mode_poly_filt3_all) + + tm_borders(col = "darkgreen", + lwd = 3.5, + lty = "dashed") + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Potential DRT Service Areas"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2.5, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map.png"), width = 12, dpi = 1080, asp = 0) +} + + + + +# ----- ONE BIG MAP (gtfs routes overlined) + +gtfs_bus_overline <- gtfs_bus %>% + filter(scenario == day_time) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200) + +gtfs_bus_overline = stplanr::overline(gtfs_bus_overline, attrib = "headway_inv", fun = sum) + +gtfs_bus_overline = gtfs_bus_overline %>% + mutate(headway_inv = round(headway_inv)) + + +tm_shape(basemap_urban_rural) + + tm_fill(col = "RUC11CD_NM", + palette = "Greys", + alpha = 0.5, + title = "Level of \nUrbanisation") + + # bus layer + tm_shape(gtfs_bus_overline) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 20, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.7, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly fill + #tm_shape(st_union(clusters_vis_mode_poly_filt3)) + + tm_shape(clusters_vis_mode_poly_filt3_all) + + tm_borders(col = "darkgreen", + lwd = 3.5, + lty = "dashed") + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Potential DRT Service Areas"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2.5, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation_one_map_overline.png"), width = 12, dpi = 1080, asp = 0) +} + +# ----- ONE BIG MAP (pop density as background) + + +# --- pop density 1km grid +oa_pop_density <- stars::read_stars("data/external/population_density_grid_uk/gbr_pd_2020_1km_UNadj.tif") +# crop to study area +oa_pop_density_crop <- st_crop(oa_pop_density, study_area) + +tm_shape(st_union(study_area)) + + tm_borders(lwd =2, + col = "grey15") + + tm_shape(oa_pop_density_crop) + + tm_raster(title = "People / Km2", + palette = "Blues", + alpha = 0.5, + style = "log10_pretty") + #log10_pretty #jenks + # bus layer + tm_shape(gtfs_bus_overline) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 20, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.6, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + # ---- clusters + # poly fill + #tm_shape(st_union(clusters_vis_mode_poly_filt3)) + + tm_shape(clusters_vis_mode_poly_filt3_all) + + tm_borders(col = "darkgreen", + lwd = 4, + lty = "dashed") + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Potential DRT Service Areas"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2.5, lty = "dashed") -> map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline + +map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline + +if(save == TRUE){ + tmap_save(tm = map_cluster_results_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_ONE_MAP_overline, filename = paste0(plots_path, "map_clusters_scenario_", scenario, "_", day_time, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_pop_density_one_map_overline.png"), width = 12, dpi = 1080, asp = 0) +} + + + +# ##### ---------- FIGURES ---------- ##### +# +# ### ---------- 1. Get unjittered OD data (code from code/demand_cluster_flows_prep.R) - travel times haven't been affected by jittering +# +# # is the data disaggregated by mode? +# mode <- TRUE +# #mode <- FALSE +# +# # Demand (census) + supply (travel time) data +# +# if(mode == FALSE){ +# # data with "commute_all" only +# #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) +# od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) +# } else{ +# # data with modes +# #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet")) +# od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet")) +# } +# +# # filter to specific combination +# # TODO: get seperate flows for car and pt, and keep two combinations +# od_demand <- od_demand %>% +# filter(combination == "pt_wkday_morning") +# +# od_demand <- od_demand %>% +# select(-distance_m) +# +# # --- create desire lines and remove od pairs with very short distance +# +# # TODO: edit this to avoid clusters of very short flows +# # "Density-based clustering for bivariate-flow data" (section 5.2): preprocessing step to avoid +# # clusters of very short flows. this involves splitting the data into 3 chunks +# # based on length ( +# od_demand_filtered = filter_matrix_by_distance(zones = study_area, +# od_matrix = od_demand, +# dist_threshold = 1000) +# +# # add unique id for each row +# od_demand_filtered <- od_demand_filtered %>% +# mutate(od_id = paste0(Origin, "-", Destination, "-", combination)) +# +# +# +# +# # ----- add the clustering results to the unjittered demand +# od_demand_figures <- cluster_dbscan_res %>% +# select(Origin, Destination, starts_with("commute_"), od_id, flow_ID, cluster, size, commute_all) %>% +# left_join(od_demand_filtered %>% +# st_drop_geometry() %>% +# distinct(od_id, .keep_all = TRUE) %>% +# # travel times before being ruined by od_jitter() +# select(od_id, ends_with("_time"), n_rides, starts_with("speed_"), starts_with("ride_time_")), +# by = "od_id") +# +# # match existing clusters +# # od_demand_figures_filt <- od_demand_figures %>% +# # filter(size > 7, size < 5000) %>% +# # filter(commute_all > 200) %>% +# # filter(cluster != 0) +# +# od_demand_figures_filt <- od_demand_figures %>% +# rename(cluster_orig = cluster) %>% +# inner_join(clusters_vis %>% +# st_drop_geometry() %>% +# select(flow_ID, cluster), +# by = "flow_ID") %>% +# # keep only the clusters after intersection with gtfs +# filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) +# + + +# +# # ----- plots +# +# # scatter plots with x: commute_all, y: fraction of bus commuters +# ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = commute_bus)) + +# geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + +# geom_point() + +# ylim(0, 5) + +# scale_color_distiller(palette= "RdYlGn", direction = 1) + +# labs(x = "total no. of commuters", +# y = "Bus / Car commuters (fraction)", +# title = "Composition of clusters", +# subtitle = "No. of commuters", +# color = "Commuters (bus)") + +# facet_wrap(facets = vars(cluster)) + +# theme_bw() + +# theme(legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_scatter_commuters_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# # paste0(plots_path, "map_clusters_scenario_", scenario, "_", clustering, "_length_", distance_threshold, "_bus_frac_grouped_gtfs_poly_bus_diff_concave_urbanisation.png"), width = 12, dpi = 1080, asp = 0) +# +# +# ggplot(od_demand_figures_filt %>% st_drop_geometry(), aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) + +# geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + +# geom_point() + +# ylim(0, 5) + +# scale_color_distiller(palette= "RdYlGn", direction = 1) + +# labs(x = "total no. of commuters", +# y = "Bus / Car commuters (fraction)", +# color = "Average speed of \nbus commute (kph)", +# title = "Composition of clusters: \nNo. of commuters in each OD pair") + +# facet_wrap(facets = vars(cluster)) + +# theme_bw() + +# theme(legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# # same but replacing NA speed for min speed +# ggplot(od_demand_figures_filt %>% +# st_drop_geometry() %>% +# mutate(speed_kph = replace_na(speed_kph, min(speed_kph, na.rm = TRUE))), +# aes(x = commute_all, y = commute_bus / commute_car, color = speed_kph)) + +# geom_point(data = transform(od_demand_figures, cluster = NULL), colour = "grey85") + +# geom_point() + +# ylim(0, 5) + +# scale_color_distiller(palette= "RdYlGn", direction = 1) + +# labs(x = "total no. of commuters", +# y = "Bus / Car commuters (fraction)", +# color = "Average speed of \nbus commute (kph)", +# title = "Composition of clusters: \nNo. of commuters in each OD pair") + +# facet_wrap(facets = vars(cluster)) + +# theme_bw() + +# theme(legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_scatter_commuters_color_speed_facet_cluster_scenario_", scenario, "_length_", distance_threshold, "_no_NA.png"), height = 8, width = 6) +# +# # ----- Get the spatial coverage of each cluster (what portion is in urban / rural etc?) +# +# # get intersection +# clusters_vis_mode_poly %>% +# st_intersection(basemap_urban_rural %>% +# st_transform(st_crs(clusters_vis_mode_poly))) %>% +# # get area +# mutate(area_km = as.numeric(st_area(.) / 1000000)) %>% +# # area by cluster +# st_drop_geometry() %>% +# group_by(cluster, RUC11, RUC11CD_NM_FCT) %>% +# summarise(area_km = sum(area_km)) %>% +# # keep only the clusters after intersection with gtfs +# filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly +# +# # define custom color paletter +# colors_urban_rural <- (c("#01665E", "#35978F", "#80CDC1", "#DFC27D", "#8C510A")) +# # order column based on the RUC11CD_NM_FCT column (degree of urbanisation) +# clusters_ur_poly$RUC11 <- factor(clusters_ur_poly$RUC11, levels = unique(clusters_ur_poly$RUC11)[order(clusters_ur_poly$RUC11CD_NM_FCT)]) +# +# ggplot(clusters_ur_poly, aes(x = RUC11, y = area_km, fill = RUC11)) + +# geom_col(color = "grey60") + +# scale_fill_manual(values = colors_urban_rural, +# labels = function(x) str_wrap(x, width = 25)) + +# labs(x = "", +# y = "Area covered by cluster (km2)", +# color = "Rural / Urban Classification", +# title = "Composition of clusters: Rural / Urban") + +# theme_bw() + +# theme(axis.text.x = element_blank(), +# axis.ticks.x=element_blank(), +# legend.position = "bottom", +# legend.title = element_blank()) + +# guides(fill = guide_legend(nrow = 3)) + +# facet_wrap(facets = vars(cluster)) +# +# ggsave(paste0(plots_path, "figure_bar_urban_rural_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# +# # Same but for filtered polygon +# +# clusters_vis_mode_poly_filt3 %>% +# st_intersection(basemap_urban_rural %>% +# st_transform(st_crs(clusters_vis_mode_poly))) %>% +# # get area +# mutate(area_km = as.numeric(st_area(.) / 1000000)) %>% +# # area by cluster +# st_drop_geometry() %>% +# group_by(cluster, RUC11, RUC11CD_NM_FCT) %>% +# summarise(area_km = sum(area_km)) %>% +# # keep only the clusters after intersection with gtfs +# filter(cluster %in% clusters_vis_mode_poly_filt3$cluster) -> clusters_ur_poly_filt +# +# clusters_ur_poly_filt$RUC11 <- factor(clusters_ur_poly_filt$RUC11, levels = unique(clusters_ur_poly_filt$RUC11)[order(clusters_ur_poly_filt$RUC11CD_NM_FCT)]) +# +# +# ggplot(clusters_ur_poly_filt, aes(x = RUC11, y = area_km, fill = RUC11)) + +# geom_col(color = "grey60") + +# scale_fill_manual(values = colors_urban_rural, +# #direction = -1, +# labels = function(x) str_wrap(x, width = 25)) + +# labs(x = "", +# y = "Area covered by cluster (km2)", +# color = "Rural / Urban Classification", +# title = "Composition of clusters: Rural / Urban") + +# theme_bw() + +# theme(axis.text.x = element_blank(), +# axis.ticks.x=element_blank(), +# legend.position = "bottom", +# legend.title = element_blank()) + +# guides(fill = guide_legend(nrow = 3)) + +# facet_wrap(facets = vars(cluster)) +# +# ggsave(paste0(plots_path, "figure_bar_urban_rural_filtered_by_gtfs_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# +# +# +# ### ---- join the data to plot together +# clusters_ur_poly %>% +# left_join(clusters_ur_poly_filt %>% +# rename(area_km_filt = area_km), +# by = c("cluster", "RUC11")) -> clusters_ur_poly_combined +# +# +# +# # --- plot both together +# ggplot(clusters_ur_poly_combined) + +# geom_col(aes(x = RUC11, y = area_km, color = RUC11, alpha = 0.01)) + +# geom_col(aes(x = RUC11, y = area_km_filt, fill = RUC11)) + +# scale_fill_manual(values = colors_urban_rural, +# #direction = -1, +# labels = function(x) str_wrap(x, width = 25)) + +# scale_color_manual(values = colors_urban_rural, +# #direction = -1, +# labels = function(x) str_wrap(x, width = 25)) + +# labs(x = "", +# y = "Area covered by cluster (km2)", +# fill = "Rural / Urban Classification", +# title = "Composition of clusters: Rural / Urban", +# subtitle = "Before and after intersecting with bus network", +# caption = "border: area covered by entire cluster \nfill: area covered by cluster after intersection with bus network ") + +# guides(color = "none", alpha = "none") + +# scale_alpha_identity() + # Maintain alpha value +# theme_bw() + +# theme(axis.text.x = element_blank(), +# axis.ticks.x=element_blank(), +# legend.position = "bottom", +# legend.title = element_blank(), +# plot.caption = element_text(hjust = 0)) + +# guides(fill = guide_legend(nrow = 3)) + +# facet_wrap(facets = vars(cluster)) +# +# ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6) +# +# +# +# +# +# # ---------- PLots of line bearings in each cluster +# +# # --- calculate bearings +# +# od_demand_figures_bearings <- od_demand_figures_filt %>% +# mutate(bearing = stplanr::line_bearing(.), +# bearing_adjusted = case_when(bearing < 0 ~ bearing + 360, +# .default = bearing), +# bearing_180 = case_when(bearing < 0 ~ bearing * -1, +# .default = bearing)) +# +# +# +# # add length column +# od_demand_figures_bearings <- od_demand_figures_bearings %>% +# mutate(distance_m = units::drop_units(sf::st_length(.))) +# +# # --- get columns for distance and bearing "groups" - for facet plots cut distance angle groups for plots +# +# +# # Define the breaks for the buckets +# #breaks_angle <- seq(-10, 370, by = 20) +# breaks_angle <- seq(0, 360, by = 30) +# #breaks_angle <- seq(0, 180, by = 15) +# breaks_distance <- seq(0, 50, by = 10) +# +# od_demand_figures_bearings <- od_demand_figures_bearings %>% +# mutate(bucket = cut(bearing_adjusted, breaks = breaks_angle, right = FALSE, include.lowest = TRUE), +# # mutate(bucket = cut(bearing_180, breaks = breaks_angle, right = FALSE, include.lowest = TRUE), +# bucket_distance = cut(distance_m / 1000, breaks = breaks_distance, right = FALSE, include.lowest = TRUE)) %>% +# mutate(bucket_distance = fct_rev(bucket_distance)) +# +# # Ensure the bucket factor levels are ordered correctly +# #bearings_cat$bucket <- factor(bearings_cat$bucket, levels = unique(bearings_cat$bucket), ordered = TRUE) +# +# +# # --- Plot with no of lines only (geom_bar()) +# +# +# # bar +# ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) + +# geom_bar() + +# facet_wrap(facets = "cluster") + +# labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") + +# scale_x_discrete(labels = breaks_angle) + +# theme(axis.text.x = element_text(angle = 45, hjust = 1), +# legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_bar_bearing_y_ods_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8) +# +# +# # circle +# ggplot(od_demand_figures_bearings, aes(x = bucket, fill = bucket_distance)) + +# geom_bar() + +# coord_polar(start = -0.1) + +# facet_wrap(facets = "cluster") + +# labs(x = "Bearing", y = "OD pairs", fill = "OD Pair Length") + +# #scale_x_discrete(labels = breaks_angle) + +# scale_x_discrete(labels = breaks_angle) + +# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +# +# +# +# # --- Plot with total commuters geom_col() +# +# +# # bar +# ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + +# geom_col() + +# #coord_polar() + +# facet_wrap(facets = "cluster") + +# labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") + +# scale_x_discrete(labels = breaks_angle) + +# theme(axis.text.x = element_text(angle = 45, hjust = 1), +# legend.position = "bottom") +# +# ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_cluster_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 8) +# +# # circle +# ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + +# geom_col() + +# coord_radial(rotate.angle = TRUE) + +# facet_wrap(facets = "cluster") + +# labs(x = "Bearing", y = "Commuters", fill = "OD Pair Length") + +# #scale_x_discrete(labels = breaks_angle) + +# scale_x_discrete(labels = breaks_angle) + +# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +# +# +# +# +# # TESTING FACET GRID +# +# # Select labels to print (one in every three) +# labels_to_print <- levels(od_demand_figures_bearings$bucket)[seq(1, length(levels(od_demand_figures_bearings$bucket)), by = 3)] +# +# +# ggplot(od_demand_figures_bearings, aes(x = bucket, y = commute_all, fill = bucket_distance)) + +# geom_col() + +# #coord_polar() + +# facet_grid(bucket_distance ~ cluster, +# drop = TRUE) + +# labs(x = "Bearing", y = "Total Commuters", fill = "OD Pair Length") + +# # scale_x_discrete(labels = breaks_angle) + +# scale_x_discrete(labels = function(x) ifelse(x %in% labels_to_print, x, "")) + # Print one in every three labels # theme_minimal() + +# theme_bw() + +# theme(axis.text.x = element_text(angle = 60, hjust = 1), +# legend.position = "bottom") +# +# +# ggsave(paste0(plots_path, "figure_bar_bearing_y_commuters_facet_grid_cluster_distance_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 10) +# +# +# +# +# +# +# +# +# +# +# diff --git a/code/demand_cluster_flows_prep.R b/code/demand_cluster_flows_prep.R index fe8ac10..de523f8 100644 --- a/code/demand_cluster_flows_prep.R +++ b/code/demand_cluster_flows_prep.R @@ -1,6 +1,6 @@ # the purpose of this script is to prepare the flow data for spatial clustering # # This is done by jittering the flows so that it is not concentrated at zone centroids - +# library(tidyverse) library(sf) #library(lwgeom) @@ -18,8 +18,7 @@ source("R/filter_od_matrix.R") ########## ----------------------- Read in the data ----------------------- ########## # is the data disaggregated by mode? -mode <- TRUE -#mode <- FALSE +mode <- FALSE # ----------- 1. Study area @@ -46,20 +45,12 @@ study_area <- study_area %>% # Demand (census) + supply (travel time) data -if(mode == FALSE){ - # data with "commute_all" only - #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) -} else{ - # data with modes - #od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet")) - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet")) -} +od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) -# filter to specific combination -# TODO: get seperate flows for car and pt, and keep two combinations -od_demand <- od_demand %>% - filter(combination == "pt_wkday_morning") +# # filter to specific combination +# # TODO: get seperate flows for car and pt, and keep two combinations +# od_demand <- od_demand %>% +# filter(combination == "pt_wkday_morning") od_demand <- od_demand %>% select(-distance_m) @@ -112,10 +103,117 @@ sub_zones <- sub_zones %>% # rename population density column sub_zones <- sub_zones %>% - rename(population = gbr_pd_2020_1km_UNadj) + rename(population = gbr_pd_2020_1km_UNadj) %>% + # odjitter samples with replacement. Higher weighted subpoints will keep getting sampled + # Get cube root to increase probability of other points being sampled + mutate(population = population^(1/3)) + + +# --- Create more points (if we don't have enough points, then odjitter may not find a feasible solution) + + +#' Split Points into Multiple Offset Points with CRS Transformation +#' +#' This function takes an `sf` object containing spatial points and splits each +#' point into multiple new points that are offset from the original points. +#' Each new point represents an equal share of a specified numeric column value, +#' and is randomly distributed within a given distance range from the original +#' point. The function transforms the CRS to a specified metric CRS for +#' processing and then converts it back to the original CRS. +#' +#' @param sf_points An `sf` object representing multiple spatial points with +#' geometry and a numeric column to be split. +#' @param col_to_split A string indicating the name of the numeric column to +#' be split across the new points. +#' @param splits An integer indicating the number of new points to generate +#' from each original point. +#' @param offset_dist_min A numeric value specifying the minimum distance +#' (in meters) for the offsets of the new points from the original points. +#' @param offset_dist_max A numeric value specifying the maximum distance +#' (in meters) for the offsets of the new points from the original points. +#' @param target_crs A numeric EPSG code or a PROJ string for the target +#' metric CRS to be used during processing. +#' +#' @return An `sf` object containing the new points, with the specified +#' numeric column value equally distributed among the generated points. +#' +#' @examples +#' # Example usage +#' library(sf) +#' sf_points <- st_as_sf(data.frame(id = 1, +#' population = 100, +#' geometry = st_sfc(st_point(c(1, 1)), +#' crs = 4326))) +#' result <- split_points(sf_points, col_to_split = "population", +#' splits = 5, offset_dist_min = 10, +#' offset_dist_max = 20, target_crs = 32630) +#' plot(st_geometry(result)) +split_points <- function(sf_points, col_to_split, splits, offset_dist_min, + offset_dist_max, target_crs) { + + # Ensure that the specified column exists in the sf object + if (!col_to_split %in% names(sf_points)) { + stop(paste("Column", col_to_split, "does not exist in the sf object.")) + } + + # Store the original CRS + original_crs <- st_crs(sf_points) + + # Transform to the target CRS + sf_points_metric <- st_transform(sf_points, target_crs) + + # Create an empty list to store new points + new_points_list <- vector("list", nrow(sf_points_metric)) + + # Loop through each point in the sf object + for (i in 1:nrow(sf_points_metric)) { + point <- sf_points_metric[i, ] + population <- point[[col_to_split]] # Get the value of the specified column + + # Calculate new population per point + new_col_to_split <- population / splits + + # Generate new points with offsets + offsets <- lapply(1:splits, function(index) { + angle <- runif(1, 0, 2 * pi) # Random angle in radians + offset_dist <- runif(1, min = offset_dist_min, max = offset_dist_max) + offset_x <- offset_dist * cos(angle) + offset_y <- offset_dist * sin(angle) + st_point(c(st_coordinates(point)[1] + offset_x, + st_coordinates(point)[2] + offset_y)) + }) + + # Create sf object for new points + new_points_sf <- st_sf( + # TODO: take column name from funciton argument instead of hardcoding + population = rep(new_col_to_split, splits), # Dynamically assign the column name + geometry = st_sfc(offsets, crs = st_crs(point)) + ) + + # Append the new points sf object to the list + new_points_list[[i]] <- new_points_sf + } + + # Combine all new points into a single sf object + result_sf <- do.call(rbind, new_points_list) + + # Transform back to the original CRS + result_sf <- st_transform(result_sf, original_crs) + + return(result_sf) +} + +# apply function +sub_zones_2 <- split_points(sub_zones, + col_to_split = "population", + splits = 7, + offset_dist_min = 200, + offset_dist_max = 700, + target_crs = 3857) +####### ##### ----- STEP 2: Jittering @@ -124,8 +222,11 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE) # confirm it's empty dir(tempdir()) +# selected_combination = "pt_wkday_evening" + od_demand_for_jittering <- od_demand_filtered %>% - select(Origin, Destination, starts_with("commute_")) + #filter(combination == selected_combination) %>% + select(Origin, Destination, total_flow, combination) # arguments are here: https://github.com/dabreegster/odjitter?tab=readme-ov-file#details @@ -138,9 +239,9 @@ od_demand_jittered = odjitter::jitter( origin_key = "Origin", destination_key = "Destination", # column with the flows (to be disaggregated) - disaggregation_key = "commute_all", + disaggregation_key = "total_flow", # What's the maximum number of trips per output OD row that's allowed? - disaggregation_threshold = 95, + disaggregation_threshold = 20, # ----- arguments for ZONES ----- # @@ -150,7 +251,7 @@ od_demand_jittered = odjitter::jitter( # ----- Arguments for SUBPOINTS ----- # # subpoints to jitter origins and destinations to - subpoints = sub_zones, + subpoints = sub_zones_2, # # alternatively, define different points for origins and destinations # subpoints_origins = points_home, # subpoints_destinations = points_work, @@ -163,16 +264,37 @@ od_demand_jittered = odjitter::jitter( # ----- arguments OTHER ----- # # Guarantee that jittered origin and destination points are at least this distance apart - min_distance_meters = 500, + min_distance_meters = 100, deduplicate_pairs = TRUE ) +# TODO: remove when jittering is fixed +# od_demand_jittered <- od_demand_filtered + # jittered returns fractions. Round them od_demand_jittered <- od_demand_jittered %>% - mutate(across(starts_with("commute_"), round)) + mutate(across(starts_with("total_flow"), round)) + +# # ---------- check if new ODs are in the same zone paairs as the old ODs ---------- +od_demand_jittered_test <- od_demand_jittered %>% + mutate(Origin_new = lwgeom::st_startpoint(geometry), + Destination_new = lwgeom::st_endpoint(geometry)) %>% + st_set_geometry("Origin_new") %>% + st_join(study_area %>% select(MSOA21CD) %>% + rename(MSOA_study_area_O = MSOA21CD)) %>% + st_set_geometry("Destination_new") %>% + st_join(study_area %>% select(MSOA21CD) %>% + rename(MSOA_study_area_D = MSOA21CD)) %>% + mutate(Origin_match = MSOA_study_area_O == Origin, + Destination_match = MSOA_study_area_D == Destination) +# plot to check +plot(st_geometry(study_area)) +plot(st_geometry(od_demand_jittered %>% + filter(Origin == "E02002331", Destination == "E02002335")), + add = TRUE, col = "red") ########## ----------------------- Decide on the SCENARIOS we want to analyse ----------------------- ########## @@ -183,7 +305,6 @@ od_demand_jittered <- od_demand_jittered %>% # ----- Option 1: All OD pairs od_demand_1 <- od_demand_filtered - # ----- Option 2: OD pairs with poor PT supply (many transfers or low travel speed) od_demand_2 <- od_demand_filtered %>% @@ -224,18 +345,13 @@ od_demand_jittered_scenarios <- od_demand_jittered %>% TRUE ~ 0) ) +# remove rows that have no flows +od_demand_jittered_scenarios = od_demand_jittered_scenarios %>% + filter(total_flow != 0) ### Save the sfs for each scenario - -if(mode == FALSE){ - # data with "commute_all" only - st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios.geojson"), delete_dsn = TRUE) - -} else{ - # data with modes - st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_mode.geojson"), delete_dsn = TRUE) - } +st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson"), delete_dsn = TRUE) @@ -363,8 +479,3 @@ if(mode == FALSE){ # # # density plot: facet by demand percentile: Keep potential_demand_equal_split = NA (replace with 0) # -# -# -# -# -# diff --git a/code/demand_cluster_flows_run.R b/code/demand_cluster_flows_run.R new file mode 100644 index 0000000..556bc2b --- /dev/null +++ b/code/demand_cluster_flows_run.R @@ -0,0 +1,290 @@ + + +# -------------------- Clusteting + + +sensitivity = TRUE +day_time = "pt_wkday_06_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +sensitivity = TRUE +day_time = "pt_wkday_09_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +sensitivity = TRUE +day_time = "pt_wkday_12_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +sensitivity = TRUE +day_time = "pt_wkday_15_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +sensitivity = TRUE +day_time = "pt_wkday_18_30" +source("code/demand_cluster_flows.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = ls()) +gc() + +geography = "MSOA" + +# --- Compare clusters at different points in the day +dbscan_sensitivity_res_wkday_06_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_06_30") +dbscan_sensitivity_res_wkday_09_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_09_30") +dbscan_sensitivity_res_wkday_12_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_12_30") +dbscan_sensitivity_res_wkday_15_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_15_30") +dbscan_sensitivity_res_wkday_18_30 = arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_pt_wkday_morning.parquet")) %>% + mutate(scenario = "pt_wkday_18_30") + + +dbscan_sensitivity_res_compare = bind_rows(dbscan_sensitivity_res_wkday_06_30, + dbscan_sensitivity_res_wkday_09_30, + dbscan_sensitivity_res_wkday_12_30, + dbscan_sensitivity_res_wkday_15_30, + dbscan_sensitivity_res_wkday_18_30 + ) + + +dbscan_sensitivity_res_compare %>% + filter(cluster != 0) %>% + group_by(id) %>% + #mutate(clusters = n()) %>% + # How many clusters have more than 5 od pairs in them? + mutate(clusters = sum(size > 30)) %>% + ungroup() %>% + filter(clusters > 25) %>% + ggplot(aes(x = cluster, y = size, fill = commuters_sum)) + + geom_col() + + scale_y_continuous(trans='log10') + + facet_grid(id ~ scenario, scales = "fixed") + + labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}", + subtitle = "Parameter combinations with > 25 clusters having at least 30 od pairs each", + x = "Cluster no.", + y = "No. of od pairs in cluster", + fill= "No. of commuters") + + theme_bw() + + +ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_compare.png"), width = 14, height = 10) + +# minimum number of commuters in a cluster for it to be part of our analysis +commuters_sum_minimum = 150 + +# -------------- Plots +day_time = "pt_wkday_06_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = setdiff(ls(), "commuters_sum_minimum")) +gc() + + +day_time = "pt_wkday_09_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = setdiff(ls(), "commuters_sum_minimum")) +gc() + + +day_time = "pt_wkday_12_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = setdiff(ls(), "commuters_sum_minimum")) +gc() + + +day_time = "pt_wkday_15_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = setdiff(ls(), "commuters_sum_minimum")) +gc() + + +day_time = "pt_wkday_18_30" +source("code/demand_cluster_flows_maps_temporal.R") +print(paste0("finished scenario: ", day_time)) +# clear environment +rm(list = setdiff(ls(), "commuters_sum_minimum")) +gc() + + + +# ----- Facet map with different times of day + +source("R/study_area_geographies.R") +source("R/filter_od_matrix.R") +# ---------------------- Load in data + +# -------- 1. administrative boundaries +study_area <- st_read("data/interim/study_area_boundary.geojson") +# convert to desired resolution +geography = "MSOA" +study_area = study_area_geographies(study_area = study_area, + geography = geography) + +study_area <- study_area %>% + st_cast("MULTIPOLYGON") + +# move the geographic ID to the first column. od::points_to_od() only keeps the first column as ID +geoid_col = paste0(geography, "21CD") + +study_area <- study_area %>% + relocate(all_of(geoid_col), .before = everything()) + + +# -------- 2. gtfs +gtfs_bus <- st_read("data/interim/gtfs_freq/gtfs_bus_sf_temporal.geojson") + + +# -------- 3. Pop density base layer + +# --- pop density 1km grid +oa_pop_density <- stars::read_stars("data/external/population_density_grid_uk/gbr_pd_2020_1km_UNadj.tif") +# crop to study area +oa_pop_density_crop <- st_crop(oa_pop_density, study_area) + +# -------- 4. DRT operating zone polygons + +# Specify the directory containing the GeoJSON files +directory_path <- paste0("data/processed/plots/eda/od_clustering/MSOA/temporal/polygons_combined/", commuters_sum_minimum, "/") + +# Get the list of all GeoJSON files of DRT boundaries (1 file per scenario) +geojson_files <- list.files(directory_path, pattern = "\\.geojson$", full.names = TRUE) + +# Read each GeoJSON file into a list of sf objects +geojson_list <- purrr::map(geojson_files, st_read) +# prepare for plotting +combined_geojson <- do.call(rbind, geojson_list) + +combined_geojson <- combined_geojson %>% + st_transform(st_crs(study_area)) %>% + st_make_valid() + +# plot +tm_shape(st_union(study_area)) + + tm_borders(lwd =2, + col = "grey15") + + tm_shape(oa_pop_density_crop) + + tm_raster(title = "People / Km2", + palette = "Blues", + alpha = 0.5, + style = "log10_pretty") + + # bus layer + tm_shape(gtfs_bus %>% + filter(startsWith(scenario, "pt_wkday")) %>% + mutate(headway_inv = (1/headway_secs) * 3600) %>% + filter(headway_secs < 7200)) + + tm_lines(col = "darkred", + lwd = "headway_inv", + scale = 5.5, + palette = "-YlOrRd", + style = "pretty", + legend.col.show = FALSE, + alpha = 0.1, + title.lwd = "Buses/Hour", + #legend.lwd.is.portrait = FALSE + ) + + tm_facets(by = "scenario", + #by = "commute_all", + free.coords = FALSE, + nrow = 2, + showNA = FALSE) + + # ---- clusters + # poly border + tm_shape(combined_geojson %>% + st_buffer(1000)) + + tm_borders(col = "darkgreen", + lwd = 3.5, + lty = "dashed") + + tm_facets(by = "scenario", + #by = "commute_all", + free.coords = FALSE, + nrow = 2, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Potential DRT Operating Zones (Temporal Variation)"), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + #legend.outside = TRUE, + #legend.outside.position = "bottom", + #legend.stack = "horizontal", + # remove panel headers + # panel.show = FALSE, + panel.label.size = 1, + panel.label.bg.color = NA, + #panel.labels = 1:length(unique(clusters_vis_mode_poly_filt3$cluster)), + frame = FALSE) + + # add a couple of legends + tm_add_legend(type = "line", labels = 'Potential DRT service area', col = 'darkgreen', lwd = 2) -> map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES + +map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES + +plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/temporal/", "min_commuters_", commuters_sum_minimum) + + +tmap_save(tm = map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES, filename = paste0(plots_path, "map_cluster_results_gtfs_overline_poly_bus_diff_pop_density_facet_ALL_TIMES.png"), width = 12, dpi = 1080, asp = 0) + + + + + + + + + + + + + + + + + + + + + + + + + + + +# ------------------------ Test clustering input -------------------------- # + + +od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson")) + + +od_demand_jittered %>% + st_drop_geometry() %>% + pivot_longer(cols = c(scenario_1, scenario_2, scenario_3), + names_to = "scenario") %>% + group_by(combination, scenario) %>% + summarise(number_of_ods = sum(value == 1)) + diff --git a/code/demand_on_buses.R b/code/demand_on_buses.R index 12a1466..a2ee745 100644 --- a/code/demand_on_buses.R +++ b/code/demand_on_buses.R @@ -23,7 +23,7 @@ source("R/filter_od_matrix.R") ########## ----------------------- Read in the data ----------------------- ########## # is the demand data disaggregated by mode? -mode = TRUE +mode = FALSE # ----------- 1. Study area @@ -51,28 +51,21 @@ gtfs_paths = paste0(gtfs_dir, gtfs_names) # --- read in the feeds -gtfs_bus <- gtfstools::read_gtfs(gtfs_paths[grepl("bus", gtfs_paths)]) +gtfs_bus <- gtfstools::read_gtfs(gtfs_paths[grepl("bus_temporal", gtfs_paths)]) #gtfs_rail <- gtfstools::read_gtfs(gtfs_paths[grepl("rail", gtfs_paths)]) -# --- filter the feeds to a specific point in time -gtfs_trip_ids <- gtfs_bus$frequencies %>% - filter(start_time == "07:30:00") - -gtfs_bus <- gtfs_bus %>% - gtfstools::filter_by_trip_id(gtfs_trip_ids$trip_id) +# # --- filter the feeds to a specific point in time +# gtfs_trip_ids <- gtfs_bus$frequencies %>% +# filter(start_time == "07:30:00") +# +# gtfs_bus <- gtfs_bus %>% +# gtfstools::filter_by_trip_id(gtfs_trip_ids$trip_id) # ----------- 3. Census OD data # Demand (census) + supply (travel time) data -if(mode == TRUE){ - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet")) - -} else{ - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet")) -} - -# od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) +od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed.parquet")) # # columns to reference (they differ based on geography) # from_id_col = paste0(geography, "21CD_home") @@ -107,8 +100,8 @@ od_supply <- od_supply %>% # save the output -arrow::write_parquet(od_supply, paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage.parquet")) -#od_supply <- arrow::read_parquet( paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage.parquet")) +arrow::write_parquet(od_supply, paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage_temporal.parquet")) +#od_supply <- arrow::read_parquet( paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_bus_coverage_temporal.parquet")) # --- remove od pairs with very short distance @@ -135,13 +128,12 @@ od_supply_filtered = filter_matrix_by_distance(zones = study_area, # filter(combination == "pt_wkday_morning") od_sd <- od_demand %>% - filter(combination == "pt_wkday_morning") %>% left_join(od_supply_filtered %>% select(-distance_m) %>% st_drop_geometry(), by = c("Origin", # = from_id_col, - "Destination", # = to_id_col, - "departure_time" = "start_time")) + "Destination", # = to_id_col, + "departure_time" = "start_time")) # # save output @@ -153,7 +145,7 @@ od_sd <- od_demand %>% # Method 1: all_to_all trips_sd_1 <- od_sd %>% group_by(trip_id, departure_time, combination) %>% - summarise(potential_demand_all_to_all = sum(commute_all, na.rm = TRUE)) %>% + summarise(potential_demand_all_to_all = sum(total_flow, na.rm = TRUE)) %>% ungroup() # Method 2: frequency-based @@ -162,7 +154,7 @@ trips_sd_2 <- od_sd %>% # get number of passengers on each route for each OD pair mutate(group_id = cur_group_id(), frequency_min = 3600/headway_secs, - commute_route = round((commute_all * frequency_min) / sum(frequency_min))) %>% + commute_route = round((total_flow * frequency_min) / sum(frequency_min))) %>% ungroup() %>% # sum over the route group_by(trip_id, departure_time, combination) %>% @@ -173,7 +165,7 @@ trips_sd_2 <- od_sd %>% trips_sd_3 <- od_sd %>% group_by(departure_time, combination, Origin, Destination) %>% # get number of passengers on each route for each OD pair - mutate(commute_route = round((commute_all / n()))) %>% + mutate(commute_route = round((total_flow / n()))) %>% ungroup() %>% # sum over the route group_by(trip_id, departure_time, combination) %>% @@ -204,12 +196,8 @@ od_trips_sd <- od_trips_sd %>% dplyr::top_n(1, potential_demand_equal_split) %>% ungroup() -if(mode == TRUE){ - arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet")) -} else{ - arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) -} +arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet")) # ----------- 4. Add geometry to plot results @@ -229,7 +217,7 @@ trips_sd_sf <- trips_sd %>% # ----------- 5. Save the output -st_write(trips_sd_sf, paste0("data/processed/travel_demand/trips_potential_demand_census_", geography, ".geojson"), delete_dsn = TRUE) +st_write(trips_sd_sf, paste0("data/processed/travel_demand/trips_potential_demand_temporal_", geography, ".geojson"), delete_dsn = TRUE) @@ -245,18 +233,24 @@ trips_sd_sf_shape_sum <- trips_sd_sf %>% # ########### --------------------------- 5. Plots --------------------------- ########## + # ----------- Potential demand histograms ----------- # -plots_path <- "data/processed/plots/eda/speed_demand_cutoffs/" +plots_path <- "data/processed/plots/eda/speed_demand_cutoffs/temporal/" # TRIP LEVEL +trips_sd = trips_sd %>% + mutate(combination = factor(combination, levels = c("pt_wkday_06_30", "pt_wkday_09_30", "pt_wkday_12_30", "pt_wkday_15_30", "pt_wkday_18_30"))) + + # equal split trips_sd %>% #mutate(potential_demand_equal_split = replace_na(potential_demand_equal_split, 0)) %>% ggplot(aes(x = potential_demand_equal_split)) + geom_histogram(bins = 50, alpha = 0.8) + + facet_grid(combination ~.) + labs(title = "Potential demand on PT routes", subtitle = "Equal split of demand from each OD\nonto routes that serve it", x = "Potential demand (no. of passengers)", @@ -276,7 +270,7 @@ trips_sd %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + geom_histogram(binwidth = 0.5, alpha = 0.8) + - facet_wrap(~ distribution_type) + + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", subtitle = "Different approaches to distributing OD demand", x = "Potential demand (no. of passengers using route) - Thousands", @@ -304,7 +298,7 @@ trips_sd %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + geom_histogram(binwidth = 0.1, alpha = 0.8) + - facet_wrap(~ distribution_type) + + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", subtitle = "Different approaches to distributing OD demand", x = "Potential demand (no. of passengers using route) - Thousands", @@ -319,6 +313,9 @@ ggsave(filename = paste0(plots_path, "plot_hist_potential_demand_routes_two_meth # ----- OD LEVEL +od_trips_sd = od_trips_sd %>% + mutate(combination = factor(combination, levels = c("pt_wkday_06_30", "pt_wkday_09_30", "pt_wkday_12_30", "pt_wkday_15_30", "pt_wkday_18_30"))) + # equal split od_trips_sd %>% @@ -334,7 +331,8 @@ od_trips_sd %>% ungroup() %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + - geom_histogram(binwidth = 0.5, alpha = 0.8) + + geom_histogram(binwidth = 0.2, alpha = 0.8) + + facet_grid(.~combination) + #geom_density()+ #scale_y_log10() + #geom_vline(data = percentiles_df, aes(xintercept = potential_demand, color = "blue")) + @@ -368,9 +366,9 @@ od_trips_sd %>% ungroup() %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + - geom_histogram(binwidth = 0.5, alpha = 0.8) + + geom_histogram(binwidth = 0.2, alpha = 0.8) + #scale_y_log10() + - facet_wrap(~ distribution_type) + + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", subtitle = "Different approaches to distributing OD demand", x = "Potential demand on busiest route serving OD pair ('000)", @@ -398,9 +396,9 @@ od_trips_sd %>% ungroup() %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + - geom_histogram(binwidth = 0.5, alpha = 0.8) + + geom_histogram(binwidth = 0.2, alpha = 0.8) + #scale_y_log10() + - facet_wrap(~ distribution_type) + + facet_grid(combination ~ distribution_type) + labs(title = "Potential demand on PT routes", subtitle = "Different approaches to distributing OD demand", x = "Potential demand on busiest route serving OD pair ('000)", @@ -447,6 +445,25 @@ trips_sd %>% ggsave(filename = paste0(plots_path, "plot_dens_potential_demand_trips_equal_split.png")) +trips_sd %>% + #mutate(potential_demand_equal_split = replace_na(potential_demand_equal_split, 0)) %>% + ggplot(aes(x = potential_demand_equal_split)) + + geom_density()+ + #scale_y_log10() + + geom_vline(data = percentiles_trip, aes(xintercept = potential_demand, color = labels))+ + facet_grid(.~combination) + + labs(title = "Potential demand on PT routes", + subtitle = "Equal split of demand from each OD\nonto routes that serve it", + x = "Potential demand (no. of passengers)", + color = "Percentile") + + theme( + axis.text.x = element_text(angle = 45, hjust = 1), + axis.title.x = element_text(size = 8), + axis.title.y = element_text(size = 8), + legend.position = "bottom" + ) + +ggsave(filename = paste0(plots_path, "plot_dens_potential_demand_trips_equal_split_facet.png")) # --- OD level @@ -473,6 +490,7 @@ od_trips_sd %>% mutate(potential_demand = round(potential_demand / 1000, 2)) %>% ggplot(aes(x = potential_demand)) + geom_density()+ + facet_grid(.~combination) + #scale_y_log10() + geom_vline(data = percentiles_df, aes(xintercept = potential_demand, color = labels))+ scale_color_brewer(type = "qual", palette = "Set1") + @@ -515,6 +533,16 @@ percentiles_speed <- data.frame( labels = c("10", "25", "50", "75", "90") ) +percentiles_speed_combination <- od_trips_sd %>% + group_by(combination) %>% + summarise( + percentiles = list(c(0.1, 0.25, 0.5, 0.75, 0.9)), + speed_kph = list(quantile(speed_kph, c(0.1, 0.25, 0.5, 0.75, 0.9))), + labels = list(c("10", "25", "50", "75", "90")) + ) %>% + unnest(cols = c(percentiles, speed_kph, labels)) + + od_trips_sd %>% ggplot(aes(x = speed_kph)) + @@ -536,6 +564,8 @@ od_trips_sd %>% ggsave(filename = paste0(plots_path, "plot_dens_speeds_od_equal_split.png")) + + # # # # ----- a) potential ridership on all different trips diff --git a/code/eda_travel_demand_nobus.R b/code/eda_travel_demand_nobus.R index 33020a2..11441e7 100644 --- a/code/eda_travel_demand_nobus.R +++ b/code/eda_travel_demand_nobus.R @@ -55,8 +55,8 @@ plots_path <- paste0("data/processed/plots/eda/flows_no_direct/", geography, "/" # ---------- 2. Filter GTFS by time of day (one feed for each combination) # feeds -gtfs_bus <- gtfs_feeds$study_area_gtfs_bus_f.zip -gtfs_rail <- gtfs_feeds$study_area_gtfs_rail_f.zip +gtfs_bus <- gtfs_feeds$study_area_gtfs_bus_temporal_f.zip +gtfs_rail <- gtfs_feeds$study_area_gtfs_rail_temporal_f.zip # add shapes.txt to the rail feed gtfs_rail <- UK2GTFS::ATOC_shapes(gtfs_rail) @@ -65,15 +65,21 @@ gtfs_rail <- UK2GTFS::ATOC_shapes(gtfs_rail) scenarios <- tribble( ~scenario, ~date, ~min_departure_time, ~max_arrival_time, # public transport at different times of day / week - "pt_wkday_morning", "2023-08-14", "06:00:00", "09:00:00", - "pt_wkday_afternoon", "2023-08-14", "11:00:00", "14:00:00", - "pt_wkday_evening", "2023-08-14", "17:00:00", "20:00:00", - "pt_wkday_night", "2023-08-14", "21:30:00", "23:59:00:00", - "pt_wkend_morning", "2023-08-13", "06:00:00", "09:00:00", - "pt_wkend_evening", "2023-08-13", "17:00:00", "20:00:00", + "pt_wkday_06_30", "2023-08-14", "05:00:00", "08:00:00", + "pt_wkday_09_30", "2023-08-14", "08:00:00", "11:00:00", + "pt_wkday_12_30", "2023-08-14", "11:00:00", "14:00:00", + "pt_wkday_15_30", "2023-08-14", "14:00:00", "17:00:00", + "pt_wkday_18_30", "2023-08-14", "17:00:00", "20:00:00", + "pt_wkend_06_30", "2023-08-14", "05:00:00", "08:00:00", + "pt_wkend_09_30", "2023-08-14", "08:00:00", "11:00:00", + "pt_wkend_12_30", "2023-08-14", "11:00:00", "14:00:00", + "pt_wkend_15_30", "2023-08-14", "14:00:00", "17:00:00", + "pt_wkend_18_30", "2023-08-14", "17:00:00", "20:00:00", ) + + # Bus gtfs_bus_filtered <- filter_gtfs_feed_internal(gtfs_feed = gtfs_bus, scenarios = scenarios, @@ -97,8 +103,8 @@ gtfs_bus_filtered_df <- bind_rows(gtfs_bus_filtered) gtfs_rail_filtered_df <- bind_rows(gtfs_rail_filtered) # save the output -st_write(gtfs_bus_filtered_df, "data/interim/gtfs_freq/gtfs_bus_sf.geojson", delete_dsn =TRUE) -st_write(gtfs_rail_filtered_df, "data/interim/gtfs_freq/gtfs_rail_sf.geojson", delete_dsn =TRUE) +st_write(gtfs_bus_filtered_df, "data/interim/gtfs_freq/gtfs_bus_sf_temporal.geojson", delete_dsn =TRUE) +st_write(gtfs_rail_filtered_df, "data/interim/gtfs_freq/gtfs_rail_sf_temporal.geojson", delete_dsn =TRUE) # Bus headway (morning peak) @@ -110,7 +116,7 @@ tm_shape(study_area) + alpha = 0.5) + tm_shape(gtfs_bus_filtered_df %>% mutate(headway_inv = 1/headway_secs) %>% - filter(headway_secs < 3600, scenario == "pt_wkday_morning")) + + filter(headway_secs < 3600, scenario == "pt_wkday_06_30")) + tm_lines(col = "headway_secs", lwd = "headway_inv", scale = 5, diff --git a/code/gtfs_convert_stop_times_to_freq.R b/code/gtfs_convert_stop_times_to_freq.R index 5089463..4acf1a1 100644 --- a/code/gtfs_convert_stop_times_to_freq.R +++ b/code/gtfs_convert_stop_times_to_freq.R @@ -21,16 +21,16 @@ gtfs_rail <- tidytransit::read_gtfs("data/interim/study_area_gtfs_rail.zip") # apply the function gtfs_freq_bus = stop_times_to_frequencies(gtfs = gtfs_bus, - time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00"), - end_time = c("05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00", "23:59:00"))) + time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "06:30:00", "09:30:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00"), + end_time = c("05:00:00", "06:30:00", "09:00:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00", "23:59:00"))) # apply the function gtfs_freq_rail = stop_times_to_frequencies(gtfs = gtfs_rail, - time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00"), - end_time = c("05:00:00", "07:30:00", "09:00:00", "12:30:00", "15:00:00", "18:30:00", "21:00:00", "23:30:00", "23:59:00"))) + time_ranges = tibble(start_time = c("00:00:00", "05:00:00", "06:30:00", "09:30:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00"), + end_time = c("05:00:00", "06:30:00", "09:00:00", "12:30:00", "15:30:00", "18:30:00", "21:30:00", "23:30:00", "23:59:00"))) # save the new feed -tidytransit::write_gtfs(gtfs_freq_bus, "data/interim/gtfs_freq/study_area_gtfs_bus_f.zip") -tidytransit::write_gtfs(gtfs_freq_rail, "data/interim/gtfs_freq/study_area_gtfs_rail_f.zip") +tidytransit::write_gtfs(gtfs_freq_bus, "data/interim/gtfs_freq/study_area_gtfs_bus_temporal_f.zip") +tidytransit::write_gtfs(gtfs_freq_rail, "data/interim/gtfs_freq/study_area_gtfs_rail_temporal_f.zip") diff --git a/code/performance_od.R b/code/performance_od.R index 1ca4dbc..9f8b6c0 100644 --- a/code/performance_od.R +++ b/code/performance_od.R @@ -15,9 +15,10 @@ source("R/filter_od_matrix.R") geography = "MSOA" # are we processing the data that is disaggregated by mode? mode = FALSE +source = "cpc" # --- where do we want to save the plots? -plots_path <- paste0("data/processed/plots/eda/od_performance/", geography, "/") +plots_path <- paste0("data/processed/plots/eda/od_performance/", geography, "/", source, "/") @@ -40,12 +41,7 @@ study_area <- study_area %>% # ----- Travel time and demand matrix -if(mode == TRUE){ - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet")) - -} else{ - od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet")) -} +od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), ".parquet")) @@ -60,7 +56,8 @@ to_id_col = paste0(geography, "21CD_work") # rename columns for function below od_demand <- od_demand %>% rename(Origin = all_of(from_id_col), - Destination = all_of(to_id_col)) + Destination = all_of(to_id_col)) %>% + relocate(Origin, Destination) @@ -98,7 +95,7 @@ od_demand_sf_rank <- od_demand_sf %>% speed_percentile_fct = cut(speed_percentile, breaks = seq(0, 1, by = 0.25), include.lowest = TRUE), - demand_percentile = percent_rank(commute_all), + demand_percentile = percent_rank(total_flow), demand_percentile_fct = cut(demand_percentile, breaks = seq(0, 1, by = 0.25), include.lowest = TRUE), @@ -108,12 +105,7 @@ od_demand_sf_rank <- od_demand_sf %>% # ---------- 5. Save the output -if(mode == TRUE){ - arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet")) - -} else{ - arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet")) -} +arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed.parquet")) @@ -131,10 +123,10 @@ tm_shape(study_area) + tm_borders(col = "grey60", alpha = 0.5) + tm_shape(od_demand_sf_rank %>% - filter(combination == "pt_wkday_morning") %>% + filter(combination == "pt_wkday_06_30") %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -174,9 +166,9 @@ tm_shape(study_area) + tm_borders(col = "grey60", alpha = 0.5) + tm_shape(od_demand_sf_rank %>% - filter(combination == "pt_wkday_morning", n_rides == 1)) + - tm_lines(col = "commute_all", - lwd = "commute_all", + filter(combination == "pt_wkday_06_30", n_rides == 1)) + + tm_lines(col = "total_flow", + lwd = "total_flow", legend.lwd.show = FALSE, alpha = 0.8, title.col = "Demand between OD pair", @@ -215,9 +207,9 @@ tm_shape(study_area) + tm_fill(col = "grey80", alpha = 0.5) + tm_shape(od_demand_sf_rank %>% - filter(combination == "pt_wkday_morning", speed_percentile <= 0.25)) + - tm_lines(col = "commute_all", - lwd = "commute_all", + filter(combination == "pt_wkday_06_30", speed_percentile <= 0.25)) + + tm_lines(col = "total_flow", + lwd = "total_flow", scale = 10, palette = "RdYlGn", legend.lwd.show = FALSE, @@ -257,8 +249,8 @@ tm_shape(study_area) + tm_shape(od_demand_sf_rank %>% mutate(n_rides = round(n_rides)) %>% filter(speed_percentile <= 0.25, combination == "pt_wkday_morning")) + - tm_lines(col = "commute_all", - lwd = "commute_all", + tm_lines(col = "total_flow", + lwd = "total_flow", scale = 10, #palette = "Blues", #style = "quantile", @@ -310,7 +302,7 @@ tm_shape(study_area) + filter(combination == "pt_wkday_morning", !is.na(quadrant)) %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -354,7 +346,7 @@ tm_shape(study_area) + filter(combination == "pt_wkday_morning", quadrant == "low speed high demand") %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -394,8 +386,8 @@ tm_shape(study_area) + tm_shape(od_demand_sf_rank %>% filter(combination == "pt_wkday_morning", quadrant == "low speed high demand", n_rides != 0) %>% mutate(n_rides = as.factor(round(n_rides)))) + - tm_lines(col = "commute_all", - lwd = "commute_all", + tm_lines(col = "total_flow", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "Demand between OD pair", @@ -443,7 +435,7 @@ tm_shape(study_area) + filter(combination == "pt_wkday_morning", quadrant == "low speed high demand") %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -487,7 +479,7 @@ tm_shape(study_area) + filter(combination == "pt_wkday_morning") %>% mutate(n_rides = as.factor(round(n_rides)))) + tm_lines(col = "n_rides", - lwd = "commute_all", + lwd = "total_flow", #legend.lwd.show = FALSE, alpha = 0.5, title.col = "No. of rides required", @@ -514,133 +506,37 @@ tm_shape(study_area) + +# ----- New plots after getting CPC temporal data +tm_shape(study_area) + + tm_fill(col = "grey95") + + tm_shape(study_area) + + tm_borders(col = "grey60", + alpha = 0.5) + + tm_shape(od_demand_sf_rank %>% + filter(!is.na(quadrant)) %>% + mutate(n_rides = as.factor(round(n_rides)))) + + tm_lines(col = "n_rides", + lwd = "total_flow", + #legend.lwd.show = FALSE, + alpha = 0.5, + title.col = "No. of rides required", + title.lwd = "Demand between OD pair", + legend.col.is.portrait = FALSE, + palette = "Dark2", #-RdYlGn + style = "quantile", + scale = 15) + + tm_facets_grid("quadrant", "combination", + type = "stack", + free.coords = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = "Demand between OD pairs\nfaceted by combination of speed and demand (percentiles)\nbetween OD pairs", + main.title.size = 1.3, + main.title.color = "azure4", + main.title.position = "left", + legend.outside = TRUE, + legend.outside.position = "bottom", + legend.stack = "horizontal", + legend.title.size = 0.9, + frame = FALSE) -# ARCHIVE FOR NOW - -# -# -# ggplot(od_demand_sf_rank %>% -# filter(ride_time > 5), -# aes(x = distance_m/ride_time, y = speed_kph, color = combination)) + -# geom_point() -# -# -# ggplot(od_demand_sf_rank %>% -# filter(ride_time > 5, combination == "pt_wkday_morning"), -# aes(x = distance_m/ride_time, y = speed_kph)) + -# geom_point() -# -# ggplot(od_demand_sf_rank %>% -# filter(ride_time > 5, combination != "car"), -# aes(x = ride_time / total_time)) + -# geom_histogram() + -# facet_wrap(~combination, ncol = 3) -# -# -# -# # ----- exploratory plots -# ggplot(od_demand_sf_rank, aes(x = speed_kph, y = speed_percentile, color = combination)) + -# geom_point() -# -# # speed -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# ggplot(aes(x = speed_kph, y = speed_percentile, color = combination)) + -# geom_point() -# -# # demand -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# ggplot(aes(x = commute_all, y = demand_percentile, color = combination)) + -# geom_point() -# -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# ggplot(aes(x = demand_percentile, fill = combination)) + -# geom_histogram() + -# facet_wrap(~combination, ncol = 3) -# -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# ggplot(aes(x = commute_all, fill = combination)) + -# geom_histogram() + -# facet_wrap(~combination, ncol = 3) -# -# -# od_demand_sf_rank_f <- od_demand_sf_rank %>% -# filter(combination == "pt_wkday_morning") -# -# -# od_demand_sf_rank %>% -# filter(combination == "pt_wkday_morning") %>% -# # as.factor to avoid "continuous value to discrete scale" error -# ggplot(aes(x = speed_percentile, y = demand_percentile, color = distance_m)) + -# geom_point() + -# scale_color_distiller(palette="Reds", direction = 1) -# -# -# od_demand_sf_rank %>% -# filter(combination != "car") %>% -# # as.factor to avoid "contiuous value to discrete scale -# ggplot(aes(x = speed_percentile, y = demand_percentile, color = distance_m)) + -# geom_point() + -# scale_color_distiller(palette="Reds", direction = 1) + -# facet_wrap(~combination, ncol = 3) -# -# -# -# -# -# # round n_rides to whole number -# # TODO: move this further upstream (previous script) -# od_demand_sf_rank <- od_demand_sf_rank %>% -# mutate(n_rides = round(n_rides)) -# -# -# # ---------- 5. Create communities based on demand (or travel time?) -# # different community for PT and PVT -# -# # ----------- 5. Plots -# -# # ----------- prep the layer -# # TODO: remove this when we have multiple start times (currently the od_supply df only has 7:30 - WHY? ) -# od_demand_sf_rank <- od_demand_sf_rank %>% -# filter(combination == "pt_wkday_morning") -# -# -# # ----------- 6. Maps -# -# -# -# -# -# -# -# -# -# sfnetworks::as_sfnetwork(od_demand_sf_rank, directed = FALSE) -> x -# -# x %>% activate(nodes) %>% -# mutate(group = tidygraph::group_louvain(weights = `commute_all`)) -> x2 -# -# # OR -# # x2 = x %>% -# # morph(to_linegraph) %>% -# # mutate(group = tidygraph::group_louvain(weights = "commute_all")) %>% -# # unmorph() -# -# -# plot(st_geometry(x, "edges"), col = "grey", lwd = 0.5) -# plot(st_geometry(study_area)) -# -# x2 %>% -# activate("nodes") %>% -# st_as_sf() %>% -# transmute(group = as.factor(group)) %>% -# plot(lwd = 4, add = TRUE) -# -# -# -# -# diff --git a/code/routing_r5r_temporal.R b/code/routing_r5r_temporal.R new file mode 100644 index 0000000..5217aa3 --- /dev/null +++ b/code/routing_r5r_temporal.R @@ -0,0 +1,145 @@ +################################################################################################### +### The purpose of this script is to calculate a travel time matrix for each different mode ### +### combination. r5r is used for the calculations ### +################################################################################################### + +source("R/study_area_geographies.R") +source("R/r5r_routing_wrappers.R") +# source("code/routing_prep.R") + +library(tidyverse) +library(sf) +library(r5r) + +# increase the memory available to Java. Needs to be done at the beginning of the script +options(java.parameters = "-Xmx35G") # 3 gegabytes + +# define path where graph will be built (path with osm and pbf data) +graph_path <- paste0("data/interim/routing_graph/") +# define path where routing results will be saved +travel_time_path <- paste0("data/processed/travel_times/") + +geography <- "MSOA" + +# what number do we want to give to OD pairs that cannot be reached within our travel time threshold +na_time_replace <- 150 # 2.5 hours + +# create a directory to store the results +dir.create(paste0(travel_time_path, geography)) + +# ------------------------------------- PREPARE (BASE) OD MATRIX LAYER ------------------------------------- # + +study_area <- st_read("data/interim/study_area_boundary.geojson") + +# edit the study area to match the chosen resolution +study_area <- study_area_geographies(study_area = study_area, + geography = geography) + + +# r5r requires a POINT sf object with WGS84 CRS +study_area <- study_area %>% + st_transform(4326) %>% + st_centroid() + + + +# Function takes a base layer, gets the geometry centroid, and renames the id column that we pass to it into a standard name +prep_base_layer = function(layer, id_col){ + # rename existing ID column with "id" + id_col = sym(id_col) + layer = layer %>% + rename(id = !! id_col) + # transform crs + layer = layer %>% st_transform(4326) + # get centroid + layer = layer %>% #select(id) %>% + st_centroid() +} + +# apply the function +study_area_r5 <- prep_base_layer(layer = study_area, id_col = "OBJECTID") + + + +# ------------------------------------- DEFINE ROUTING PARAMETERS ------------------------------------- # + +scenarios <- tribble( + ~scenario, ~mode, ~departure_datetime, + # public transport at different times of day / week + # weekday + "pt_wkday_06_30", c("WALK", "TRANSIT"), "14-08-2023 06:30:00", + "pt_wkday_09_30", c("WALK", "TRANSIT"), "14-08-2023 09:30:00", + "pt_wkday_12_30", c("WALK", "TRANSIT"), "14-08-2023 12:30:00", + "pt_wkday_15_30", c("WALK", "TRANSIT"), "14-08-2023 15:30:00", + "pt_wkday_18_30", c("WALK", "TRANSIT"), "14-08-2023 18:30:00", + # weekend + "pt_wkend_06_30", c("WALK", "TRANSIT"), "13-08-2023 06:30:00", + "pt_wkend_09_30", c("WALK", "TRANSIT"), "13-08-2023 09:30:00", + "pt_wkend_12_30", c("WALK", "TRANSIT"), "13-08-2023 12:30:00", + "pt_wkend_15_30", c("WALK", "TRANSIT"), "13-08-2023 15:30:00", + "pt_wkend_18_30", c("WALK", "TRANSIT"), "13-08-2023 18:30:00", + # car (travel time is the same regardless of day / time) - unless we add congestion + #"car", c("CAR"), "14-08-2023 07:30:00", +) + + +# ------------------------------------- BUILD ROUTING GRAPH ------------------------------------- # + +# stop any running r5 instances +# r5r::stop_r5() +# java garbage collector to free up memory +#rJava::.jgc(R.gc = TRUE) + +# setup r5 +print("Setting up r5...") +r5r_core <- setup_r5(data_path = graph_path, + verbose = TRUE, + overwrite = TRUE) # turn to true once we have elevation + +print("Graph built...") +# ------------------------------------- CALCULATE TRAVEL TIME MATRIX ------------------------------------- # + + +# ---------- 2. route using r5r::expanded_travel_time_matrix. This gives you travel time broken down by journey components + +# --- apply the routing function + +# Option 1: story in memory + +# tt_expanded_results <- tt_matrix_expanded(#scenarios = scenarios, +# scenarios = scenarios[scenarios$scenario != "car", ], +# zone_layer = study_area_r5[1:10,], +# time_window = 5, +# storage_option = "memory") + +# Option 2: save to disk + +tt_matrix_expanded(scenarios = scenarios[scenarios$scenario != "car", ], + zone_layer = study_area_r5, + time_window = 10, + storage_option = "save", + save_format = "parquet", + folder_path = paste0(travel_time_path, geography, "/travel_time_matrix_expanded/temporal")) + + +# --- read in the parquet files +# read in all the files using purrr::map +files <- dir(paste0(travel_time_path, geography, "/travel_time_matrix_expanded/temporal"), full.names = TRUE) +tt_results_expanded <- map(files, arrow::read_parquet) +tt_results_expanded <- bind_rows(tt_results_expanded) + +# summarise the results to get one row per group. Currently we have one row for each minute in a time_window +tt_results_expanded_s <- summarise_ttm_expanded(ttm_expanded_results = tt_results_expanded) + +# save the results +arrow::write_parquet(tt_results_expanded_s, paste0(travel_time_path, geography, "/travel_time_matrix_expanded_temporal.parquet")) +#tt_results_expanded_s <- arrow::read_parquet(paste0(travel_time_path, geography, "/travel_time_matrix_expanded.parquet")) + + + + +# stop r5 +r5r::stop_r5(r5r_core) +# java garbage collector to free up memory +rJava::.jgc(R.gc = TRUE) + diff --git a/code/travel_demand_census.R b/code/travel_demand_census.R index 22f2b9e..f1eb09c 100644 --- a/code/travel_demand_census.R +++ b/code/travel_demand_census.R @@ -191,6 +191,6 @@ census_work_msoa_tt <- census_work_msoa %>% # save #write_csv(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.csv") -arrow::write_parquet(census_work_msoa_tt, "data/raw/travel_demand/od_census_2021/demand_study_area_msoa.parquet") +arrow::write_parquet(census_work_msoa_tt, ) diff --git a/code/travel_demand_temporal.R b/code/travel_demand_temporal.R new file mode 100644 index 0000000..37ea7f9 --- /dev/null +++ b/code/travel_demand_temporal.R @@ -0,0 +1,197 @@ +### the purpose of this script is to work with travel demand data from cpc ### +### the data is only available at MSOA level. It is cleaned and joined onto ### +### the travel time results + +library(tidyverse) +library(sf) + + +source("R/study_area_geographies.R") +source("R/filter_od_matrix.R") + +# --------------- 1. READ IN THE DEMAND DATA + +# ------- Zoning layer (2011 MSOAs) + +zones <- read_csv("data/external/cpc/ZoningTemplateFilled_lad_scot_wales.csv") +# study area zones +zones_internal = zones %>% + filter(zone_type == 1) + +# ------- CPC data + +# Define the path to the ZIP file +zip_file_path <- "data/external/cpc/cpc_west_yorkshire_weekdays_march_may.zip" +# Create a temporary directory to extract the files +temp_dir <- tempdir() +# Extract the ZIP file to the temporary directory +unzip(zip_file_path, exdir = temp_dir) +# Get a list of all CSV files in the temporary directory +csv_files <- list.files(temp_dir, pattern = "\\.csv$", full.names = TRUE) + +# Read each CSV file into a list of data frames +cpc_matrices <- purrr::map(csv_files, read.csv) + +# Extract the part of the filename after the underscore and before the .csv extension +file_names <- basename(csv_files) +short_names <- stringr::str_extract(file_names, "(?<=_).*(?=\\.csv)") + +# Name each data frame in the list by its original file name +names(cpc_matrices) <- short_names + +# --- Combine the dfs + +# Add a new column to each data frame with the name of the data frame +cpc_matrices <- purrr::map2(cpc_matrices, short_names, ~mutate(.x, source = .y)) + +# Combine into one df +cpc_matrices_all <- bind_rows(cpc_matrices) + +# replace NA values with 0 +cols <- c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb") + +cpc_matrices_all <- cpc_matrices_all %>% + mutate(across(all_of(cols), ~replace_na(., 0))) + +# Add column to sum all flow types +cpc_matrices_all <- cpc_matrices_all %>% + mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE)) + +# ------- Geo boundaries + +# England +msoa_shp_2011 <- st_read("data/external/msoa_england_2011/infuse_msoa_lyr_2011_clipped.shp") + +# West Yorkshire (All zones with zone_type == 1) + +study_area_large <- msoa_shp_2011 %>% + filter(geo_code %in% zones_internal$msoa) %>% + rename(MSOA11CD = geo_code) + +# Leeds + +# --- administrative boundaries +study_area <- st_read("data/interim/study_area_boundary.geojson") +# convert to desired resolution +geography = "MSOA" +study_area = study_area_geographies(study_area = study_area, + geography = geography) + +# ADD 2011 MSOA codes to study area +msoa_2011_2021_lookup <- read_csv("data/external/cpc/MSOA_2011_2021.csv") + +study_area <- study_area %>% + left_join(msoa_2011_2021_lookup %>% + select(c("MSOA11CD", "MSOA21CD")), + by = "MSOA21CD") + + +# ----------------------- 2. ADD MSOA CODES AND FILTER TO INTERNAL FLOWS ----------------------- # + +# ----- filter zones to study area + +# filter matrices to keep study area zones only (i.e intrazonal flows only) +cpc_matrices_all_internal <- cpc_matrices_all %>% + filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id) + +# prepare data for od::od_to_sf() + +# ----- add msoa 2011 codes +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + # join origin data + left_join(zones_internal %>% + select(msoa, zone_id, msoa_name) %>% + rename(from_msoa = msoa, from_msoa_name = msoa_name), + by = c("from_zone" = "zone_id")) %>% + # join destination data + left_join(zones_internal %>% + select(msoa, zone_id, msoa_name) %>% + rename(to_msoa = msoa, to_msoa_name = msoa_name), + by = c("to_zone" = "zone_id")) %>% + # move msoa codes to first two columns (for od::od_to_sf()) + relocate("from_msoa", .before = everything()) %>% + relocate("to_msoa", .after = "from_msoa") + + +# Filter to Leeds only +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + filter(from_msoa %in% study_area$MSOA11CD & to_msoa %in% study_area$MSOA11CD) + +# -------------- 3. JOIN TRAVEL TIME DATA ---------- # + +# ---------- MSOA level + +# read in the data +tt_matrix_msoa <- arrow::read_parquet("data/processed/travel_times/MSOA/travel_time_matrix_expanded_temporal.parquet") + +# some OD pairs don't have travel time data (for some combinations). Expand grid so that we explitly mention these OD pairs +tt_matrix_msoa_exp <- tidyr::crossing(from_id = tt_matrix_msoa$from_id, to_id = tt_matrix_msoa$to_id, combination = tt_matrix_msoa$combination) + +tt_matrix_msoa <- tt_matrix_msoa_exp %>% + left_join(tt_matrix_msoa, by = c("from_id", "to_id", "combination")) + + + +# add metadata (MSOA for each home and workplace zone) +tt_matrix_msoa <- tt_matrix_msoa %>% + left_join(study_area %>% + select(MSOA21CD, MSOA11CD, OBJECTID) %>% + st_drop_geometry() %>% + rename_with(~paste0(., "_home")) %>% + mutate(across(everything(), ~as.character(.))), + by = c("from_id" = "OBJECTID_home")) %>% + left_join(study_area %>% + select(MSOA21CD, MSOA11CD, OBJECTID) %>% + st_drop_geometry() %>% + rename_with(~paste0(., "_work")) %>% + mutate(across(everything(), ~as.character(.))), + by = c("to_id" = "OBJECTID_work")) + + +# ----- Prepare time_of_day column in tt_matrix to match demand matrix + +# keep only pt travel times +tt_matrix_msoa <- tt_matrix_msoa %>% + filter(str_detect(combination, "pt_")) + +# add combination column to cpc data +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + mutate(combination = case_when( + source %in% c("05-06", "06-07", "07-08") ~ "pt_wkday_06_30", + source %in% c("08-09", "09-10", "10-11") ~ "pt_wkday_09_30", + source %in% c("11-12", "12-13", "13-14") ~ "pt_wkday_12_30", + source %in% c("14-15", "15-16", "16-17") ~ "pt_wkday_15_30", + source %in% c("17-18", "18-19", "19-20") ~ "pt_wkday_18_30") + ) + +# group by combination column +cols_to_sum = c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb", "total_flow") + +cpc_matrices_all_internal_grouped <- cpc_matrices_all_internal %>% + group_by(from_msoa, to_msoa, combination) %>% + summarise(across(cols_to_sum, sum, na.rm = TRUE)) %>% + ungroup() + + +# join travel time data and census commute data +#TODO: join on time of day also!!! + +cpc_matrices_all_internal_tt <- cpc_matrices_all_internal_grouped %>% + left_join(tt_matrix_msoa, by = c("from_msoa" = "MSOA11CD_home", "to_msoa" = "MSOA11CD_work", "combination")) + + +# # ---------- 4. ADD DESIRE LINES ---------- # +# +# # # Filter matrix by distance also adds desire lines +# cpc_matrices_all_internal_tt_sf <- filter_matrix_by_distance(zones = study_area_large, +# od_matrix = cpc_matrices_all_internal_tt, +# dist_threshold = 500) + + + +# save +#write_csv(cpc_matrices_all_internal_tt, "data/raw/travel_demand/cpc_matrices_2019/demand_study_area_msoa.csv") +arrow::write_parquet(cpc_matrices_all_internal_tt, "data/raw/travel_demand/cpc_matrices_2019/demand_study_area_msoa.parquet") + + + diff --git a/code/x_cpc_exploration.R b/code/x_cpc_exploration.R new file mode 100644 index 0000000..85ea814 --- /dev/null +++ b/code/x_cpc_exploration.R @@ -0,0 +1,249 @@ +library(tidyverse) +library(sf) +library(od) +library(tmap) + +source("R/study_area_geographies.R") +source("R/filter_od_matrix.R") + +plots_path <- "data/processed/plots/eda/cpc/" + +# ----------------------- READ IN THE DATA ----------------------- # + +# ------- Zoning layer (2011 MSOAs) + +zones <- read_csv("data/external/cpc/ZoningTemplateFilled_lad_scot_wales.csv") +# study area zones +zones_internal = zones %>% + filter(zone_type == 1) + +# ------- CPC data + +# Define the path to the ZIP file +zip_file_path <- "data/external/cpc/cpc_west_yorkshire_weekdays_march_may.zip" +# Create a temporary directory to extract the files +temp_dir <- tempdir() +# Extract the ZIP file to the temporary directory +unzip(zip_file_path, exdir = temp_dir) +# Get a list of all CSV files in the temporary directory +csv_files <- list.files(temp_dir, pattern = "\\.csv$", full.names = TRUE) + +# Read each CSV file into a list of data frames +cpc_matrices <- purrr::map(csv_files, read.csv) + +# Extract the part of the filename after the underscore and before the .csv extension +file_names <- basename(csv_files) +short_names <- stringr::str_extract(file_names, "(?<=_).*(?=\\.csv)") + +# Name each data frame in the list by its original file name +names(cpc_matrices) <- short_names + +# --- Combine the dfs + +# Add a new column to each data frame with the name of the data frame +cpc_matrices <- purrr::map2(cpc_matrices, short_names, ~mutate(.x, source = .y)) + +# Combine into one df +cpc_matrices_all <- bind_rows(cpc_matrices) + +# replace NA values with 0 +cols <- c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb") + +cpc_matrices_all <- cpc_matrices_all %>% + mutate(across(all_of(cols), ~replace_na(., 0))) + +# Add column to sum all flow types +cpc_matrices_all <- cpc_matrices_all %>% + mutate(total_flow = rowSums(across(hbw_outbound:nhb), na.rm = TRUE)) + +# ------- Geo boundaries + +# England +msoa_shp_2011 <- st_read("data/external/msoa_england_2011/infuse_msoa_lyr_2011_clipped.shp") + +# West Yorkshire (All zones with zone_type == 1) + +study_area_large <- msoa_shp_2011 %>% + filter(geo_code %in% zones_internal$msoa) %>% + rename(MSOA11CD = geo_code) + +# Leeds + +# --- administrative boundaries +study_area <- st_read("data/interim/study_area_boundary.geojson") +# convert to desired resolution +geography = "MSOA" +study_area = study_area_geographies(study_area = study_area, + geography = geography) + +# ADD 2011 MSOA codes to study area +msoa_2011_2021_lookup <- read_csv("data/external/cpc/MSOA_2011_2021.csv") + +study_area <- study_area %>% + left_join(msoa_2011_2021_lookup %>% + select(c("MSOA11CD", "MSOA21CD")), + by = "MSOA21CD") + + + +# ----------------------- ADD GEOGRAPHIC COORDINATES TO ZONES ----------------------- # + +# ----- filter zones to study area + +# filter matrices to keep study area zones only (i.e intrazonal flows only) +cpc_matrices_all_internal <- cpc_matrices_all %>% + filter(from_zone %in% zones_internal$zone_id & to_zone %in% zones_internal$zone_id) + + +# ----- Add desire lines + +# prepare data for od::od_to_sf() + +# ----- add msoa 2011 codes +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + # join origin data + left_join(zones_internal %>% + select(msoa, zone_id, msoa_name) %>% + rename(from_msoa = msoa, from_msoa_name = msoa_name), + by = c("from_zone" = "zone_id")) %>% + # join destination data + left_join(zones_internal %>% + select(msoa, zone_id, msoa_name) %>% + rename(to_msoa = msoa, to_msoa_name = msoa_name), + by = c("to_zone" = "zone_id")) %>% + # move msoa codes to first two columns (for od::od_to_sf()) + relocate("from_msoa", .before = everything()) %>% + relocate("to_msoa", .after = "from_msoa") + +# od::od_to_sf + +# cpc_matrices_all_internal_sf <- od::od_to_sf(x = cpc_matrices_all_internal, +# z = study_area_cpc_internal, +# silent = FALSE) + +# remove short ods +cpc_matrices_all_internal <- cpc_matrices_all_internal %>% + rename(Origin = from_msoa, Destination = to_msoa) + + +# Filter matrix by distance also adds desire lines +cpc_matrices_all_internal_sf <- filter_matrix_by_distance(zones = study_area_large, od_matrix = cpc_matrices_all_internal, dist_threshold = 1000) + + + + +map_cpc_flows = function(od_matrix, + area, + column_to_map, + cols, + min_flows){ + + # --- study_area + # all of WY + if (area == "large"){ + study_area = study_area_large + # Leeds only + } else if (area == "small") { + study_area = study_area + } + + # ----- We filter the od_matrix to only include flows that start and end in the study_Area + od_matrix = od_matrix %>% + filter(Origin %in% study_area$MSOA11CD & + Destination %in% study_area$MSOA11CD) + # arrange so that highest flows are plotting on top + od_matrix = od_matrix %>% arrange(!!sym(column_to_map)) + + # ----- Plot + + # Use the dynamic column name in the filter and tm_lines functions + tm_shape(study_area) + + tm_borders(col = "grey80", alpha = 0.7) + + tm_shape(od_matrix %>% + filter((!!sym(column_to_map)) > min_flows)) + + tm_lines(col = column_to_map, + title.col = "Number of people", + lwd = column_to_map, + legend.lwd.show = FALSE, + palette = "YlGnBu", # YlOrRd # PuBuGn + scale = 7) + + tm_facets(by = "source", + free.coords = FALSE, + ncol = cols) + + # START AND ENDPOINTS + tm_shape(od_matrix %>% + filter((!!sym(column_to_map)) > min_flows) %>% + mutate(geometry = lwgeom::st_startpoint(.))) + + tm_dots(col = "darkgreen", + alpha = 0.4, + #jitter = 0.1, + size = column_to_map, + scale = 0.5, + legend.size.show = FALSE) + + tm_facets(by = "source", + free.coords = FALSE, + ncol = cols, + showNA = FALSE) + + tm_shape(od_matrix %>% + filter((!!sym(column_to_map)) > min_flows) %>% + mutate(geometry = lwgeom::st_endpoint(.))) + + tm_dots(col = "red", + alpha = 0.4, + #jitter = 0.1, + size = column_to_map, + scale = 0.5, + legend.size.show = FALSE) + + tm_facets(by = "source", + free.coords = FALSE, + ncol = cols, + showNA = FALSE) + + tm_layout(fontfamily = 'Georgia', + main.title = paste0("Flows: Purpose = ", column_to_map), + main.title.size = 1.1, + main.title.color = "azure4", + main.title.position = "left", + panel.label.size = 1, + panel.label.bg.color = NA, + # legend.outside = TRUE, + # legend.outside.position = "bottom", + # legend.stack = "horizontal", + frame = FALSE) + + tm_add_legend(type = "symbol", labels = 'OD Start', col = 'darkgreen') + + tm_add_legend(type = "symbol", labels = 'OD End', col = 'red') -> res + + res + + tmap_save(tm = res, filename = paste0(plots_path, "cpc_facet_", column_to_map, "_scope_", area, ".png"), width = 12, dpi = 1080, asp = 0) + + + + + + + + } + +# # Apply the function for a specific column +# map_cpc_flows(od_matrix = cpc_matrices_all_internal_sf, +# area = "small", # c(small, large) +# column_to_map = "hbo_outbound", +# cols = 5, +# min_flows = 20) + + +# Apply to all columns +columns = c("hbw_outbound", "hbw_inbound", "hbo_outbound", "hbo_inbound", "nhb", "total_flow") + +# SMALL (Leeds Boundary) +purrr::map(.x = columns, ~map_cpc_flows(od_matrix = cpc_matrices_all_internal_sf, + area = "small", + column_to_map = .x, + cols = 5, + min_flow = 10)) + +# LARGE (West Yorskshire) +purrr::map(.x = columns, ~map_cpc_flows(od_matrix = cpc_matrices_all_internal_sf, + area = "large", + column_to_map = .x, + cols = 5, + min_flow = 10)) diff --git a/code/x_cpc_msoa_zoning_request.R b/code/x_cpc_msoa_zoning_request.R new file mode 100644 index 0000000..35a4c74 --- /dev/null +++ b/code/x_cpc_msoa_zoning_request.R @@ -0,0 +1,282 @@ +library(tidyverse) +library(sf) +library(tmap) + + +# ---------- Read in data + +# --- 1. CPC data template - 2011 MSOA layer +cpc_zones <- read_csv("data/external/cpc/ZoningTemplate.csv") + +# --- 2. 2011 MSOA boundaries (geo) +msoa_shp_2011 <- st_read("data/external/msoa_england_2011/infuse_msoa_lyr_2011_clipped.shp") + +# --- 3. Commuting matrices (2021) +msoa_commute_2021 <- read_csv("data/raw/travel_demand/od_census_2021/ODWP15EW_MSOA_v1.zip") + +# simplify +msoa_commute_2021 <- msoa_commute_2021 %>% + rename(msoa_origin = "Middle layer Super Output Areas code", msoa_destination = "MSOA of workplace code") %>% + # aggregate: we don't care about gender and age breakdown + group_by(msoa_origin, msoa_destination) %>% + summarise(commuters = sum(Count)) %>% + ungroup() + +# add 2011 msoa names +msoa_2011_2021 <- read_csv("data/external/cpc/MSOA_2011_2021.csv") %>% + select(MSOA11CD, MSOA21CD, LAD22CD) + +msoa_commute_2021_2011 <- msoa_commute_2021 %>% + # add 2011 msoa codes (origin) + left_join(msoa_2011_2021 %>% + rename_with(~ paste0(.x, "_origin")), + by = c("msoa_origin" = "MSOA21CD_origin")) %>% + # add 2011 msoa codes (destination) + left_join(msoa_2011_2021 %>% + rename_with(~ paste0(.x, "_destination")), + by = c("msoa_destination" = "MSOA21CD_destination")) + + + + +# ---------- Join the geo data (2) onto (1) +cpc_zones_geo <- cpc_zones %>% + left_join(msoa_shp_2011 %>% + select(geo_code), + by = c("msoa" = "geo_code")) %>% + st_as_sf() + + +cps_zones_yh_region <- cpc_zones_geo %>% + filter(region_name == "Yorkshire and The Humber") + +cps_zones_wy_county <- cpc_zones_geo %>% + filter(county_name == "West Yorkshire") + + + +# ---------- Commuting matrices + +# which OD pairs have an origin or destination inside our study area +msoa_commute_2021_2011_region = msoa_commute_2021_2011 %>% + filter(MSOA11CD_origin %in% cps_zones_wy_county$msoa | + MSOA11CD_destination %in% cps_zones_wy_county$msoa) + + +# ---------- + +msoas_of_interest <- cpc_zones_geo %>% + filter(msoa %in% msoa_commute_2021_2011_region$MSOA11CD_origin | + msoa %in% msoa_commute_2021_2011_region$MSOA11CD_destination) + + + +# label msoas in west yorkshire + +msoas_of_interest %>% + group_by(lad) %>% + mutate(zone_id = n()) %>% + mutate(zone_id = as.character(zone_id)) %>% + ungroup() -> x + + +tm_shape(x)+ + tm_fill(col = "zone_id", + palette = "Dark2") + + +# ------------------------- APPROACH (msoa -> lad -> county) + +# ----- study area +study_area_county = "E11000006" +study_area_region = "E12000003" + +# 1.LEVEL = MSOA + +# --- west yorkshire +cpc_zones_wy = cpc_zones_geo %>% + filter(county == study_area_county) %>% + mutate(id_col = row_number(), + zone_type = 1) # internal to the model + + +# 2.LEVEL = LAD + +# --- yorkshire humber region +cpc_zones_yh_region = cpc_zones_geo %>% + filter(region == study_area_region) %>% + filter(! msoa %in% cpc_zones_wy$msoa) %>% + group_by(lad) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_wy$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# --- greater manchester and lancashire and derbyshire +cpc_zones_other_neighbors = cpc_zones_geo %>% + filter(county_name %in% c("Greater Manchester", "Derbyshire", "Lancashire")) %>% + #filter(! msoa %in% cpc_zones_wy$msoa) %>% + group_by(lad) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_yh_region$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# Combine (LEVEL 1) and (LEVEL 2) + +cpc_zones_study_plus = cpc_zones_wy %>% + bind_rows(cpc_zones_yh_region) %>% + bind_rows(cpc_zones_other_neighbors) + +# 3.LEVEL = COUNTY (OR REGION - check which one I used) + +# ----- Other areas + +cpc_zones_other = cpc_zones_geo %>% + filter(country_name == "England") %>% + filter(! msoa %in% cpc_zones_study_plus$msoa) %>% + group_by(region) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_study_plus$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# 4.LEVEL = COUNTRY + +# ----- Scotland and Wales + +cpc_zones_countries = cpc_zones_geo %>% + filter(country_name != "England") %>% + group_by(country_name) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_other$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# bring all together +cpc_zones_all = cpc_zones_study_plus %>% + bind_rows(cpc_zones_other) %>% + bind_rows(cpc_zones_countries) + + +cpc_zones_all <- cpc_zones_all %>% + mutate(zone_id = id_col) + + +cpc_zones_all_df = cpc_zones_all %>% + select(msoa:msoa_name) %>% + st_drop_geometry() + +# Save +write_csv(cpc_zones_all_df, "data/external/cpc/ZoningTemplateFilled_lad_region.csv") + + +cpc_zones_all %>% filter(zone_id %in% c(347, 348)) -> x + +tm_shape(x)+ + tm_fill(col = "zone_id", + palette = "Dark2", + legend.show = FALSE) + + +# ------------------------- APPROACH (msoa -> lad) + scot wales as countries (could not use them as LADs due to an error on cpc website) ---------- # +# ------- THIS IS THE LAYER I SUBMITTED ------- # + +# ----- study area +study_area_county = "E11000006" + +# 1.LEVEL = MSOA + +# --- west yorkshire +cpc_zones_wy = cpc_zones_geo %>% + filter(county == study_area_county) %>% + mutate(id_col = row_number(), + zone_type = 1) # internal to the model + + +# 2.LEVEL = LAD + +# --- yorkshire humber region +cpc_zones_other = cpc_zones_geo %>% + filter(country_name == "England") %>% + filter(! msoa %in% cpc_zones_wy$msoa) %>% + group_by(lad) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_wy$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + +# ----- Scotland and Wales + +cpc_zones_scot_wales = cpc_zones_geo %>% + filter(country_name != "England") %>% + group_by(country_name) %>% + mutate(id_col = cur_group_id() + max(cpc_zones_other$id_col)) %>% + ungroup() %>% + mutate(zone_type = 0) # external to the model + + + +# Combine + +cpc_zones_all = cpc_zones_wy %>% + bind_rows(cpc_zones_other) %>% + bind_rows(cpc_zones_scot_wales) %>% + mutate(zone_id = id_col) + +cpc_zones_all_df = cpc_zones_all %>% + select(msoa:msoa_name) %>% + st_drop_geometry() + +# Save +write_csv(cpc_zones_all_df, "data/external/cpc/ZoningTemplateFilled_lad_scot_wales.csv") + + +cpc_zones_all %>% filter(zone_id %in% c(629, 643, 652)) -> x + +tm_shape(x)+ + tm_fill(col = "lad", + palette = "Dark2") + + + + + + + + + + + + +tm_shape(cpc_zones_all %>% + filter(zone_id == 375))+ + tm_fill(col = "zone_id", + palette = "Dark2", + legend.show = FALSE) + +cpc_zones_all %>% + filter(zone_id == 375) -> x +# PLOTS + +tm_shape(cpc_zones_all)+ + tm_fill(col = "zone_id", + palette = "Dark2", + legend.show = FALSE) + + +tm_shape(cpc_zones_all %>% + #filter(county_name == "West Yorkshire") %>% + filter(region_name == "Yorkshire and The Humber") %>% + mutate(id_col = as.character(id_col)))+ + tm_fill(col = "lad", + palette = "Dark2") + + +tm_shape(cpc_zones_all %>% + filter(county_name == "Inner London" | county_name == "Outer London")) + + tm_fill(col = "lad", + palette = "Dark2") + + +unique(cpc_zones_all2$zone_id)