Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

29 cpc data #31

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Prev Previous commit
Next Next commit
maps edited for temporal data
Hussein-Mahfouz committed Nov 5, 2024
commit 55975e3d93988003e8bbcba9a2ed17c6f460eff1
103 changes: 52 additions & 51 deletions code/demand_cluster_flows.R
Original file line number Diff line number Diff line change
@@ -317,64 +317,65 @@ hist(distances$fds, breaks = 100)
#
# # ----- Sensitivity analysis(for different epsilon and minpts combinaitons)
#
# # function to get clustering results for many combinations
# dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat,
# options_epsilon <- c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 5, 6, 7, 7.5, 8, 9),
# options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 700, 1000, 1500, 3000, 5000, 10000),
# weights = w_vec,
# flows = st_drop_geometry(od_demand_jittered)
# )
#
#
# arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
# # dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
#
#
# # All results plotted together
# dbscan_sensitivity_res %>%
# filter(cluster != 0) %>%
# ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
# geom_col() +
# scale_y_continuous(trans='log10') +
# facet_wrap(~id, scales = "fixed") +
# labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
# subtitle = "Parameter combinations that returned more than 1 cluster",
# x = "Cluster no.",
# y = "No. of od pairs in cluster",
# fill= "No. of commuters") +
# theme_minimal()
#
# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_all.png", width = 14, height = 10)
#
# dbscan_sensitivity_res %>%
# filter(cluster != 0) %>%
# group_by(id) %>%
# #mutate(clusters = n()) %>%
# # How many clusters have more than 5 od pairs in them?
# mutate(clusters = sum(size > 5)) %>%
# ungroup() %>%
# filter(clusters > 5) %>%
# ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
# geom_col() +
# scale_y_continuous(trans='log10') +
# facet_wrap(~id, scales = "fixed") +
# labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
# subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
# x = "Cluster no.",
# y = "No. of od pairs in cluster",
# fill= "No. of commuters") +
# theme_minimal()
#
# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_filtered.png", width = 14, height = 10)
#
# function to get clustering results for many combinations
dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat,
options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9),
options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000),
weights = w_vec,
flows = st_drop_geometry(od_demand_jittered),
flow_column = "total_flow"
)


arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_", day_time, ".parquet"))
# dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))


# All results plotted together
dbscan_sensitivity_res %>%
filter(cluster != 0) %>%
ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
geom_col() +
scale_y_continuous(trans='log10') +
facet_wrap(~id, scales = "fixed") +
labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
subtitle = "Parameter combinations that returned more than 1 cluster",
x = "Cluster no.",
y = "No. of od pairs in cluster",
fill= "No. of commuters") +
theme_bw()

ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_all_", day_time, ".png"), width = 14, height = 10)

dbscan_sensitivity_res %>%
filter(cluster != 0) %>%
group_by(id) %>%
#mutate(clusters = n()) %>%
# How many clusters have more than 5 od pairs in them?
mutate(clusters = sum(size > 5)) %>%
ungroup() %>%
filter(clusters > 5) %>%
ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
geom_col() +
scale_y_continuous(trans='log10') +
facet_wrap(~id, scales = "fixed") +
labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
x = "Cluster no.",
y = "No. of od pairs in cluster",
fill= "No. of commuters") +
theme_bw()

ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_", day_time, ".png"), width = 14, height = 10)



### -------------------- STEP 3: Cluster -------------------- ###


# cluster option 1: border points assigned to cluster
cluster_dbscan = dbscan::dbscan(dist_mat,
minPts = 70, # 125
minPts = 50, # 125
eps = 8, # 9.5
#borderPoints = FALSE,
weights = w_vec)
23 changes: 11 additions & 12 deletions code/demand_cluster_flows_maps.R
Original file line number Diff line number Diff line change
@@ -377,7 +377,7 @@ tm_shape(study_area) +
#panel.show = FALSE,
panel.label.size = 1,
panel.label.bg.color = NA,
# panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)),
# panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)),
frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly

map_cluster_results_bus_frac_grouped_gtfs_poly
@@ -742,7 +742,7 @@ tm_shape(study_area) +
breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
palette = "RdYlGn", #Accent
alpha = 0.4,
# title.col = "Fraction of Bus to Car users",
# title.col = "Fraction of Bus to Car users",
#title.lwd = "No. of commuters",
legend.col.show = FALSE,
legend.lwd.show = FALSE,
@@ -1143,7 +1143,7 @@ tm_shape(basemap_urban_rural) +
) +
# ---- clusters
# poly border
tm_shape(clusters_vis_mode_poly %>%
tm_shape(clusters_vis_mode_poly %>%
filter(cluster %in% clusters_vis_mode_poly_filt$cluster)) +
tm_borders(col = "black",
lwd = 3,
@@ -1154,9 +1154,9 @@ tm_shape(clusters_vis_mode_poly %>%
nrow = rows,
showNA = FALSE) +
# poly fill
# tm_shape(cluster_dbscan_res_mode_poly_filt_max %>%
# filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>%
tm_shape(clusters_vis_mode_poly_filt %>%
# tm_shape(cluster_dbscan_res_mode_poly_filt_max %>%
# filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>%
tm_shape(clusters_vis_mode_poly_filt %>%
st_buffer(1000)) +
tm_borders(col = "darkgreen",
lwd = 2) +
@@ -1221,9 +1221,9 @@ tm_shape(basemap_urban_rural) +
tm_shape(st_union(clusters_vis_mode_poly_filt %>%
mutate(area = st_area(.)) %>%
filter(area > 0.2 * mean(area)))) +
tm_borders(col = "darkgreen",
lwd = 3.5,
lty = "dashed") +
tm_borders(col = "darkgreen",
lwd = 3.5,
lty = "dashed") +
tm_layout(fontfamily = 'Georgia',
main.title = paste0("Potential DRT Service Areas"),
main.title.size = 1.1,
@@ -1487,8 +1487,8 @@ ggplot(clusters_ur_poly_combined) +
#direction = -1,
labels = function(x) str_wrap(x, width = 25)) +
scale_color_manual(values = colors_urban_rural,
#direction = -1,
labels = function(x) str_wrap(x, width = 25)) +
#direction = -1,
labels = function(x) str_wrap(x, width = 25)) +
labs(x = "",
y = "Area covered by cluster (km2)",
fill = "Rural / Urban Classification",
@@ -1507,4 +1507,3 @@ ggplot(clusters_ur_poly_combined) +
facet_wrap(facets = vars(cluster))

ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)

Loading