maps edited for temporal data

Hussein-Mahfouz · Hussein-Mahfouz · Jul 5, 2024 · Jul 22, 2024 · Oct 27, 2024 · Oct 30, 2024
commit 55975e3d93988003e8bbcba9a2ed17c6f460eff1
diff --git a/code/demand_cluster_flows.R b/code/demand_cluster_flows.R
@@ -317,64 +317,65 @@ hist(distances$fds, breaks = 100)
 #
 # # ----- Sensitivity analysis(for different epsilon and minpts combinaitons)
 #
-# # function to get clustering results for many combinations
-# dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat,
-#                                              options_epsilon <- c(0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 5, 6, 7, 7.5, 8, 9),
-#                                              options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 700, 1000, 1500, 3000, 5000, 10000),
-#                                              weights = w_vec,
-#                                              flows = st_drop_geometry(od_demand_jittered)
-#                                              )
-#
-#
-# arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
-# # dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
-#
-#
-# # All results plotted together
-# dbscan_sensitivity_res %>%
-#   filter(cluster != 0) %>%
-#   ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
-#   geom_col() +
-#   scale_y_continuous(trans='log10') +
-#   facet_wrap(~id, scales = "fixed") +
-#   labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
-#        subtitle = "Parameter combinations that returned more than 1 cluster",
-#        x = "Cluster no.",
-#        y = "No. of od pairs in cluster",
-#        fill= "No. of commuters") +
-#  theme_minimal()
-#
-# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_all.png", width = 14, height = 10)
-#
-# dbscan_sensitivity_res %>%
-#   filter(cluster != 0) %>%
-#   group_by(id) %>%
-#   #mutate(clusters = n()) %>%
-#   # How many clusters have more than 5 od pairs in them?
-#   mutate(clusters = sum(size > 5)) %>%
-#   ungroup() %>%
-#   filter(clusters > 5) %>%
-#   ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
-#   geom_col() +
-#   scale_y_continuous(trans='log10') +
-#   facet_wrap(~id, scales = "fixed") +
-#   labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
-#        subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
-#        x = "Cluster no.",
-#        y = "No. of od pairs in cluster",
-#        fill= "No. of commuters") +
-#   theme_minimal()
-#
-# ggsave("data/processed/plots/eda/od_clustering/sensitivity_analysis_eps_minpts_filtered.png", width = 14, height = 10)
-#
+# function to get clustering results for many combinations
+dbscan_sensitivity_res <- dbscan_sensitivity(distance_matrix = dist_mat,
+                                             options_epsilon <- c(0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9),
+                                             options_minpts <- c(50, 75, 100, 150, 175, 200, 250, 300, 400, 500, 1000),
+                                             weights = w_vec,
+                                             flows = st_drop_geometry(od_demand_jittered),
+                                             flow_column = "total_flow"
+                                             )
+
+
+arrow::write_parquet(dbscan_sensitivity_res, paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity_", day_time, ".parquet"))
+# dbscan_sensitivity_res <- arrow::read_parquet(paste0("data/interim/travel_demand/", geography, "/od_demand_clustering_sensitivity.parquet"))
+
+
+# All results plotted together
+dbscan_sensitivity_res %>%
+  filter(cluster != 0) %>%
+  ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
+  geom_col() +
+  scale_y_continuous(trans='log10') +
+  facet_wrap(~id, scales = "fixed") +
+  labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
+       subtitle = "Parameter combinations that returned more than 1 cluster",
+       x = "Cluster no.",
+       y = "No. of od pairs in cluster",
+       fill= "No. of commuters") +
+ theme_bw()
+
+ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_all_", day_time, ".png"), width = 14, height = 10)
+
+dbscan_sensitivity_res %>%
+  filter(cluster != 0) %>%
+  group_by(id) %>%
+  #mutate(clusters = n()) %>%
+  # How many clusters have more than 5 od pairs in them?
+  mutate(clusters = sum(size > 5)) %>%
+  ungroup() %>%
+  filter(clusters > 5) %>%
+  ggplot(aes(x = cluster, y = size, fill = commuters_sum)) +
+  geom_col() +
+  scale_y_continuous(trans='log10') +
+  facet_wrap(~id, scales = "fixed") +
+  labs(title = "Sensitivity analysis for clustering - Varying {eps} and {minPts}",
+       subtitle = "Parameter combinations with > 5 clusters having at least 5 od pairs each",
+       x = "Cluster no.",
+       y = "No. of od pairs in cluster",
+       fill= "No. of commuters") +
+  theme_bw()
+
+ggsave(paste0("data/processed/plots/eda/od_clustering/temporal/sensitivity_analysis_eps_minpts_filtered_", day_time, ".png"), width = 14, height = 10)
+
 
 
 ###  --------------------  STEP 3: Cluster  -------------------- ###
 
 
 # cluster option 1: border points assigned to cluster
 cluster_dbscan = dbscan::dbscan(dist_mat,
-                                minPts = 70, # 125
+                                minPts = 50, # 125
                                 eps = 8, # 9.5
                                 #borderPoints = FALSE,
                                 weights = w_vec)

diff --git a/code/demand_cluster_flows_maps.R b/code/demand_cluster_flows_maps.R
@@ -377,7 +377,7 @@ tm_shape(study_area) +
             #panel.show = FALSE,
             panel.label.size = 1,
             panel.label.bg.color = NA,
-           # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)),
+            # panel.labels = 1:length(unique(cluster_dbscan_res_mode_poly$cluster)),
             frame = FALSE) -> map_cluster_results_bus_frac_grouped_gtfs_poly
 
 map_cluster_results_bus_frac_grouped_gtfs_poly
@@ -742,7 +742,7 @@ tm_shape(study_area) +
            breaks = c(0, 0.25, 0.5, 0.75, 1, Inf),
            palette = "RdYlGn", #Accent
            alpha = 0.4,
-          # title.col = "Fraction of Bus to Car users",
+           # title.col = "Fraction of Bus to Car users",
            #title.lwd = "No. of commuters",
            legend.col.show = FALSE,
            legend.lwd.show = FALSE,
@@ -1143,7 +1143,7 @@ tm_shape(basemap_urban_rural) +
   ) +
   # ---- clusters
   # poly border
-tm_shape(clusters_vis_mode_poly %>%
+  tm_shape(clusters_vis_mode_poly %>%
              filter(cluster %in% clusters_vis_mode_poly_filt$cluster)) +
   tm_borders(col = "black",
              lwd = 3,
@@ -1154,9 +1154,9 @@ tm_shape(clusters_vis_mode_poly %>%
             nrow = rows,
             showNA = FALSE) +
   # poly fill
-# tm_shape(cluster_dbscan_res_mode_poly_filt_max %>%
-#              filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>%
-tm_shape(clusters_vis_mode_poly_filt %>%
+  # tm_shape(cluster_dbscan_res_mode_poly_filt_max %>%
+  #              filter(cluster %in% cluster_dbscan_res_mode_poly_filt$cluster) %>%
+  tm_shape(clusters_vis_mode_poly_filt %>%
              st_buffer(1000)) +
   tm_borders(col = "darkgreen",
              lwd = 2) +
@@ -1221,9 +1221,9 @@ tm_shape(basemap_urban_rural) +
   tm_shape(st_union(clusters_vis_mode_poly_filt %>%
                       mutate(area = st_area(.)) %>%
                       filter(area > 0.2 * mean(area)))) +
-    tm_borders(col = "darkgreen",
-               lwd = 3.5,
-               lty = "dashed") +
+  tm_borders(col = "darkgreen",
+             lwd = 3.5,
+             lty = "dashed") +
   tm_layout(fontfamily = 'Georgia',
             main.title = paste0("Potential DRT Service Areas"),
             main.title.size = 1.1,
@@ -1487,8 +1487,8 @@ ggplot(clusters_ur_poly_combined) +
                     #direction = -1,
                     labels = function(x) str_wrap(x, width = 25)) +
   scale_color_manual(values = colors_urban_rural,
-                    #direction = -1,
-                    labels = function(x) str_wrap(x, width = 25)) +
+                     #direction = -1,
+                     labels = function(x) str_wrap(x, width = 25)) +
   labs(x = "",
        y = "Area covered by cluster (km2)",
        fill = "Rural / Urban Classification",
@@ -1507,4 +1507,3 @@ ggplot(clusters_ur_poly_combined) +
   facet_wrap(facets = vars(cluster))
 
 ggsave(paste0(plots_path, "figure_bar_urban_rural_compare_filter_no_filter_facet_cluster_scenario_", scenario, "_length_", distance_threshold, ".png"), height = 8, width = 6)
-