filter od pairs based on poor supply. Jitter not working now. ref #27
Hussein-Mahfouz committed Feb 21, 2024
1 parent 06f5e64 commit 793752c
Showing 3 changed files with 201 additions and 32 deletions.
122 changes: 110 additions & 12 deletions code/demand_cluster_flows_prep.R
@@ -3,7 +3,7 @@

library(tidyverse)
library(sf)
library(lwgeom)
#library(lwgeom)
# jittering
library(odjitter)

@@ -13,8 +13,20 @@ source("R/filter_od_matrix.R")


########## ----------------------- Read in the data ----------------------- ##########

# is the data disaggregated by mode?
mode <- TRUE
#mode <- FALSE

# which layer are we jittering?
# Option 1: All OD pairs
# Option 2: OD pairs with poor PT supply (many transfers or low travel speed)
# Option 3: OD pairs with poor PT supply and low potential demand


option <- 1


# ----------- 1. Study area

# --- administrative boundaries
@@ -41,24 +53,28 @@ study_area <- study_area %>%

if(mode == FALSE){
# data with "commute_all" only
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
#od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
} else{
# data with modes
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
#od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))
}

# filter to specific combination
# TODO: get separate flows for car and pt, and keep two combinations
od_demand <- od_demand %>%
filter(combination == "pt_wkday_morning")

# rename columns as most functions are applied on generic column names
from_id_col = paste0(geography, "21CD_home")
to_id_col = paste0(geography, "21CD_work")

od_demand = od_demand %>%
rename("Origin" = all_of(from_id_col),
"Destination" = all_of(to_id_col))
od_demand <- od_demand %>%
select(-distance_m)
# # rename columns as most functions are applied on generic column names
# from_id_col = paste0(geography, "21CD_home")
# to_id_col = paste0(geography, "21CD_work")
#
# od_demand = od_demand %>%
# rename("Origin" = all_of(from_id_col),
# "Destination" = all_of(to_id_col))

########## ----------------------- Convert df to sf desire lines ----------------------- ##########

@@ -71,8 +87,40 @@ od_demand = od_demand %>%
# based on length (euclidean distance)
od_demand_filtered = filter_matrix_by_distance(zones = study_area,
od_matrix = od_demand,
dist_threshold = 500)
dist_threshold = 1000)
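# (filter_matrix_by_distance() comes from R/filter_od_matrix.R, sourced above; the assumption
# here is that it converts the OD matrix to desire lines and drops pairs shorter than
# dist_threshold, in metres)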

# add unique id for each row
od_demand_filtered <- od_demand_filtered %>%
mutate(od_id = paste0(Origin, "-", Destination, "-", combination))
########## ----------------------- Decide on the OD pairs we want to analyse ----------------------- ##########

# Option 1: All OD pairs
# Option 2: OD pairs with poor PT supply (many transfers or low travel speed)
# Option 3: OD pairs with poor PT supply and low potential demand

# Option 1:
od_demand_1 <- od_demand_filtered

# Option 2:
od_demand_2 <- od_demand_filtered %>%
# keep OD pairs that need at least one transfer (n_rides > 1), have no bus option at all
# (NA n_rides), or fall in the slower half of travel speeds (speed_percentile < 0.5 or NA)
filter(n_rides > 1 | is.na(n_rides) |
speed_percentile < 0.5 | is.na(speed_percentile))


# Option 3:

# get percentiles
od_demand_3 <- od_demand_filtered %>%
mutate(demand_route_percentile = percent_rank(potential_demand_equal_split),
demand_route_percentile_fct = cut(demand_route_percentile,
breaks = seq(0, 1, by = 0.25),
include.lowest = TRUE))
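# illustrative sketch (not part of the pipeline): percent_rank() rescales the ranks to [0, 1],
# and cut() with breaks = seq(0, 1, by = 0.25) bins those values into quartiles, e.g.
# cut(c(0.1, 0.6, 0.9), breaks = seq(0, 1, by = 0.25), include.lowest = TRUE)
# returns the bins [0,0.25], (0.5,0.75] and (0.75,1]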


# keep od pairs with poor PT supply (od_demand_2) that also have low potential demand on the routes serving them
od_demand_3 <- od_demand_3 %>%
filter(od_id %in% od_demand_2$od_id & demand_route_percentile < 0.5)


########## ----------------------- Jitter the points ----------------------- ##########
@@ -103,13 +151,32 @@ sub_zones <- sub_zones %>%

##### ----- STEP 2: Jittering


# which layer are we jittering?
if(option == 1){
od_demand_for_jittering = od_demand_1
} else if(option == 2){
od_demand_for_jittering = od_demand_2
} else if(option == 3){
od_demand_for_jittering = od_demand_3
}


# --- clear temp directory:
unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE)
# confirm it's empty
dir(tempdir())

od_demand_for_jittering <- od_demand_for_jittering %>%
select(Origin, Destination, starts_with("commute_"))

# arguments are here: https://github.com/dabreegster/odjitter?tab=readme-ov-file#details

od_demand_jittered = odjitter::jitter(

# ----- arguments for FLOW DATA ----- #

od = od_demand_filtered,
od = od_demand_for_jittering,
# column in "od" that specifies where trips originate
origin_key = "Origin",
destination_key = "Destination",
Expand Down Expand Up @@ -150,6 +217,37 @@ od_demand_jittered <- od_demand_jittered %>%

# ---------- save output


if(option == 1){

if(mode == FALSE){
# data with "commute_all" only
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_all.geojson"), delete_dsn = TRUE)
} else{
# data with modes
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_mode_all.geojson"), delete_dsn = TRUE)
}
} else if(option == 2){

if(mode == FALSE){
# data with "commute_all" only
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_poor_supply.geojson"), delete_dsn = TRUE)
} else{
# data with modes
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_mode_poor_supply.geojson"), delete_dsn = TRUE)
}
} else if(option == 3){

if(mode == FALSE){
# data with "commute_all" only
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_poor_supply_low_pd.geojson"), delete_dsn = TRUE)
} else{
# data with modes
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_mode_poor_supply_low_pd.geojson"), delete_dsn = TRUE)
}
}


if(mode == FALSE){
# data with "commute_all" only
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering.geojson"), delete_dsn = TRUE)
83 changes: 68 additions & 15 deletions code/demand_on_buses.R
@@ -22,6 +22,8 @@ source("R/filter_od_matrix.R")

########## ----------------------- Read in the data ----------------------- ##########

# is the demand data disaggregated by mode?
mode = FALSE

# ----------- 1. Study area

@@ -63,16 +65,26 @@ gtfs_bus <- gtfs_bus %>%

# Demand (census) + supply (travel time) data

od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
if(mode == TRUE){
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet"))

# columns to reference (they differ based on geography)
from_id_col = paste0(geography, "21CD_home")
to_id_col = paste0(geography, "21CD_work")
} else{
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet"))
}

# od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))

# # columns to reference (they differ based on geography)
# from_id_col = paste0(geography, "21CD_home")
# to_id_col = paste0(geography, "21CD_work")

# remove intrazone trips
# TODO: do we need to do this?
# od_demand <- od_demand %>%
# filter(.data[[from_id_col]] != .data[[to_id_col]])

od_demand <- od_demand %>%
filter(.data[[from_id_col]] != .data[[to_id_col]])
filter(Origin != Destination)

########## ----------------------- Identify which OD demand pairs are served by each bus ----------------------- ##########

Expand Down Expand Up @@ -115,13 +127,22 @@ od_supply_filtered = filter_matrix_by_distance(zones = study_area,
# st_drop_geometry() %>%
# #left_join(od_demand,
# inner_join(od_demand,
# by = c("Origin" = from_id_col, "Destination" = to_id_col))
# by = c("Origin", # = from_id_col,
# "Destination", # = to_id_col,
# "start_time" = "departure_time"))
#
# od_sd <- od_sd %>%
# filter(combination == "pt_wkday_morning")

od_sd <- od_demand %>%
filter(combination == "pt_wkday_morning") %>%
left_join(od_supply_filtered %>%
select(-distance_m) %>%
st_drop_geometry(),
by = c("Origin", # = from_id_col,
"Destination", # = to_id_col,
"departure_time" = "start_time"))

od_sd <- od_supply_filtered %>%
st_drop_geometry() %>%
#left_join(od_demand,
inner_join(od_demand,
by = c("Origin" = from_id_col, "Destination" = to_id_col, "start_time" = "departure_time"))

# # save output
# arrow::write_parquet(od_sd, paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_demand_and_supply.parquet"))
@@ -131,28 +152,57 @@

# Method 1: all_to_all
trips_sd_1 <- od_sd %>%
group_by(trip_id, start_time, combination) %>%
group_by(trip_id, departure_time, combination) %>%
summarise(potential_demand_all_to_all = sum(commute_all, na.rm = TRUE)) %>%
ungroup()

# Method 2: frequency-based
trips_sd_2 <- od_sd %>%
group_by(start_time, combination, Origin, Destination) %>%
group_by(departure_time, combination, Origin, Destination) %>%
# get number of passengers on each route for each OD pair
mutate(group_id = cur_group_id(),
frequency_min = 3600/headway_secs,
commute_route = round((commute_all * frequency_min) / sum(frequency_min))) %>%
ungroup() %>%
# sum over the route
group_by(trip_id, start_time, combination) %>%
group_by(trip_id, departure_time, combination) %>%
summarise(potential_demand_freq_based = sum(commute_route, na.rm = TRUE)) %>%
ungroup()
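# worked example with hypothetical numbers (not part of the pipeline): an OD pair with
# commute_all = 30 served by two routes, one every 600 s (6 buses/hr) and one every 1200 s
# (3 buses/hr), contributes round(30 * 6 / (6 + 3)) = 20 passengers to the first route and
# round(30 * 3 / (6 + 3)) = 10 to the second, so more frequent routes get a larger share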

# Method 3: split demand equally between all routes serving OD pair
trips_sd_3 <- od_sd %>%
group_by(departure_time, combination, Origin, Destination) %>%
# get number of passengers on each route for each OD pair
mutate(commute_route = round((commute_all / n()))) %>%
ungroup() %>%
# sum over the route
group_by(trip_id, departure_time, combination) %>%
summarise(potential_demand_equal_split = sum(commute_route, na.rm = TRUE)) %>%
ungroup()
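# e.g. (hypothetical numbers): the same OD pair with commute_all = 30 served by two routes
# contributes round(30 / 2) = 15 passengers to each of them under the equal split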


# add all to one df
trips_sd <- trips_sd_1 %>%
left_join(trips_sd_2, by = c("trip_id", "start_time", "combination"))
left_join(trips_sd_2, by = c("trip_id", "departure_time", "combination")) %>%
left_join(trips_sd_3, by = c("trip_id", "departure_time", "combination"))


# add potential demand to original od df
od_trips_sd <- od_sd %>%
left_join(trips_sd, by = c("trip_id", "departure_time", "combination"))

# keep one row per Origin-Destination - the trip with the highest potential demand
od_trips_sd <- od_trips_sd %>%
group_by(Origin, Destination) %>%
dplyr::top_n(1, potential_demand_equal_split) %>%
ungroup()
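# note: dplyr::top_n() keeps ties, so an OD pair can still return several rows if trips share
# the maximum; slice_max(potential_demand_equal_split, n = 1, with_ties = FALSE) would be one
# way to guarantee a single row per pair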

if(mode == TRUE){
arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))

} else{
arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
}

# ----------- 4. Add geometry to plot results

Expand Down Expand Up @@ -182,6 +232,9 @@ trips_sd_sf_shape_sum <- trips_sd_sf %>%
summarise(across(contains("potential_demand"), ~ sum(.x, na.rm = TRUE))) %>%
ungroup()




# ########### --------------------------- 5. Plots --------------------------- ##########
#
#
28 changes: 23 additions & 5 deletions code/performance_od.R
@@ -5,14 +5,16 @@ library(tidyverse)
library(sf)
library(tmap)

library(tidygraph)
library(sfnetworks)
# library(tidygraph)
# library(sfnetworks)


source("R/study_area_geographies.R")
source("R/filter_od_matrix.R")

geography = "MSOA"
# are we processing the data that is disaggregated by mode?
mode = FALSE

# --- where do we want to save the plots?
plots_path <- paste0("data/processed/plots/eda/od_performance/", geography, "/")
@@ -38,7 +40,12 @@ study_area <- study_area %>%

# ----- Travel time and demand matrix

od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
if(mode == TRUE){
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))

} else{
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
}



Expand All @@ -60,7 +67,7 @@ od_demand <- od_demand %>%
# filter od pairs by euclidian distance
od_demand_sf = filter_matrix_by_distance(zones = study_area,
od_matrix = od_demand,
dist_threshold = 1000)
dist_threshold = 500)


# ----------- 3. Get travel speeds
@@ -99,8 +106,19 @@ od_demand_sf_rank <- od_demand_sf %>%
ungroup()


# ---------- 5. Save the output

if(mode == TRUE){
arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet"))

} else{
arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet"))
}




# ---------- 5. Map the results
# ---------- 6. Map the results


# Lwd: Demand between OD pairs,
