filter od pairs based on poor supply. Jitter not working now. ref #27
Hussein-Mahfouz committed Feb 21, 2024
1 parent 06f5e64 commit 793752c
Showing 3 changed files with 201 additions and 32 deletions.
122 changes: 110 additions & 12 deletions code/demand_cluster_flows_prep.R
@@ -3,7 +3,7 @@

library(tidyverse)
library(sf)
library(lwgeom)
#library(lwgeom)
# jittering
library(odjitter)

@@ -13,8 +13,20 @@ source("R/filter_od_matrix.R")


########## ----------------------- Read in the data ----------------------- ##########

# is the data disaggregated by mode?
mode <- TRUE
#mode <- FALSE

# which layer are we jittering?
# Option 1: All OD pairs
# Option 2: OD pairs with poor PT supply (many transfers or low travel speed)
# Option 3: OD pairs with poor PT supply and low potential demand


option <- 1


# ----------- 1. Study area

# --- administrative boundaries
@@ -41,24 +53,28 @@ study_area <- study_area %>%

if(mode == FALSE){
# data with "commute_all" only
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
#od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
} else{
# data with modes
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
#od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))
}

# filter to specific combination
# TODO: get separate flows for car and pt, and keep two combinations
od_demand <- od_demand %>%
filter(combination == "pt_wkday_morning")

# rename columns as most functions are applied on generic column names
from_id_col = paste0(geography, "21CD_home")
to_id_col = paste0(geography, "21CD_work")

od_demand = od_demand %>%
rename("Origin" = all_of(from_id_col),
"Destination" = all_of(to_id_col))
od_demand <- od_demand %>%
select(-distance_m)
# # rename columns as most functions are applied on generic column names
# from_id_col = paste0(geography, "21CD_home")
# to_id_col = paste0(geography, "21CD_work")
#
# od_demand = od_demand %>%
# rename("Origin" = all_of(from_id_col),
# "Destination" = all_of(to_id_col))

########## ----------------------- Convert df to sf desire lines ----------------------- ##########

@@ -71,8 +87,40 @@ od_demand = od_demand %>%
# based on length (euclidean distance)
od_demand_filtered = filter_matrix_by_distance(zones = study_area,
od_matrix = od_demand,
dist_threshold = 500)
dist_threshold = 1000)
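# (filter_matrix_by_distance() comes from R/filter_od_matrix.R, sourced above; the assumption
# here is that it converts the OD matrix to desire lines and drops pairs shorter than
# dist_threshold, in metres)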

# add unique id for each row
od_demand_filtered <- od_demand_filtered %>%
mutate(od_id = paste0(Origin, "-", Destination, "-", combination))
########## ----------------------- Decide on the OD pairs we want to analyse ----------------------- ##########

# Option 1: All OD pairs
# Option 2: OD pairs with poor PT supply (many transfers or low travel speed)
# Option 3: OD pairs with poor PT supply and low potential demand

# Option 1:
od_demand_1 <- od_demand_filtered

# Option 2:
od_demand_2 <- od_demand_filtered %>%
# keep OD pairs that need at least one transfer (n_rides > 1), have no bus option at all
# (NA n_rides), or fall in the slower half of travel speeds (speed_percentile < 0.5 or NA)
filter(n_rides > 1 | is.na(n_rides) |
speed_percentile < 0.5 | is.na(speed_percentile))


# Option 3:

# get percentiles
od_demand_3 <- od_demand_filtered %>%
mutate(demand_route_percentile = percent_rank(potential_demand_equal_split),
demand_route_percentile_fct = cut(demand_route_percentile,
breaks = seq(0, 1, by = 0.25),
include.lowest = TRUE))
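# illustrative sketch (not part of the pipeline): percent_rank() rescales the ranks to [0, 1],
# and cut() with breaks = seq(0, 1, by = 0.25) bins those values into quartiles, e.g.
# cut(c(0.1, 0.6, 0.9), breaks = seq(0, 1, by = 0.25), include.lowest = TRUE)
# returns the bins [0,0.25], (0.5,0.75] and (0.75,1]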


# keep od pairs with poor PT supply (od_demand_2) that also have low potential demand on the routes serving them
od_demand_3 <- od_demand_3 %>%
filter(od_id %in% od_demand_2$od_id & demand_route_percentile < 0.5)


########## ----------------------- Jitter the points ----------------------- ##########
@@ -103,13 +151,32 @@ sub_zones <- sub_zones %>%

##### ----- STEP 2: Jittering


# which layer are we jittering?
if(option == 1){
od_demand_for_jittering = od_demand_1
} else if(option == 2){
od_demand_for_jittering = od_demand_2
} else if(option == 3){
od_demand_for_jittering = od_demand_3
}


# --- clear temp directory:
unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE)
# confirm it's empty
dir(tempdir())

od_demand_for_jittering <- od_demand_for_jittering %>%
select(Origin, Destination, starts_with("commute_"))

# arguments are here: https://github.com/dabreegster/odjitter?tab=readme-ov-file#details

od_demand_jittered = odjitter::jitter(

# ----- arguments for FLOW DATA ----- #

od = od_demand_filtered,
od = od_demand_for_jittering,
# column in "od" that specifies where trips originate
origin_key = "Origin",
destination_key = "Destination",
Expand Down Expand Up @@ -150,6 +217,37 @@ od_demand_jittered <- od_demand_jittered %>%

# ---------- save output


if(option == 1){

if(mode == FALSE){
# data with "commute_all" only
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_all.geojson"), delete_dsn = TRUE)
} else{
# data with modes
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_mode_all.geojson"), delete_dsn = TRUE)
}
} else if(option == 2){

if(mode == FALSE){
# data with "commute_all" only
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_poor_supply.geojson"), delete_dsn = TRUE)
} else{
# data with modes
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_mode_poor_supply.geojson"), delete_dsn = TRUE)
}
} else if(option == 3){

if(mode == FALSE){
# data with "commute_all" only
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_poor_supply_low_pd.geojson"), delete_dsn = TRUE)
} else{
# data with modes
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_mode_poor_supply_low_pd.geojson"), delete_dsn = TRUE)
}
}


if(mode == FALSE){
# data with "commute_all" only
st_write(od_demand_jittered, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering.geojson"), delete_dsn = TRUE)
83 changes: 68 additions & 15 deletions code/demand_on_buses.R
@@ -22,6 +22,8 @@ source("R/filter_od_matrix.R")

########## ----------------------- Read in the data ----------------------- ##########

# is the demand data disaggregated by mode?
mode = FALSE

# ----------- 1. Study area

@@ -63,16 +65,26 @@ gtfs_bus <- gtfs_bus %>%

# Demand (census) + supply (travel time) data

od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
if(mode == TRUE){
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet"))

# columns to reference (they differ based on geography)
from_id_col = paste0(geography, "21CD_home")
to_id_col = paste0(geography, "21CD_work")
} else{
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet"))
}

# od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))

# # columns to reference (they differ based on geography)
# from_id_col = paste0(geography, "21CD_home")
# to_id_col = paste0(geography, "21CD_work")

# remove intrazone trips
# TODO: do we need to do this?
# od_demand <- od_demand %>%
# filter(.data[[from_id_col]] != .data[[to_id_col]])

od_demand <- od_demand %>%
filter(.data[[from_id_col]] != .data[[to_id_col]])
filter(Origin != Destination)

########## ----------------------- Identify which OD demand pairs are served by each bus ----------------------- ##########

Expand Down Expand Up @@ -115,13 +127,22 @@ od_supply_filtered = filter_matrix_by_distance(zones = study_area,
# st_drop_geometry() %>%
# #left_join(od_demand,
# inner_join(od_demand,
# by = c("Origin" = from_id_col, "Destination" = to_id_col))
# by = c("Origin", # = from_id_col,
# "Destination", # = to_id_col,
# "start_time" = "departure_time"))
#
# od_sd <- od_sd %>%
# filter(combination == "pt_wkday_morning")

od_sd <- od_demand %>%
filter(combination == "pt_wkday_morning") %>%
left_join(od_supply_filtered %>%
select(-distance_m) %>%
st_drop_geometry(),
by = c("Origin", # = from_id_col,
"Destination", # = to_id_col,
"departure_time" = "start_time"))

od_sd <- od_supply_filtered %>%
st_drop_geometry() %>%
#left_join(od_demand,
inner_join(od_demand,
by = c("Origin" = from_id_col, "Destination" = to_id_col, "start_time" = "departure_time"))

# # save output
# arrow::write_parquet(od_sd, paste0("data/interim/travel_demand/", toupper(geography), "/od_pairs_demand_and_supply.parquet"))
@@ -131,28 +152,57 @@

# Method 1: all_to_all
trips_sd_1 <- od_sd %>%
group_by(trip_id, start_time, combination) %>%
group_by(trip_id, departure_time, combination) %>%
summarise(potential_demand_all_to_all = sum(commute_all, na.rm = TRUE)) %>%
ungroup()

# Method 2: frequency-based
trips_sd_2 <- od_sd %>%
group_by(start_time, combination, Origin, Destination) %>%
group_by(departure_time, combination, Origin, Destination) %>%
# get number of passengers on each route for each OD pair
mutate(group_id = cur_group_id(),
frequency_min = 3600/headway_secs,
commute_route = round((commute_all * frequency_min) / sum(frequency_min))) %>%
ungroup() %>%
# sum over the route
group_by(trip_id, start_time, combination) %>%
group_by(trip_id, departure_time, combination) %>%
summarise(potential_demand_freq_based = sum(commute_route, na.rm = TRUE)) %>%
ungroup()
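# worked example with hypothetical numbers (not part of the pipeline): an OD pair with
# commute_all = 30 served by two routes, one every 600 s (6 buses/hr) and one every 1200 s
# (3 buses/hr), contributes round(30 * 6 / (6 + 3)) = 20 passengers to the first route and
# round(30 * 3 / (6 + 3)) = 10 to the second, so more frequent routes get a larger share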

# Method 3: split demand equally between all routes serving OD pair
trips_sd_3 <- od_sd %>%
group_by(departure_time, combination, Origin, Destination) %>%
# get number of passengers on each route for each OD pair
mutate(commute_route = round((commute_all / n()))) %>%
ungroup() %>%
# sum over the route
group_by(trip_id, departure_time, combination) %>%
summarise(potential_demand_equal_split = sum(commute_route, na.rm = TRUE)) %>%
ungroup()
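# e.g. (hypothetical numbers): the same OD pair with commute_all = 30 served by two routes
# contributes round(30 / 2) = 15 passengers to each of them under the equal split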


# add all to one df
trips_sd <- trips_sd_1 %>%
left_join(trips_sd_2, by = c("trip_id", "start_time", "combination"))
left_join(trips_sd_2, by = c("trip_id", "departure_time", "combination")) %>%
left_join(trips_sd_3, by = c("trip_id", "departure_time", "combination"))


# add potential demand to original od df
od_trips_sd <- od_sd %>%
left_join(trips_sd, by = c("trip_id", "departure_time", "combination"))

# keep one row per Origin-Destination - the trip with the highest potential demand
od_trips_sd <- od_trips_sd %>%
group_by(Origin, Destination) %>%
dplyr::top_n(1, potential_demand_equal_split) %>%
ungroup()
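# note: dplyr::top_n() keeps ties, so an OD pair can still return several rows if trips share
# the maximum; slice_max(potential_demand_equal_split, n = 1, with_ties = FALSE) would be one
# way to guarantee a single row per pair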

if(mode == TRUE){
arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))

} else{
arrow::write_parquet(od_trips_sd, paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
}

# ----------- 4. Add geometry to plot results

Expand Down Expand Up @@ -182,6 +232,9 @@ trips_sd_sf_shape_sum <- trips_sd_sf %>%
summarise(across(contains("potential_demand"), ~ sum(.x, na.rm = TRUE))) %>%
ungroup()




# ########### --------------------------- 5. Plots --------------------------- ##########
#
#
28 changes: 23 additions & 5 deletions code/performance_od.R
@@ -5,14 +5,16 @@ library(tidyverse)
library(sf)
library(tmap)

library(tidygraph)
library(sfnetworks)
# library(tidygraph)
# library(sfnetworks)


source("R/study_area_geographies.R")
source("R/filter_od_matrix.R")

geography = "MSOA"
# are we processing the data that is disaggregated by mode?
mode = FALSE

# --- where do we want to save the plots?
plots_path <- paste0("data/processed/plots/eda/od_performance/", geography, "/")
@@ -38,7 +40,12 @@ study_area <- study_area %>%

# ----- Travel time and demand matrix

od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
if(mode == TRUE){
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))

} else{
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
}



Expand All @@ -60,7 +67,7 @@ od_demand <- od_demand %>%
# filter od pairs by euclidian distance
od_demand_sf = filter_matrix_by_distance(zones = study_area,
od_matrix = od_demand,
dist_threshold = 1000)
dist_threshold = 500)


# ----------- 3. Get travel speeds
@@ -99,8 +106,19 @@ od_demand_sf_rank <- od_demand_sf %>%
ungroup()


# ---------- 5. Save the output

if(mode == TRUE){
arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed.parquet"))

} else{
arrow::write_parquet(st_drop_geometry(od_demand_sf_rank), paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed.parquet"))
}




# ---------- 5. Map the results
# ---------- 6. Map the results


# Lwd: Demand between OD pairs,
