Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

29 cpc data #31

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
2 changes: 1 addition & 1 deletion code/demand_cluster_flows.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ source("R/dbscan_sensitivity.R")
########## ----------------------- Read in the data ----------------------- ##########
geography <- "MSOA"

od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson"))
# od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios.geojson"))
od_demand_jittered <- st_read(paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_mode.geojson"))

# path to save plots
plots_path <- paste0("data/processed/plots/eda/od_clustering/", geography, "/")
Expand Down
158 changes: 123 additions & 35 deletions code/demand_cluster_flows_prep.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# the purpose of this script is to prepare the flow data for spatial clustering #
# This is done by jittering the flows so that it is not concentrated at zone centroids

#
library(tidyverse)
library(sf)
#library(lwgeom)
Expand All @@ -18,8 +18,7 @@ source("R/filter_od_matrix.R")
########## ----------------------- Read in the data ----------------------- ##########

# is the data disaggregated by mode?
mode <- TRUE
#mode <- FALSE
mode <- FALSE


# ----------- 1. Study area
Expand All @@ -46,20 +45,12 @@ study_area <- study_area %>%

# Demand (census) + supply (travel time) data

if(mode == FALSE){
# data with "commute_all" only
#od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), ".parquet"))
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))
} else{
# data with modes
#od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode.parquet"))
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/od_census_2021/demand_study_area_", tolower(geography), "_mode_with_speed_and_pd.parquet"))
}
od_demand <- arrow::read_parquet(paste0("data/raw/travel_demand/cpc_matrices_2019/demand_study_area_", tolower(geography), "_with_speed_and_pd.parquet"))

# filter to specific combination
# TODO: get separate flows for car and pt, and keep two combinations
od_demand <- od_demand %>%
filter(combination == "pt_wkday_morning")
# # filter to specific combination
# # TODO: get seperate flows for car and pt, and keep two combinations
# od_demand <- od_demand %>%
# filter(combination == "pt_wkday_morning")

od_demand <- od_demand %>%
select(-distance_m)
Expand Down Expand Up @@ -115,7 +106,111 @@ sub_zones <- sub_zones %>%
rename(population = gbr_pd_2020_1km_UNadj)


# --- Create more points (if we don't have enough points, then odjitter may not find a feasible solution)


#' Split Points into Multiple Offset Points with CRS Transformation
#'
#' This function takes an `sf` object containing spatial points and splits each
#' point into multiple new points that are offset from the original points.
#' Each new point represents an equal share of a specified numeric column value,
#' and is randomly distributed within a given distance range from the original
#' point. The function transforms the CRS to a specified metric CRS for
#' processing and then converts it back to the original CRS.
#'
#' @param sf_points An `sf` object representing multiple spatial points with
#' geometry and a numeric column to be split.
#' @param col_to_split A string indicating the name of the numeric column to
#' be split across the new points. The output keeps this column name.
#' @param splits An integer indicating the number of new points to generate
#' from each original point.
#' @param offset_dist_min A numeric value specifying the minimum distance
#' (in meters) for the offsets of the new points from the original points.
#' @param offset_dist_max A numeric value specifying the maximum distance
#' (in meters) for the offsets of the new points from the original points.
#' @param target_crs A numeric EPSG code or a PROJ string for the target
#' metric CRS to be used during processing.
#'
#' @return An `sf` object containing the new points, with the specified
#' numeric column value equally distributed among the generated points.
#'
#' @examples
#' # Example usage
#' library(sf)
#' sf_points <- st_as_sf(data.frame(id = 1,
#'                                  population = 100,
#'                                  geometry = st_sfc(st_point(c(1, 1)),
#'                                  crs = 4326)))
#' result <- split_points(sf_points, col_to_split = "population",
#'                        splits = 5, offset_dist_min = 10,
#'                        offset_dist_max = 20, target_crs = 32630)
#' plot(st_geometry(result))
split_points <- function(sf_points, col_to_split, splits, offset_dist_min,
                         offset_dist_max, target_crs) {

  # Ensure that the specified column exists in the sf object
  if (!col_to_split %in% names(sf_points)) {
    stop(paste("Column", col_to_split, "does not exist in the sf object."),
         call. = FALSE)
  }

  # Store the original CRS so the result can be transformed back at the end
  original_crs <- st_crs(sf_points)

  # Transform to the target (metric) CRS so the offsets are applied in meters
  sf_points_metric <- st_transform(sf_points, target_crs)

  # Preallocate a list to collect the generated points for each input point
  new_points_list <- vector("list", nrow(sf_points_metric))

  # Loop through each point in the sf object
  for (i in seq_len(nrow(sf_points_metric))) {
    point <- sf_points_metric[i, ]
    value <- point[[col_to_split]] # Get the value of the specified column

    # Each generated point carries an equal share of the original value
    value_per_split <- value / splits

    # Hoist the coordinate lookup out of the offset loop (loop-invariant)
    coords <- st_coordinates(point)

    # Generate `splits` points at a random bearing and random distance
    # (uniform in [offset_dist_min, offset_dist_max]) from the original point
    offsets <- lapply(seq_len(splits), function(index) {
      angle <- runif(1, 0, 2 * pi) # Random angle in radians
      offset_dist <- runif(1, min = offset_dist_min, max = offset_dist_max)
      st_point(c(coords[1] + offset_dist * cos(angle),
                 coords[2] + offset_dist * sin(angle)))
    })

    # Create sf object for the new points; the value column is named after
    # `col_to_split` rather than being hardcoded (resolves earlier TODO)
    new_points_sf <- st_sf(
      setNames(data.frame(rep(value_per_split, splits)), col_to_split),
      geometry = st_sfc(offsets, crs = st_crs(point))
    )

    new_points_list[[i]] <- new_points_sf
  }

  # Combine all new points into a single sf object
  result_sf <- do.call(rbind, new_points_list)

  # Transform back to the original CRS
  st_transform(result_sf, original_crs)
}

# Apply the splitting: each population point becomes 5 points, each carrying
# 1/5 of the population, offset 10-20 m from the original location.
# NOTE(review): EPSG:3857 (Web Mercator) is not equidistant — offsets in it
# are distorted away from the equator; at 10-20 m the error is negligible,
# but a local projected CRS (e.g. EPSG:27700 for GB) would be exact. Confirm.
sub_zones_2 <- split_points(sub_zones,
col_to_split = "population",
splits = 5,
offset_dist_min = 10,
offset_dist_max = 20,
target_crs = 3857)



#######

##### ----- STEP 2: Jittering

Expand All @@ -124,8 +219,11 @@ unlink(paste0(normalizePath(tempdir()), "/", dir(tempdir())), recursive = TRUE)
# confirm it's empty
dir(tempdir())

selected_combination = "pt_wkday_morning"

od_demand_for_jittering <- od_demand_filtered %>%
select(Origin, Destination, starts_with("commute_"))
#filter(combination == selected_combination) %>%
select(Origin, Destination, total_flow, combination)

# arguments are here: https://github.com/dabreegster/odjitter?tab=readme-ov-file#details

Expand All @@ -138,9 +236,9 @@ od_demand_jittered = odjitter::jitter(
origin_key = "Origin",
destination_key = "Destination",
# column with the flows (to be disaggregated)
disaggregation_key = "commute_all",
disaggregation_key = "total_flow",
# What's the maximum number of trips per output OD row that's allowed?
disaggregation_threshold = 95,
disaggregation_threshold = 50,

# ----- arguments for ZONES ----- #

Expand All @@ -150,7 +248,7 @@ od_demand_jittered = odjitter::jitter(
# ----- Arguments for SUBPOINTS ----- #

# subpoints to jitter origins and destinations to
subpoints = sub_zones,
subpoints = sub_zones_2,
# # alternatively, define different points for origins and destinations
# subpoints_origins = points_home,
# subpoints_destinations = points_work,
Expand All @@ -167,9 +265,12 @@ od_demand_jittered = odjitter::jitter(
deduplicate_pairs = TRUE
)

# TODO: remove when jittering is fixed
# od_demand_jittered <- od_demand_filtered

# jittered returns fractions. Round them
od_demand_jittered <- od_demand_jittered %>%
mutate(across(starts_with("commute_"), round))
mutate(across(starts_with("total_flow"), round))



Expand Down Expand Up @@ -227,15 +328,7 @@ od_demand_jittered_scenarios <- od_demand_jittered %>%

### Save the sfs for each scenario


if(mode == FALSE){
# data with "commute_all" only
st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios.geojson"), delete_dsn = TRUE)

} else{
# data with modes
st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_mode.geojson"), delete_dsn = TRUE)
}
st_write(od_demand_jittered_scenarios, paste0("data/interim/travel_demand/", geography, "/od_demand_jittered_for_clustering_scenarios_temporal.geojson"), delete_dsn = TRUE)



Expand Down Expand Up @@ -363,8 +456,3 @@ if(mode == FALSE){
#
# # density plot: facet by demand percentile: Keep potential_demand_equal_split = NA (replace with 0)
#
#
#
#
#
#
Loading