Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pefromance enhancements #33

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ Imports:
geodist,
od (>= 0.4.0),
rlang,
sf
sf,
nngeo,
data.table
Suggests:
ggplot2,
knitr,
Expand Down
60 changes: 60 additions & 0 deletions R/poinds_to_od_maxdist.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#' COnvert points to OD with a maxiumum distance
#'
#'
#' @param p A spatial points object or a matrix of coordinates representing points
#' @param pd Optional spatial points object or matrix objects representing destinations
#' @param max_dist Maximum distance in metres
#' @param max_dest The maximum number of destinations for each origin (numeric).
#' Default is Inf. Alternative to max_dist for limiting the number of ODs.
#' @param intrazonal Should the result only include interzonal OD pairs, in which
#' the ID of the origin is different from the ID of the destination zone?
#' `FALSE` by default
#' @param ids_only Should a data frame with only 2 columns (origin and destination IDs)
#' be returned? The default is `FALSE`, meaning the result should also contain the
#' coordinates of the start and end points of each OD pair.
#' @return An sf data frame
#' @export

points_to_od_maxdist = function(p, pd = NULL, max_dist = Inf, max_dest = Inf, intrazonal = FALSE, ids_only = FALSE){

if(any(sf::st_geometry_type(p) != "POINT")){
message("Converting to centroids")
suppressWarnings(p <- sf::st_centroid(p))
}

if(!is.null(pd)){
if(any(sf::st_geometry_type(pd) != "POINT")){
suppressWarnings(pd <- sf::st_centroid(pd))
}
} else {
pd = p
}

if(max_dest > nrow(pd)){
max_dest = nrow(pd)
}

nn <- nngeo::st_nn(p, pd, k = max_dest, maxdist = max_dist, returnDist = TRUE)
res = data.frame(O = rep(p[[1]], lengths(nn$nn)),
D = pd[[1]][unlist(nn$nn, use.names = FALSE)],
distance_euclidean = unlist(nn$dist, use.names = FALSE))

if(intrazonal){
res = res[res$O != res$D,]
}

if(ids_only){
return(res)
}

# Add Coords
p_coords = as.data.frame(sf::st_coordinates(p))
names(p_coords) = c("X1","Y1")

pd_coords = as.data.frame(sf::st_coordinates(pd))
names(pd_coords) = c("X2","Y2")

res = cbind(res, p_coords[match(res$O, p[[1]]),])
res = cbind(res, pd_coords[match(res$D, pd[[1]]),])
res
}
44 changes: 9 additions & 35 deletions R/si_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ si_calculate = function(
dots = rlang::enquos(...)
od = dplyr::mutate(od, "{output_col}" := fun(!!!dots))
if (!missing(constraint_production)) {
od = constrain_production(od, output_col, {{constraint_production}})
od[[output_col]] = constrain(od[[1]], od[[output_col]], od[[constraint_production]])
}
if (!missing(constraint_attraction)) {
od = constrain_attraction(od, output_col, {{constraint_attraction}})
od[[output_col]] = constrain(od[[2]], od[[output_col]], od[[constraint_production]])
}
if (!missing(constraint_total)) {
od = constrain_total(od, output_col, constraint_total)
od[[output_col]] = constrain_total(od, output_col, constraint_total)
}
od
}
Expand All @@ -87,47 +87,21 @@ si_predict = function(
) {
od[[output_col]] = stats::predict(model, od)
if (!missing(constraint_production)) {
od = constrain_production(od, output_col, {{constraint_production}})
od[[output_col]] = constrain(od[[1]], od[[output_col]], od[[constraint_production]])
}
if (!missing(constraint_attraction)) {
od = constrain_attraction(od, output_col, {{constraint_attraction}})
od[[output_col]] = constrain(od[[1]], od[[output_col]], od[[constraint_attraction]])
}
if (!missing(constraint_total)) {
od = constrain_total(od, output_col, constraint_total)
}
od
}

constrain_production = function(od, output_col, constraint_production) {
# todo: should the grouping var (the first column, 1) be an argument?
od_grouped = dplyr::group_by_at(od, 1)
od_grouped = dplyr::mutate(
od_grouped,
"{output_col}" := .data[[output_col]] /
sum(.data[[output_col]]) * dplyr::first( {{constraint_production}} )
)
# # Assert values are correct for test data:
# od_grouped |>
# select(origin_all, interaction)
# od_grouped |>
# sf::st_drop_geometry() |>
# # group_by(1) |>
# summarise(
# sum = sum(interaction),
# first = first(origin_all)
# )
od = dplyr::ungroup(od_grouped)
od
}

constrain_attraction = function(od, output_col, constraint_attraction) {
# todo: should the grouping var (the first column, 2) be an argument?
od_grouped = dplyr::group_by_at(od, 2)
od_grouped = dplyr::mutate(
od_grouped,
"{output_col}" := .data[[output_col]] /
sum(.data[[output_col]]) * dplyr::first( {{constraint_attraction}} )
)
constrain = function(grp, out, conts) {
dt = data.table::data.table(grp = grp, out = out, conts = conts)
dt = dt[,out := out / sum(out) * data.table::first(conts), grp]
return(dt$out)
}

constrain_total = function(od, output_col, constraint_total) {
Expand Down
87 changes: 33 additions & 54 deletions R/si_to_od.R
Original file line number Diff line number Diff line change
@@ -1,30 +1,29 @@
#' Prepare OD data frame
#'
#' Prepares an OD data frame that next could be used to
#' estimate movement between origins and destinations
#' with a spatial interaction model.
#'
#' Prepares an OD data frame that next could be used to estimate movement
#' between origins and destinations with a spatial interaction model.
#'
#' In most origin-destination datasets the spatial entities that constitute
#' origins (typically administrative zones) also represent destinations.
#' In this 'unipartite' case `origins` and `destinations` should be passed
#' the same object, an `sf` data frame representing administrative zones.
#'
#' 'Bipartite' datasets, by contrast, represent
#' "spatial interaction systems where origins cannot act as
#' destinations and vice versa" (Hasova et al. [2022](https://lenkahas.com/files/preprint.pdf)).
#'
#' a different
#' `sf` object can be passed to the `destinations` argument.
#'
#' origins (typically administrative zones) also represent destinations. In this
#' 'unipartite' case `origins` and `destinations` should be passed the same
#' object, an `sf` data frame representing administrative zones.
#'
#' 'Bipartite' datasets, by contrast, represent "spatial interaction systems
#' where origins cannot act as destinations and vice versa" (Hasova et al.
#' [2022](https://lenkahas.com/files/preprint.pdf)).
#'
#' a different `sf` object can be passed to the `destinations` argument.
#'
#' @param origins `sf` object representing origin locations/zones
#' @param destinations `sf` object representing destination locations/zones
#' @param max_dist Euclidean distance in meters (numeric).
#' Only OD pairs that are this distance apart or less will be returned
#' and therefore included in the SIM.
#' @param intrazonal Include intrazonal OD pairs?
#' Intrazonal OD pairs represent movement from one
#' place in a zone to another place in the same zone.
#' `TRUE` by default.
#' @param max_dist Euclidean distance in meters (numeric). Only OD pairs that
#' are this distance apart or less will be returned and therefore included in
#' the SIM.
#' @param max_dest The maximum number of destinations for each origin (numeric).
#' Default is Inf. Alternative to max_dist for limiting the number of ODs.
#' @param intrazonal Include intrazonal OD pairs? Intrazonal OD pairs represent
#' movement from one place in a zone to another place in the same zone. `TRUE`
#' by default.
#' @return An sf data frame
#' @export
#' @examples
Expand All @@ -44,51 +43,31 @@
#' odsf = si_to_od(origins, destinations)
#' nrow(odsf) # no intrazonal flows
#' plot(odsf)
si_to_od = function(origins, destinations, max_dist = Inf, intrazonal = TRUE) {
if(identical(origins, destinations)) {
od_df = od::points_to_od(origins)
} else {
od_df = od::points_to_od(origins, destinations)
}
od_df$distance_euclidean = geodist::geodist(
x = od_df[c("ox", "oy")],
y = od_df[c("dx", "dy")],
paired = TRUE
)
# Max dist check
if(max(od_df$distance_euclidean) > max_dist) {
nrow_before = nrow(od_df)
od_df = od_df[od_df$distance_euclidean <= max_dist, ]
nrow_after = nrow(od_df)
pct_kept = round(nrow_after / nrow_before * 100)
message(
nrow_after,
" OD pairs remaining after removing those with a distance greater than ", # nolint
max_dist, " meters", ":\n",
pct_kept, "% of all possible OD pairs"
)
}

si_to_od = function(origins,
destinations,
max_dist = 10000,
max_dest = Inf,
intrazonal = TRUE) {

# Intrazonal check
if(!intrazonal){
od_df = dplyr::filter(od_df, distance_euclidean > 0)
}
od_df = points_to_od_maxdist(origins, destinations, max_dist = max_dist, intrazonal = intrazonal, max_dest = max_dest)

od_sfc = od::odc_to_sfc(od_df[3:6])
sf::st_crs(od_sfc) = 4326 # todo: add CRS argument?
od_df = od_df[-c(3:6)]
od_sfc = od::odc_to_sfc(od_df[4:7])
sf::st_crs(od_sfc) = sf::st_crs(origins)
od_df = od_df[-c(4:7)]

# join origin attributes
origins_to_join = sf::st_drop_geometry(origins)
names(origins_to_join) = paste0("origin_", names(origins_to_join))
names(origins_to_join)[1] = names(od_df)[1]
od_dfj = dplyr::inner_join(od_df, origins_to_join, by = "O")

# join destination attributes
destinations_to_join = sf::st_drop_geometry(destinations)
names(destinations_to_join) = paste0("destination_", names(destinations_to_join)) # nolint
names(destinations_to_join)[1] = names(od_df)[2]
od_dfj = dplyr::inner_join(od_dfj, destinations_to_join, by = "D")
# names(od_dfj)

# create and return sf object
sf::st_sf(od_dfj, geometry = od_sfc)
}
Loading