R/aggregate.R
od_disaggregate.Rd
This function is for splitting-up OD pairs.
It increases the number of rows in an OD dataset, while aiming
to keep the amount of travel represented in the data the same.
To take an analogy from another package, it's roughly equivalent
to tidyr::pivot_longer()
.
od_disaggregate(
od,
z,
subpoints = NULL,
code_append = "_ag",
population_column = 3,
max_per_od = 5,
keep_ids = TRUE,
integer_outputs = FALSE
)
od_split(
od,
z,
subpoints = NULL,
code_append = "_ag",
population_column = 3,
max_per_od = 5,
keep_ids = TRUE,
integer_outputs = FALSE
)
An origin-destination data frame
Zones representing origins and destinations
Points, lines or polygons within the zones. These define the OD data start/end points.
The name of the column containing aggregate zone names
The column containing the total population (if it exists)
Maximum flow in the population_column to assign per OD pair. This only comes into effect if there are enough subpoints to choose from.
Should the origin and destination ids be kept?
TRUE
by default, meaning 2 extra columns are appended, with the
names o_agg
and d_agg
containing IDs from the original OD data.
Should integer outputs be returned? FALSE
by default.
Note: there is a known issue when integer results are generated. See
https://github.com/ITSLeeds/od/issues/31 for details.
An alias for the function is od_split()
.
od = od_data_df[1:2, c(1, 2, 9)]
od
#> geo_code1 geo_code2 bicycle
#> 1 E02002384 E02006875 13
#> 2 E02002404 E02006875 10
zones = od::od_data_zones_min
od_sf = od_to_sf(od, zones)
#> 0 origins with no match in zone ids
#> 0 destinations with no match in zone ids
#> points not in od data removed.
set.seed(2021) # for reproducibility
od_disag = od_disaggregate(od, zones)
#> Creating origin and destination points at random locations
od_disag2 = od_disaggregate(od, zones, max_per_od = 11)
#> Creating origin and destination points at random locations
plot(zones$geometry)
plot(od_sf$geometry, lwd = 9, add = TRUE)
plot(od_disag$geometry, col = "grey", lwd = 1, add = TRUE)
plot(od_disag2$geometry, col = "green", lwd = 1, add = TRUE)
table(od_disag$o_agg, od_disag$d_agg)
#>
#> E02006875
#> E02002384 3
#> E02002404 2
# integer results
od_disaggregate(od, zones, integer_outputs = TRUE)
#> Creating origin and destination points at random locations
#> Simple feature collection with 5 features and 5 fields
#> Geometry type: LINESTRING
#> Dimension: XY
#> Bounding box: xmin: -1.557405 ymin: 53.7873 xmax: -1.515148 ymax: 53.81147
#> Geodetic CRS: WGS 84
#> o d bicycle o_agg d_agg geometry
#> 1 2 9 4 E02002384 E02006875 LINESTRING (-1.556161 53.81...
#> 2 1 6 4 E02002384 E02006875 LINESTRING (-1.534144 53.80...
#> 3 3 8 5 E02002384 E02006875 LINESTRING (-1.552041 53.81...
#> 4 4 10 5 E02002404 E02006875 LINESTRING (-1.533423 53.79...
#> 5 5 7 5 E02002404 E02006875 LINESTRING (-1.515148 53.78...
# with more trips per disaggregated OD pair:
disag = od_disaggregate(od_data_df[1:2, ], z = zones, max_per_od = 50)
#> Creating origin and destination points at random locations
plot(disag[0])
# with subpoints
subpoints = sf::st_sample(zones, 100)
od_disag_subpoints = od_disaggregate(od, zones, subpoints = subpoints)
plot(subpoints)
plot(od_disag_subpoints$geometry, add = TRUE)
# with buildings data
od_disag_buildings = od_disaggregate(od, zones, od_data_buildings)
#> Converting polygons to points
#> Warning: st_centroid assumes attributes are constant over geometries
summary(od_disag_buildings)
#> o d bicycle o_agg
#> Length:5 Length:5 Min. :4.333 Length:5
#> Class :character Class :character 1st Qu.:4.333 Class :character
#> Mode :character Mode :character Median :4.333 Mode :character
#> Mean :4.600
#> 3rd Qu.:5.000
#> Max. :5.000
#> d_agg geometry
#> Length:5 LINESTRING :5
#> Class :character epsg:4326 :0
#> Mode :character +proj=long...:0
#>
#>
#>
plot(od_data_buildings$geometry)
plot(od_disag_buildings[3], add = TRUE)
# mapview::mapview(od_disag_buildings)
od = od_data_df[1:2, 1:4]
subzones = od_data_zones_small
try(od_disaggregate(od, zones, subzones))
#> Converting polygons to points
#> Warning: st_centroid assumes attributes are constant over geometries
#> Error in sample.int(length(x), size, replace, prob) :
#> cannot take a sample larger than the population when 'replace = FALSE'
od_disag = od_disaggregate(od, zones, subzones, max_per_od = 500)
#> Converting polygons to points
#> Warning: st_centroid assumes attributes are constant over geometries
ncol(od_disag) - 3 == ncol(od) # same number of columns, the same...
#> [1] TRUE
# Except disag data gained geometry and new agg ids:
sum(od_disag[[3]]) == sum(od[[3]])
#> [1] TRUE
sum(od_disag[[4]]) == sum(od[[4]])
#> [1] TRUE
plot(od_disag)
# test with road network dataset (don't run as time consuming):
if (FALSE) { # \dontrun{
od_disag_net = od_disaggregate(od, zones, od_road_network, max_per_od = 500)
plot(zones$geometry)
plot(od_road_network$geometry, add = TRUE, col = "green")
plot(od_disag_net$geometry, add = TRUE)
mapview::mapview(zones) + od_disag_net + od_road_network
} # }