# Package setup: install anything missing from CRAN, then attach everything
# the examples use.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# `remotes` is only called through `remotes::`, so it needs to be installed
# but not attached.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}

pkgs <- c(
  "sf",
  "tidyverse",
  "osmextract",
  "tmap",
  "maptiles"
)

# Installs only the packages that are missing or out of date
remotes::install_cran(pkgs)

# `library()` stops with an error if a package cannot be loaded, whereas
# `require()` would only return FALSE and let later chunks fail obscurely.
invisible(lapply(pkgs, library, character.only = TRUE))
Seminar 1 - Mini-workshop
The best way to learn is by exploring data and answering your own questions. Here are some datasets that can help you investigate questions like:
What is the Average Daily People/Bikes/Passengers/Cars?
What is the typical daily/weekly/monthly demand profile?
Where are the points with the highest demand/flows?
1 Some interesting datasets …
Let’s explore some interesting datasets. First, we will install (if necessary) and load the packages for these examples.
1.1 Motorised vehicles counts: Leeds
Many cities/countries publish data from permanent traffic counters e.g. ANPR cameras, induction loops or low-cost sensors. We are going to use data from the sensors in Leeds (available in Data Mill North)
# Locations of the Leeds traffic cameras (one row per site)
leeds_car_location <- read_csv(
  "https://datamillnorth.org/download/e6q0n/9bc51361-d98e-47d3-9963-aeeca3fa0afc/Camera%20Locations.csv"
)

# Turn the X/Y columns into point geometries; the coordinates are declared
# as EPSG:27700 (British National Grid)
leeds_car_location_sf <- leeds_car_location |>
  st_as_sf(
    coords = c("X", "Y"),
    crs = 27700
  )

# Traffic counts recorded by the cameras during 2019
leeds_car_2019 <- read_csv(
  "https://datamillnorth.org/download/e6q0n/9e62c1e5-8ba5-4369-9d81-a46c4e23b9fb/Data%202019.csv"
)
If you are interested in open traffic count datasets see this
1.1.0.1 code
# Quick look: mean of the raw `Volume` records for each site (Cosit)
leeds_car_2019 |>
  group_by(Cosit) |>
  summarise(mean(Volume))
# Mean daily traffic volume per counting site (Cosit): records are first
# summed to daily totals per site, then the totals are averaged per site.
mean_daily_volumes <- leeds_car_2019 |>
  # converting cosit to numeric
  mutate(Cosit = as.numeric(Cosit)) |>
  # extracting the date
  mutate(
    time_date = dmy_hm(Sdate),
    # extracts the day
    date = date(time_date)
  ) |>
  # calculating the total flows for each day
  # NOTE: the argument must be spelt `na.rm`; a misspelt `rm.na = T` is
  # swallowed by `...` in `sum()` and added to the total as 1.
  summarise(
    Volume = sum(Volume, na.rm = TRUE),
    .by = c(date, Cosit)
  ) |>
  # Calculating the daily mean
  summarise(
    daily_volume = mean(Volume, na.rm = TRUE),
    .by = Cosit
  )
# Total traffic volume per site (Cosit) and day.
# The result column is called `mean_volume` although it holds a daily SUM;
# the name is kept because the plotting code refers to it.
daily_volumes <- leeds_car_2019 |>
  # converting cosit to numeric
  mutate(Cosit = as.numeric(Cosit)) |>
  # extracting the date
  mutate(
    time_date = dmy_hm(Sdate),
    # extracts the day
    date = date(time_date)
  ) |>
  # calculating the total flows for each day
  # NOTE: the argument must be spelt `na.rm`; a misspelt `rm.na = T` is
  # swallowed by `...` in `sum()` and added to the total as 1.
  summarise(
    mean_volume = sum(Volume, na.rm = TRUE),
    .by = c(date, Cosit)
  )
# Daily volume time series for a single site (Cosit 90201).
# `Cosit` is already numeric in `daily_volumes`, so it is not converted again.
daily_volumes |>
  filter(Cosit == 90201) |>
  ggplot(aes(x = date, y = mean_volume)) +
  geom_line()
# Distribution of the mean daily volume across sites
mean_daily_volumes |>
  ggplot(aes(daily_volume)) +
  geom_histogram()
# Map the camera sites, with colour and size both driven by the mean daily
# volume. The location table identifies sites by `Site ID`, the counts by
# `Cosit`.
leeds_car_location_sf |>
  left_join(mean_daily_volumes, by = c("Site ID" = "Cosit")) |>
  tm_shape() +
  tm_dots("daily_volume", size = "daily_volume")
1.2 Cycle counts for West Yorkshire
Some cities have dedicated infrastructure to count the number of people cycling at strategic points of the city. We are going to use data from cycle counters in West Yorkshire, which you can find here:
# Cycle counter locations; the first line of the file is skipped before the
# header row is read
leeds_bike_location <- read_csv(
  "https://datamillnorth.org/download/e1dmk/a8c8a11e-1616-4915-a897-9ca5ab4e03b8/Cycle%20Counter%20Locations.csv",
  skip = 1
)

leeds_bike_location_sf <- leeds_bike_location |>
  # st_as_sf() cannot build points from missing coordinates
  drop_na(Latitude, Longitude) |>
  st_as_sf(
    coords = c("Longitude", "Latitude"),
    crs = 4326
  ) |>
  # re-project from lon/lat (EPSG:4326) to EPSG:27700
  st_transform(27700)
The data for 2019:
# Cycle counts recorded during 2019
leeds_bike_2019 <- read_csv(
  "https://datamillnorth.org/download/e1dmk/f13f5d49-6128-4619-a3ff-e6e12f88a71f/Cycle%20Data%202019.csv"
)
Other interesting datasets for you to explore are Paris cycling counters or Scotland.
1.3 Pedestrian Counts: Melbourne
Cities also monitor the number of pedestrians in key locations. We can use data from the sensors in Melbourne, accessible here:
# Pedestrian sensor locations, read directly from the GeoJSON export
melbourne_locations_sf <- st_read(
  "https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/pedestrian-counting-system-sensor-locations/exports/geojson?lang=en&timezone=Europe%2FLondon"
)
We will extract the hourly counts for December 2024:
# Hourly pedestrian counts; the query string refines the export to
# sensing_date 2024/12
melbourne_dec2024 <- read_csv(
  "https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/pedestrian-counting-system-monthly-counts-per-hour/exports/csv?lang=en&refine=sensing_date%3A%222024%2F12%22&timezone=Australia%2FMelbourne&use_labels=true&delimiter=%2C"
)
1.4 Public transport tap-in data: Bogotá
Public transport ridership data can be difficult to obtain. Fortunately, some cities which have systems managed by a public organisation make this data available for the public. Bogotá’s integrated transport system publishes the tap-in data for the BRT system (see this). We will use one of the daily reports.
# BRT station locations, read from a GeoJSON file expected in the working
# directory
tm_stations_sf <- st_read("Estaciones_Troncales_de_TRANSMILENIO.geojson")
Monthly boarding data can be manually obtained in the open data portal of TransMilenio here
# Consolidated 2024 tap-in (validation) archive for the BRT system
url_tm <- "https://storage.googleapis.com/validaciones_tmsa/ValidacionTroncal/2024/consolidado_2024.zip"
# Local file name: the last path component of the URL
u_bn <- basename(url_tm)

# Download only if the archive is not already present; mode "wb" writes the
# zip as binary
if (!file.exists(u_bn)) {
  download.file(
    url = url_tm,
    destfile = u_bn,
    mode = "wb"
  )
}

# Read the CSV straight from inside the zip archive
tm_brt_2024 <- read_csv(unz(u_bn, "troncal_2024.csv"))
TfL’s crowding data is also a great source of ridership data. See this.
1.4.1 code
# Mean daily tap-ins (validaciones) per station
daily_tapins <- tm_brt_2024 |>
  # total tap-ins per station and date
  summarise(
    validaciones = sum(validaciones),
    .by = c(Estacion_Parada, fecha)
  ) |>
  # average of the daily totals per station
  summarise(
    validaciones = mean(validaciones),
    .by = Estacion_Parada
  ) |>
  # station number: the digits inside the first "(...)" of the station name,
  # with the parentheses stripped
  mutate(
    numero_estacion = str_extract(Estacion_Parada, "\\(\\d*\\)") |>
      str_remove_all("(\\(|\\))")
  )
# Map the stations, with colour and size both driven by the mean daily tap-ins
tm_stations_sf |>
  left_join(daily_tapins, by = "numero_estacion") |>
  tm_shape() +
  tm_dots("validaciones", size = "validaciones")
1.5 Network data from OSM
You may already be familiar with getting and using OSM data. This is an example of how to obtain a network that can be used for pedestrians.
# Centre of the study area as (x, y); with crs = 4326 below this is read as
# longitude, latitude
my_coordinates <- c(-76.78893552474851, 18.01206727612776)
sf_point <- st_point(my_coordinates) |>
  st_sfc(crs = 4326)

# Buffer around the point; `dist = 15e3` is presumably metres (15 km) --
# confirm the units sf applies to a geographic CRS
sf_buffer <- st_buffer(sf_point, dist = 15e3)

# Check the study area on a basemap
tm_basemap("OpenStreetMap") +
  tm_shape(sf_buffer) +
  tm_borders()

# Fetch the OSM network for the "walking" mode covering the buffer
my_network <- oe_get_network(sf_buffer, mode = "walking")

# Draw the network lines coloured by their `highway` value
tm_shape(my_network) +
  tm_lines("highway")
Note: you can access a simplified network dataset from Ordnance Survey’s OpenRoads dataset.
2 Links to other resources
- See here for a detailed reproducible example of how to visualise transport network datasets: https://github.com/juanfonsecaLS1/netvis-trafficflows
- See the `sfnetworks` package for information on how to work with spatial networks, including estimating centrality measures and shortest paths.