# Package setup ----
# Use the CRAN cloud mirror so installs never prompt for a mirror.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# `remotes` is only called through `remotes::`, so it has to be installed but
# does not need to be attached.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}

# Packages used in the examples below.
pkgs <- c(
  "sf",
  "tidyverse",
  "osmextract",
  "tmap",
  "maptiles"
)

# Install the packages from CRAN via remotes.
remotes::install_cran(pkgs)

# Attach every package. Unlike `require()`, `library()` stops with an error
# when a package cannot be loaded instead of silently returning FALSE, so a
# failed install is caught here rather than further down the script.
invisible(lapply(pkgs, library, character.only = TRUE))
# Seminar 1 - Mini-workshop
The best way to learn is by exploring data and answering your own questions. Here are some datasets that can help you investigate questions like:
What is the average daily number of people/bikes/passengers/cars?
What is the typical daily/weekly/monthly demand profile?
Where are the points with the highest demand/flows?
1 Some interesting datasets …
Let’s explore some interesting datasets. First we will install (if necessary) and load the packages for these examples.
1.1 Motorised vehicles counts: Leeds
Many cities/countries publish data from permanent traffic counters, e.g. ANPR cameras, induction loops or low-cost sensors. We are going to use data from the sensors in Leeds (available in Data Mill North).
# Locations of the Leeds traffic cameras.
leeds_car_location <- read_csv(
  "https://datamillnorth.org/download/e6q0n/9bc51361-d98e-47d3-9963-aeeca3fa0afc/Camera%20Locations.csv"
)

# Turn the X/Y columns into point geometries. EPSG:27700 is the British
# National Grid.
leeds_car_location_sf <- leeds_car_location |>
  st_as_sf(
    coords = c("X", "Y"),
    crs = 27700
  )

# Traffic count records for 2019.
leeds_car_2019 <- read_csv(
  "https://datamillnorth.org/download/e6q0n/9e62c1e5-8ba5-4369-9d81-a46c4e23b9fb/Data%202019.csv"
)
# If you are interested in open traffic count datasets see this
1.1.0.1 code
# Mean of the recorded Volume values for each counting site (Cosit).
summarise(
  group_by(leeds_car_2019, Cosit),
  mean(Volume)
)
# Mean daily traffic volume for each counting site.
mean_daily_volumes <- leeds_car_2019 |>
  # converting Cosit to numeric
  mutate(Cosit = as.numeric(Cosit)) |>
  mutate(
    # parsing the day-month-year hour:minute timestamp
    time_date = dmy_hm(Sdate),
    # extracting the calendar day
    date = date(time_date)
  ) |>
  # calculating the total flow for each day and site; the argument must be
  # spelt `na.rm` -- a misspelt `rm.na = TRUE` is summed as an extra value
  # of 1 and leaves missing counts in place
  summarise(
    Volume = sum(Volume, na.rm = TRUE),
    .by = c(date, Cosit)
  ) |>
  # calculating the mean of the daily totals for each site
  summarise(
    daily_volume = mean(Volume, na.rm = TRUE),
    .by = Cosit
  )
# Total traffic volume per day and counting site.
daily_volumes <- leeds_car_2019 |>
  # converting Cosit to numeric
  mutate(Cosit = as.numeric(Cosit)) |>
  mutate(
    # parsing the day-month-year hour:minute timestamp
    time_date = dmy_hm(Sdate),
    # extracting the calendar day
    date = date(time_date)
  ) |>
  # calculating the total flow for each day and site. Despite its name,
  # `mean_volume` holds the daily total; the name is kept because later
  # code refers to it. `na.rm` (not `rm.na`) is the argument that drops
  # missing counts -- the misspelt form adds 1 to every total.
  summarise(
    mean_volume = sum(Volume, na.rm = TRUE),
    .by = c(date, Cosit)
  )
# Time series of the daily totals recorded at site 90201.
ggplot(
  data = daily_volumes |>
    mutate(Cosit = as.numeric(Cosit)) |>
    filter(Cosit == 90201),
  mapping = aes(x = date, y = mean_volume)
) +
  geom_line()
# Distribution of the mean daily volume across sites.
ggplot(data = mean_daily_volumes, mapping = aes(daily_volume)) +
  geom_histogram()
# Map of the camera sites; dot colour and dot size both show the mean daily
# volume. Sites are matched on "Site ID" (locations) = Cosit (counts).
leeds_car_location_sf |>
  left_join(mean_daily_volumes, by = c("Site ID" = "Cosit")) |>
  tm_shape() +
  tm_dots("daily_volume", size = "daily_volume")
# 1.2 Cycle counts for West Yorkshire
Some cities have dedicated infrastructure to count the number of people cycling at strategic points of the city. We are going to use data from some cycle counters in West Yorkshire, which you can find here:
# Locations of the cycle counters; the first line of the file is skipped
# before the header is read.
leeds_bike_location <- read_csv(
  "https://datamillnorth.org/download/e1dmk/a8c8a11e-1616-4915-a897-9ca5ab4e03b8/Cycle%20Counter%20Locations.csv",
  skip = 1
)

# Counters without coordinates cannot be turned into points, so they are
# dropped first. Coordinates are longitude/latitude (EPSG:4326) and are
# reprojected to EPSG:27700 (British National Grid).
leeds_bike_location_sf <- leeds_bike_location |>
  drop_na(Latitude, Longitude) |>
  st_as_sf(
    coords = c("Longitude", "Latitude"),
    crs = 4326
  ) |>
  st_transform(27700)
# The data for 2019:
# Cycle counts for 2019.
leeds_bike_2019 <- read_csv(
  "https://datamillnorth.org/download/e1dmk/f13f5d49-6128-4619-a3ff-e6e12f88a71f/Cycle%20Data%202019.csv"
)
# Other interesting datasets for you to explore are Paris cycling counters or Scotland.
1.3 Pedestrian Counts: Melbourne
Cities also monitor the number of pedestrians in key locations. We can use data from the sensors in Melbourne accessible here:
# Locations of the pedestrian counting sensors (GeoJSON export).
melbourne_locations_sf <- st_read(
  "https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/pedestrian-counting-system-sensor-locations/exports/geojson?lang=en&timezone=Europe%2FLondon"
)

# We will extract the counts for December 2024; the `refine` parameter in the
# URL restricts the export to sensing_date "2024/12".
melbourne_dec2024 <- read_csv(
  "https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/pedestrian-counting-system-monthly-counts-per-hour/exports/csv?lang=en&refine=sensing_date%3A%222024%2F12%22&timezone=Australia%2FMelbourne&use_labels=true&delimiter=%2C"
)
# 1.4 Public transport tap-in data: Bogotá
Public transport ridership data can be difficult to obtain. Fortunately, some cities whose systems are managed by a public organisation make this data available to the public. Bogotá’s integrated transport system publishes the tap-in data for the BRT system (see this). We will use one of the daily reports.
# BRT station locations, read from a GeoJSON file that must already be
# present in the working directory.
tm_stations_sf <- st_read("Estaciones_Troncales_de_TRANSMILENIO.geojson")
# Monthly boarding data can be manually obtained in the open data portal of TransMilenio here
# Consolidated 2024 archive of the trunk (BRT) validation records.
url_tm <- "https://storage.googleapis.com/validaciones_tmsa/ValidacionTroncal/2024/consolidado_2024.zip"
u_bn <- basename(url_tm)

# Download the archive only when it is not already in the working directory;
# mode = "wb" writes it as binary so the zip is not corrupted.
if (!file.exists(u_bn)) {
  download.file(
    url = url_tm,
    destfile = u_bn,
    mode = "wb"
  )
}

# Read the CSV straight from the archive without unpacking it to disk.
tm_brt_2024 <- read_csv(unz(u_bn, "troncal_2024.csv"))
# TfL’s crowding data is also a great source of ridership data. See this.
1.4.1 code
# Mean daily tap-ins (validaciones) per station, plus the station number
# taken from the digits in parentheses inside Estacion_Parada.
daily_tapins <- tm_brt_2024 |>
  # total per station and day
  summarise(
    validaciones = sum(validaciones),
    .by = c(Estacion_Parada, fecha)
  ) |>
  # mean of the daily totals per station
  summarise(
    validaciones = mean(validaciones),
    .by = Estacion_Parada
  ) |>
  # keep the "(digits)" part of the label, then strip the parentheses
  mutate(
    numero_estacion = str_remove_all(
      str_extract(Estacion_Parada, "\\(\\d*\\)"),
      "(\\(|\\))"
    )
  )
# Map of the stations; dot colour and dot size both show the mean daily
# tap-ins. Stations are matched on numero_estacion.
tm_stations_sf |>
  left_join(daily_tapins, by = "numero_estacion") |>
  tm_shape() +
  tm_dots("validaciones", size = "validaciones")
# 1.5 Network data from OSM
You may already be familiar with getting and using OSM data. This is an example of how to obtain the network that can be used for pedestrians.
# Centre of the study area as longitude/latitude (EPSG:4326).
my_coordinates <- c(-76.78893552474851, 18.01206727612776)
sf_point <- st_point(my_coordinates) |> st_sfc(crs = 4326)

# Buffer of 15e3 around the point.
# NOTE(review): presumably metres (15 km), which is how sf treats `dist` for
# geographic coordinates when s2 is enabled -- confirm.
sf_buffer <- st_buffer(sf_point, dist = 15e3)

# Show the study area on top of an OpenStreetMap basemap.
tm_basemap("OpenStreetMap") +
  tm_shape(sf_buffer) +
  tm_borders()

# Fetch the OSM network for the "walking" mode, using the buffer as the place.
my_network <- oe_get_network(sf_buffer, mode = "walking")

# Draw the network, styled by the OSM `highway` tag.
tm_shape(my_network) +
  tm_lines("highway")
# Note: you can access a simplified network dataset from Ordnance Survey’s OpenRoads dataset.
2 Links to other resources
- See here for a detailed reproducible example of how to visualise transport network datasets: https://github.com/juanfonsecaLS1/netvis-trafficflows
- See the `sfnetworks` package for information on how to work with spatial networks, including estimating centrality measures and shortest paths.