Get Data for the Course

1 Overview

This document contains R code to download, process, and upload datasets used in the AI and Data Science for Transport course. It is designed to be run by course maintainers to populate the GitHub Releases with necessary data files.

2 Setup

Load necessary packages.

library(sf)
library(dplyr)
library(readr)
library(tidyr)
library(stringr)
library(stats19)
library(osmdata)
library(osmactive)
library(tmap)

3 1. TfSE Boundary and Geographies

Get the Transport for the South East (TfSE) boundary and relevant administrative geographies.

# TfSE boundary plus the LAD and MSOA layers that intersect it.
#
# Input:  a local TfSE boundary shapefile (the path below is specific to the
#         maintainer's machine). This part mirrors the logic in s3.qmd.
# Output: tfse_boundary.geojson, tfse_lads.gpkg and tfse_msoas.gpkg, written
#         to the working directory (existing files are overwritten).
tfse_path = "/home/robin/github/robinlovelace/counterbid/tfse_boundary/TfSE_Area_Boundary.shp"
if (file.exists(tfse_path)) {
  tfse_boundary = read_sf(tfse_path)

  # Local Authority Districts, fetched as GeoJSON in a single request.
  # NOTE(review): ArcGIS feature services usually cap the number of records
  # returned per request. Confirm both queries below return every feature;
  # if not, page through the results (resultOffset / resultRecordCount).
  u_lads = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Local_Authority_Districts_May_2024_Boundaries_UK_BUC/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
  lads = read_sf(u_lads)

  # Middle Super Output Areas, fetched the same way.
  # NOTE(review): the layer name ends in "PWC", presumably population-weighted
  # centroids (points rather than polygons) - confirm this is intended.
  u_msoas = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Middle_Super_Output_Areas_DEC_2021_EW_PWC/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
  msoas = read_sf(u_msoas)

  # Put every layer in the CRS of the LADs so the spatial subsets below
  # compare like with like.
  tfse_boundary = st_transform(tfse_boundary, st_crs(lads))
  msoas = st_transform(msoas, st_crs(lads))

  # Spatial subset: keep the features that intersect the TfSE boundary.
  tfse_lads = lads[tfse_boundary, ]
  tfse_msoas = msoas[tfse_boundary, ]

  # Save
  write_sf(tfse_boundary, "tfse_boundary.geojson", delete_dsn = TRUE)
  write_sf(tfse_lads, "tfse_lads.gpkg", delete_dsn = TRUE)
  write_sf(tfse_msoas, "tfse_msoas.gpkg", delete_dsn = TRUE)
} else {
  # Later sections depend on the files written above, so say why they are
  # missing instead of skipping silently.
  warning(
    "TfSE boundary shapefile not found at ", tfse_path,
    ". Skipping boundary, LAD and MSOA outputs."
  )
}

4 2. Motorway Network (South East England)

Download motorway data from OpenStreetMap.

# Motorways in South East England, from OpenStreetMap.

# Look up a bounding box for the named place
bbox <- getbb("South East England, UK")

# Overpass query restricted to that box, selecting highway = motorway
motorway_query <- add_osm_feature(opq(bbox), key = "highway", value = "motorway")

# Run the query and return the result as sf objects
motorway_data <- osmdata_sf(motorway_query)

# Only the line geometries are kept
motorways <- motorway_data[["osm_lines"]]

# Write to GeoPackage, replacing any existing file
write_sf(motorways, "south_east_motorways.gpkg", delete_dsn = TRUE)

5 3. STATS19 Collision Data (5 Years)

Download 5 years of road safety data (2020-2024) and filter to TfSE.

# STATS19 collision records for 2020-2024.
# get_stats19() is called once per year and the yearly tables are stacked
# into a single data frame.
years = 2020:2024
collisions_5y = bind_rows(
  lapply(years, \(yr) stats19::get_stats19(year = yr, type = "collision"))
)

# Keep only collisions inside the TfSE boundary and save them.
# Requires tfse_boundary.geojson in the working directory.
if (file.exists("tfse_boundary.geojson")) {
  tfse_boundary = read_sf("tfse_boundary.geojson")
  # Convert to sf points, then match the CRS of the boundary before the
  # spatial subset.
  collisions_sf = stats19::format_sf(collisions_5y, lonlat = TRUE)
  collisions_sf = st_transform(collisions_sf, st_crs(tfse_boundary))
  collisions_tfse = collisions_sf[tfse_boundary, ]
  write_sf(collisions_tfse, "collisions_tfse_2020_2024.gpkg", delete_dsn = TRUE)
} else {
  # Make the skipped output visible rather than silently producing nothing.
  warning("tfse_boundary.geojson not found. Skipping collision filtering.")
}

6 4. Census OD Data (2011 and 2021)

Process Origin-Destination data for commuting. The code below handles the 2021 data only and does not download anything itself. Note: This requires ODWP02EW_MSOA_v1.zip to be present in the working directory. You can download it from WICID (see https://github.com/itsleeds/2021-census-od-data/blob/main/README.md).

# 2021 OD flows by travel method, restricted to the TfSE region.
# Reads ODWP02EW_MSOA_v1.zip and tfse_msoas.gpkg from the working directory
# and writes od_tfse_2021.csv (one row per origin-destination pair, one
# column per travel method).
od_zip_path = "ODWP02EW_MSOA_v1.zip"

if (!file.exists(od_zip_path)) {
  warning("ODWP02EW_MSOA_v1.zip not found. Skipping OD data processing.")
} else {
  message("Processing OD data...")
  od_2021 = read_csv(od_zip_path)

  # Keep four columns, selected by position, and give them short names:
  #   1 -> o      Middle layer Super Output Areas code (Origin)
  #   3 -> d      MSOA of workplace code (Destination)
  #   6 -> method Method used to travel to workplace (12 categories) label
  #   7 -> count  Count
  od_2021 = setNames(
    od_2021[c(1, 3, 6, 7)],
    c("o", "d", "method", "count")
  )

  if (!file.exists("tfse_msoas.gpkg")) {
    warning("tfse_msoas.gpkg not found. Skipping OD filtering.")
  } else {
    # MSOA codes that fall within TfSE
    tfse_msoas = read_sf("tfse_msoas.gpkg")
    valid_msoas = tfse_msoas$MSOA21CD

    # Long census labels -> short names (as per README); any label that is
    # not listed here is kept unchanged.
    method_short = c(
      "Bus, minibus or coach" = "Bus",
      "Driving a car or van" = "Car",
      "Motorcycle, scooter or moped" = "Motorcycle",
      "On foot" = "Walking",
      "Other method of travel to work" = "Other",
      "Passenger in a car or van" = "Passenger",
      "Underground, metro, light rail, tram" = "Metro",
      "Work mainly at or from home" = "Home"
    )

    # Keep flows whose origin AND destination are both inside TfSE (a
    # self-contained regional dataset), drop the "Not in employment" rows,
    # then shorten the method labels.
    od_tfse = od_2021 |>
      filter(o %in% valid_msoas, d %in% valid_msoas) |>
      filter(!str_detect(method, "Not in employment")) |>
      mutate(method = coalesce(unname(method_short[method]), method))

    # One column per method; combinations with no row become 0
    od_tfse_wide = pivot_wider(
      od_tfse,
      names_from = method,
      values_from = count,
      values_fill = 0
    )

    # Save
    write_csv(od_tfse_wide, "od_tfse_2021.csv")
    message("Saved od_tfse_2021.csv")
  }
}

7 5. DfT Traffic Data

Download traffic count data. This step is not yet implemented: the code below is a commented-out placeholder.

# Placeholder for DfT traffic data download
# dft_traffic = read.csv("https://storage.googleapis.com/dft-statistics/road-traffic/downloads/dft_traffic_counts_aadf.csv")
# Filter for TfSE region

8 6. TfSE Transport Network (osmactive)

Download and classify transport network using osmactive.

# Cycling and driving networks for the TfSE area, classified with osmactive.
# Requires tfse_boundary.geojson in the working directory; writes
# tfse_cycling_network.gpkg and tfse_driving_network.gpkg.
if (file.exists("tfse_boundary.geojson")) {
  tfse_boundary = read_sf("tfse_boundary.geojson")

  # Get the travel network for TfSE.
  # The place requested is "England" (not a South East subset); the result is
  # restricted to the TfSE boundary via boundary_type = "clipsrc".
  # NOTE(review): this presumably downloads the full England extract before
  # clipping - confirm that the size and run time are acceptable.
  osm_tfse = get_travel_network("England", boundary = tfse_boundary, boundary_type = "clipsrc")

  # Extract networks
  cycle_net_tfse = get_cycling_network(osm_tfse)
  drive_net_tfse = get_driving_network(osm_tfse)

  # Calculate distance to road and classify
  cycle_net_tfse = distance_to_road(cycle_net_tfse, drive_net_tfse)
  cycle_net_tfse = classify_cycle_infrastructure(cycle_net_tfse)

  # Save datasets
  write_sf(cycle_net_tfse, "tfse_cycling_network.gpkg", delete_dsn = TRUE)
  write_sf(drive_net_tfse, "tfse_driving_network.gpkg", delete_dsn = TRUE)
} else {
  # The Brighton section needs cycle_net_tfse, so report why it is absent.
  warning("tfse_boundary.geojson not found. Skipping TfSE transport network.")
}

9 7. Brighton Area Analysis

Create a 20km buffer around Brighton and generate an interactive map.

# Brighton subset of the TfSE cycle network plus an interactive map.
# Needs cycle_net_tfse in the session (created in the transport network
# section); writes brighton_20km_cycle_network.gpkg and
# brighton_cycle_map.html.
if (exists("cycle_net_tfse")) {
  # Define Brighton center
  # Coordinates for Brighton: ~ 50.8225° N, 0.1372° W
  # (st_point takes x = longitude first, then y = latitude)
  brighton_point = st_point(c(-0.1372, 50.8225)) |>
    st_sfc(crs = 4326) |>
    st_transform(st_crs(cycle_net_tfse))

  # Buffer 20km
  # NOTE(review): 20000 is only metres if the network CRS uses metres, or if
  # it is lon/lat and sf is using s2 - confirm the CRS of cycle_net_tfse.
  brighton_buffer = st_buffer(brighton_point, 20000)

  # Spatial subset: keep every segment that intersects the buffer. Segments
  # are kept whole, not cut at the buffer edge.
  brighton_cycle_net = cycle_net_tfse[brighton_buffer, ]

  # Save the subset
  write_sf(brighton_cycle_net, "brighton_20km_cycle_network.gpkg", delete_dsn = TRUE)

  # Create interactive map
  tmap_mode("view")
  m_brighton = tm_shape(brighton_cycle_net) +
    tm_lines(col = "cycle_segregation", lwd = 2, title.col = "Cycle Infrastructure") +
    tm_basemap("CartoDB.Positron")

  # Save map
  tmap_save(m_brighton, "brighton_cycle_map.html")
} else {
  # Make the skipped outputs visible rather than silently producing nothing.
  warning("cycle_net_tfse not found. Skipping Brighton area analysis.")
}

10 Upload to GitHub Release

Upload all generated files to the v1 release.

# Files produced by the sections above, uploaded as assets of release "v1"
# with the GitHub CLI (`gh`). Existing assets of the same name are replaced
# (--clobber).
files_to_upload = c(
  "tfse_boundary.geojson",
  "tfse_lads.gpkg",
  "tfse_msoas.gpkg",
  "south_east_motorways.gpkg",
  "collisions_tfse_2020_2024.gpkg",
  "od_tfse_2021.csv",
  "tfse_cycling_network.gpkg",
  "tfse_driving_network.gpkg",
  "brighton_20km_cycle_network.gpkg",
  "brighton_cycle_map.html"
)

# Files that were never generated are still skipped, but are now reported so
# an incomplete release does not go unnoticed.
missing_files = files_to_upload[!file.exists(files_to_upload)]
if (length(missing_files) > 0) {
  warning(
    "Not uploaded (file not found): ",
    paste(missing_files, collapse = ", ")
  )
}

for (f in setdiff(files_to_upload, missing_files)) {
  # system2() passes arguments separately and shQuote() protects the file
  # name, so nothing in the name is interpreted by the shell.
  status = system2("gh", c("release", "upload", "v1", shQuote(f), "--clobber"))
  # A non-zero exit status means the upload did not succeed.
  if (!identical(status, 0L)) {
    warning("gh release upload failed for ", f, " (exit status ", status, ")")
  }
}

Reuse