# load packages
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.1     ✔ stringr   1.5.2
✔ ggplot2   4.0.0     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Hello, this is some text.

# Build a small example data frame: three casualty types, each with an age
casualty_type = c("cat", "dog", "person")
casualty_age = seq(20, 60, by = 20)
crashes = data.frame(
  casualty_type = casualty_type,
  casualty_age = casualty_age
)
# Quick base-graphics look at the ages
plot(crashes$casualty_age)

Subsetting.

# Extract the casualty_type column as a vector, by name
crashes$casualty_type
[1] "cat"    "dog"    "person"
# Extract the first column as a vector, by position
crashes[[1]]
[1] "cat"    "dog"    "person"
# Single cell: row 2, column 1
crashes[2,1]
[1] "dog"
# Keep only the casualty_type column (the result is still a data frame)
crashes |>
  select(casualty_type)
  casualty_type
1           cat
2           dog
3        person
# Keep the rows where casualty_age is greater than 35
crashes |> 
  filter(casualty_age > 35)
  casualty_type casualty_age
1           dog           40
2        person           60
# Same filter after subtracting 20 from each age, i.e. casualty_age > 55
crashes |> 
  filter(casualty_age-20 > 35)
  casualty_type casualty_age
1        person           60
# Bar chart counting rows at each age, with bars filled by casualty type
ggplot(data = crashes) +
  geom_bar(mapping = aes(x = casualty_age, fill = casualty_type))

# Download and read the 2020 collision records with the stats19 package
ac = stats19::get_stats19(year = 2020, type = "collision")
Files identified: dft-road-casualty-statistics-collision-2020.csv
   https://data.dft.gov.uk/road-accidents-safety-data/dft-road-casualty-statistics-collision-2020.csv
Data saved at /tmp/RtmprkxnZh/dft-road-casualty-statistics-collision-2020.csv
Reading in: 
/tmp/RtmprkxnZh/dft-road-casualty-statistics-collision-2020.csv
date and time columns present, creating formatted datetime column
# Check what kind of object was returned
class(ac)
[1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
# Number of rows and columns in the 2020 data
dim(ac)
[1] 91199    45
# Download and read the 2021 collision records into a separate object
ac_2021 = stats19::get_stats19(year = 2021, type = "collision")
Files identified: dft-road-casualty-statistics-collision-2021.csv
   https://data.dft.gov.uk/road-accidents-safety-data/dft-road-casualty-statistics-collision-2021.csv
Data saved at /tmp/RtmprkxnZh/dft-road-casualty-statistics-collision-2021.csv
Reading in: 
/tmp/RtmprkxnZh/dft-road-casualty-statistics-collision-2021.csv
date and time columns present, creating formatted datetime column
# Compare the number of collision records in each year
nrow(ac)
[1] 91199
nrow(ac_2021)
[1] 101087
# # After googling "combine 2 data frames" let's try rbind
# ??combine
# ?rbind
# Append the 2021 rows below the 2020 rows. This overwrites `ac`, so from
# here on `ac` holds both years.
ac = rbind(ac, ac_2021)
# Confirm the size of the combined table
dim(ac)
[1] 192286     45
# `ac` already contains the 2021 rows after the rbind() above, so take its
# datetime column directly. Concatenating ac_2021$datetime again would
# duplicate every 2021 timestamp (192286 + 101087 = 293373 values).
ac_datetime = ac$datetime
length(ac_datetime)
[1] 192286
range(ac_datetime)
[1] "2020-01-01 00:01:00 GMT" "2021-12-31 23:55:00 GMT"
# Check the class of the combined object
class(ac)
[1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
# Inspect the type and first few values of every column
str(ac)
spc_tbl_ [192,286 × 45] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ collision_index                                 : chr [1:192286] "2020170H10890" "2020170L20690" "2020070094394" "2020070453828" ...
 $ collision_year                                  : int [1:192286] 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
 $ collision_ref_no                                : chr [1:192286] "170H10890" "170L20690" "070094394" "070453828" ...
 $ location_easting_osgr                           : int [1:192286] 446191 455573 355466 350786 360959 368986 369533 369274 330668 508733 ...
 $ location_northing_osgr                          : int [1:192286] 534540 519900 383570 385820 388689 422593 428752 429065 437057 431050 ...
 $ longitude                                       : int [1:192286] NA NA NA NA NA NA NA NA NA NA ...
 $ latitude                                        : int [1:192286] NA NA NA NA NA NA NA NA NA NA ...
 $ police_force                                    : chr [1:192286] "Cleveland" "Cleveland" "Cheshire" "Cheshire" ...
 $ collision_severity                              : chr [1:192286] "Slight" "Slight" "Slight" "Slight" ...
 $ number_of_vehicles                              : chr [1:192286] "2" "1" "2" "2" ...
 $ number_of_casualties                            : chr [1:192286] "1" "1" "2" "1" ...
 $ date                                            : Date[1:192286], format: "2020-10-28" "2020-08-31" ...
 $ day_of_week                                     : chr [1:192286] "Wednesday" "Monday" "Wednesday" "Friday" ...
 $ time                                            : chr [1:192286] "09:08" "13:25" "21:30" "20:05" ...
 $ local_authority_district                        : chr [1:192286] "Hartlepool" "Redcar and Cleveland" "Halton" "Halton" ...
 $ local_authority_ons_district                    : chr [1:192286] "Hartlepool" "Redcar and Cleveland" "Halton" "Halton" ...
 $ local_authority_highway                         : chr [1:192286] "Hartlepool" "Redcar and Cleveland" "Halton" "Halton" ...
 $ local_authority_highway_current                 : chr [1:192286] "E06000001" "E06000003" "E06000006" "E06000006" ...
 $ first_road_class                                : chr [1:192286] "A" "Unclassified" "A" "Unclassified" ...
 $ first_road_number                               : chr [1:192286] "179" "first_road_class is C or Unclassified. These roads do not have official numbers so recorded as zero " "558" "first_road_class is C or Unclassified. These roads do not have official numbers so recorded as zero " ...
 $ road_type                                       : chr [1:192286] "Single carriageway" "Single carriageway" "Slip road" "Single carriageway" ...
 $ speed_limit                                     : chr [1:192286] "60" "30" "60" "20" ...
 $ junction_detail_historic                        : chr [1:192286] "T or staggered junction" "Crossroads" "Roundabout" "T or staggered junction" ...
 $ junction_detail                                 : chr [1:192286] "T or staggered junction" "Crossroads" "Not at junction or within 20 metres" "T or staggered junction" ...
 $ junction_control                                : chr [1:192286] "Give way or uncontrolled" "Give way or uncontrolled" "Give way or uncontrolled" "Give way or uncontrolled" ...
 $ second_road_class                               : chr [1:192286] "Unclassified" "Unclassified" "Unclassified" "Unclassified" ...
 $ second_road_number                              : chr [1:192286] "first_road_class is C or Unclassified. These roads do not have official numbers so recorded as zero " "first_road_class is C or Unclassified. These roads do not have official numbers so recorded as zero " "first_road_class is C or Unclassified. These roads do not have official numbers so recorded as zero " "first_road_class is C or Unclassified. These roads do not have official numbers so recorded as zero " ...
 $ pedestrian_crossing_human_control_historic      : chr [1:192286] "None within 50 metres " "None within 50 metres " "None within 50 metres " "None within 50 metres " ...
 $ pedestrian_crossing_physical_facilities_historic: chr [1:192286] "No physical crossing facilities within 50 metres" "No physical crossing facilities within 50 metres" "No physical crossing facilities within 50 metres" "No physical crossing facilities within 50 metres" ...
 $ pedestrian_crossing                             : chr [1:192286] "No physical crossing facility within 50m" "No physical crossing facility within 50m" "No physical crossing facility within 50m" "No physical crossing facility within 50m" ...
 $ light_conditions                                : chr [1:192286] "Daylight" "Daylight" "Darkness - lighting unknown" "Daylight" ...
 $ weather_conditions                              : chr [1:192286] "Fine no high winds" "Fine no high winds" "Fine no high winds" "Fine no high winds" ...
 $ road_surface_conditions                         : chr [1:192286] "Dry" "Dry" "Wet or damp" "Dry" ...
 $ special_conditions_at_site                      : chr [1:192286] "None" "None" "None" "None" ...
 $ carriageway_hazards_historic                    : chr [1:192286] "None" "None" "None" "None" ...
 $ carriageway_hazards                             : chr [1:192286] "None" "None" "None" "None" ...
 $ urban_or_rural_area                             : chr [1:192286] "Rural" "Urban" "Rural" "Urban" ...
 $ did_police_officer_attend_scene_of_accident     : chr [1:192286] "Yes" "Yes" "Yes" "Yes" ...
 $ trunk_road_flag                                 : chr [1:192286] "Non-trunk" "Non-trunk" "Non-trunk" "Non-trunk" ...
 $ lsoa_of_accident_location                       : chr [1:192286] "E01011959" "E01033471" "E01012386" "E01012427" ...
 $ enhanced_severity_collision                     : num [1:192286] -1 -1 -1 -1 -1 7 3 3 3 3 ...
 $ collision_injury_based                          : chr [1:192286] "Based on severity reporting" "Based on severity reporting" "Based on severity reporting" "Based on severity reporting" ...
 $ collision_adjusted_severity_serious             : num [1:192286] 0.2309 0.117 0.0431 0.0239 0.1138 ...
 $ collision_adjusted_severity_slight              : num [1:192286] 0.769 0.883 0.957 0.976 0.886 ...
 $ datetime                                        : POSIXct[1:192286], format: "2020-10-28 09:08:00" "2020-08-31 13:25:00" ...
 - attr(*, "spec")=
  .. cols(
  ..   collision_index = col_character(),
  ..   collision_year = col_integer(),
  ..   collision_ref_no = col_character(),
  ..   location_easting_osgr = col_integer(),
  ..   location_northing_osgr = col_integer(),
  ..   longitude = col_integer(),
  ..   latitude = col_integer(),
  ..   police_force = col_character(),
  ..   collision_severity = col_character(),
  ..   number_of_vehicles = col_character(),
  ..   number_of_casualties = col_character(),
  ..   date = col_character(),
  ..   day_of_week = col_character(),
  ..   time = col_character(),
  ..   local_authority_district = col_character(),
  ..   local_authority_ons_district = col_character(),
  ..   local_authority_highway = col_character(),
  ..   local_authority_highway_current = col_character(),
  ..   first_road_class = col_character(),
  ..   first_road_number = col_character(),
  ..   road_type = col_character(),
  ..   speed_limit = col_character(),
  ..   junction_detail_historic = col_character(),
  ..   junction_detail = col_character(),
  ..   junction_control = col_character(),
  ..   second_road_class = col_character(),
  ..   second_road_number = col_character(),
  ..   pedestrian_crossing_human_control_historic = col_character(),
  ..   pedestrian_crossing_physical_facilities_historic = col_character(),
  ..   pedestrian_crossing = col_character(),
  ..   light_conditions = col_character(),
  ..   weather_conditions = col_character(),
  ..   road_surface_conditions = col_character(),
  ..   special_conditions_at_site = col_character(),
  ..   carriageway_hazards_historic = col_character(),
  ..   carriageway_hazards = col_character(),
  ..   urban_or_rural_area = col_character(),
  ..   did_police_officer_attend_scene_of_accident = col_character(),
  ..   trunk_road_flag = col_character(),
  ..   lsoa_of_accident_location = col_character(),
  ..   enhanced_severity_collision = col_double(),
  ..   collision_injury_based = col_character(),
  ..   collision_adjusted_severity_serious = col_double(),
  ..   collision_adjusted_severity_slight = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
# List the column names
names(ac)
 [1] "collision_index"                                 
 [2] "collision_year"                                  
 [3] "collision_ref_no"                                
 [4] "location_easting_osgr"                           
 [5] "location_northing_osgr"                          
 [6] "longitude"                                       
 [7] "latitude"                                        
 [8] "police_force"                                    
 [9] "collision_severity"                              
[10] "number_of_vehicles"                              
[11] "number_of_casualties"                            
[12] "date"                                            
[13] "day_of_week"                                     
[14] "time"                                            
[15] "local_authority_district"                        
[16] "local_authority_ons_district"                    
[17] "local_authority_highway"                         
[18] "local_authority_highway_current"                 
[19] "first_road_class"                                
[20] "first_road_number"                               
[21] "road_type"                                       
[22] "speed_limit"                                     
[23] "junction_detail_historic"                        
[24] "junction_detail"                                 
[25] "junction_control"                                
[26] "second_road_class"                               
[27] "second_road_number"                              
[28] "pedestrian_crossing_human_control_historic"      
[29] "pedestrian_crossing_physical_facilities_historic"
[30] "pedestrian_crossing"                             
[31] "light_conditions"                                
[32] "weather_conditions"                              
[33] "road_surface_conditions"                         
[34] "special_conditions_at_site"                      
[35] "carriageway_hazards_historic"                    
[36] "carriageway_hazards"                             
[37] "urban_or_rural_area"                             
[38] "did_police_officer_attend_scene_of_accident"     
[39] "trunk_road_flag"                                 
[40] "lsoa_of_accident_location"                       
[41] "enhanced_severity_collision"                     
[42] "collision_injury_based"                          
[43] "collision_adjusted_severity_serious"             
[44] "collision_adjusted_severity_slight"              
[45] "datetime"                                        
# Count collisions per calendar date to show how crash numbers vary
# over time. Despite the `_by_year` name, the result has one row per
# `date`, with the count stored in `n_crashes`.
ac_by_year = count(ac, date, name = "n_crashes")
# Plot the number of collisions on each date (points) together with a
# 7-day rolling mean (red line). `ac_by_year` has one row per date, so the
# y axis is a per-day count.
ac_by_year |>
  mutate(
    `N. crashes per day` = n_crashes,
    # `fill = NA` pads the ends of the rolling mean with NA; it replaces
    # zoo's deprecated `na.pad = TRUE`
    `Week average` = zoo::rollmean(n_crashes, 7, fill = NA),
    Date = date
  ) |>
  ggplot(aes(x = Date, y = `N. crashes per day`)) +
  geom_point(alpha = 0.1) +
  # Start the y axis at zero and let ggplot choose the upper limit
  ylim(c(0, NA)) +
  # geom_smooth() +
  # weekly rolling average
  geom_line(aes(Date, `Week average`), colour = "red") +
  theme_minimal()
Warning: Removed 6 rows containing missing values or values outside the scale range
(`geom_line()`).

# Updated plot with title and legend.
# Points are the number of collisions on each date; the line is a 7-day
# rolling mean of those daily counts. `ac_by_year` has one row per date, so
# the y axis is a per-day count.
ac_by_year |>
  mutate(
    `N. crashes per day` = n_crashes,
    # `fill = NA` pads the ends of the rolling mean with NA; it replaces
    # zoo's deprecated `na.pad = TRUE`
    `Week average` = zoo::rollmean(n_crashes, 7, fill = NA),
    Date = date
  ) |>
  ggplot(aes(x = Date, y = `N. crashes per day`)) +
  geom_point(alpha = 0.1) +
  # Start the y axis at zero and let ggplot choose the upper limit
  ylim(c(0, NA)) +
  # geom_smooth() +
  # weekly rolling average; colour is mapped to a constant label inside aes()
  # so that the line gets a legend entry
  geom_line(aes(Date, `Week average`, colour = "Week average")) +
  theme_minimal() +
  labs(
    colour = "Legend"
  ) +
  # Give the "Week average" legend entry its red colour
  scale_colour_manual(values = c("Week average" = "red")) +
  ggtitle("Collisions/day, 2020 to 2021") +
  theme(
    legend.position = "bottom"
  )
Warning: Removed 6 rows containing missing values or values outside the scale range
(`geom_line()`).

1 Python example

# A Python list of three strings
casualty_type_py = ["a", "B", "c"]
# Echo the list so its contents are displayed
casualty_type_py
['a', 'B', 'c']