trending aims to provides a coherent interface to several modelling tools. Whilst it is useful in an interactive context, it’s main focus is to provide an intuitive interface on which other packages can be developed (e.g. trendbreaker).
Main features
Model specification: Interfaces to common models through intuitive functions;
lm_model()
,glm_model()
,glm_nb_model
andbrms_model
*.Model fitting and prediction: Once specified, models can be fit to data and generate confidence and prediction intervals for future data using
fit()
andpredict()
.Error and warning catching: The provided methods for
fit
andpredict
catch all warnings and errors, returning the output and these captured values in a list.
* Requires brms
Example usage
Setup
library(outbreaks) # for data
library(trending) # for trend fitting
library(dplyr) # for data manipulation
# load data
data(covid19_england_nhscalls_2020)
# select 6 weeks of data (from a period when the prevalence was decreasing)
last_date <- as.Date("2020-05-28")
first_date <- last_date - 8*7
pathways_recent <-
covid19_england_nhscalls_2020 %>%
filter(date >= first_date, date <= last_date) %>%
group_by(date, day, weekday) %>%
summarise(count = sum(count), .groups = "drop")
# split data for fitting and prediction
dat <-
pathways_recent %>%
group_by(date <= first_date + 6*7) %>%
group_split()
fitting_data <- dat[[2]]
pred_data <- select(dat[[1]], date, day, weekday)
A succesful model fit
(model <- glm_nb_model(count ~ day + weekday))
#> Untrained trending model:
#> glm.nb(formula = count ~ day + weekday)
(fitted_model <- fit(model, fitting_data))
#> <trending_fit_tbl> 1 x 3
#> result warnings errors
#> <list> <list> <list>
#> 1 <negbin> <NULL> <NULL>
fitted_model %>% get_result()
#> [[1]]
#>
#> Call: glm.nb(formula = count ~ day + weekday, data = fitting_data,
#> init.theta = 39.34595795, link = log)
#>
#> Coefficients:
#> (Intercept) day weekdaymonday weekdayweekend
#> 11.46417 -0.03476 0.19162 -0.14224
#>
#> Degrees of Freedom: 42 Total (i.e. Null); 39 Residual
#> Null Deviance: 395.9
#> Residual Deviance: 43.17 AIC: 850.6
# default
fitted_model %>%
predict(pred_data) %>%
get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122. 8107 18870
#> 2 2020-05-16 59 weekend 10625. 9299. 12140. 6618 16223
#> 3 2020-05-17 60 weekend 10262. 8956. 11759. 6373 15714
#> 4 2020-05-18 61 monday 13840. 11749. 16303. 8363 21784
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450. 6962 16638
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066. 6701 16124
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693. 6450 15626
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333. 6208 15145
#> 9 2020-05-23 66 weekend 8330. 7138. 9721. 5079 12992
#> 10 2020-05-24 67 weekend 8045. 6872. 9419. 4889 12588
#> 11 2020-05-25 68 monday 10851. 9048. 13012. 6439 17388
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001. 5326 13366
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694. 5126 12955
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396. 4933 12558
# without uncertainty
fitted_model %>%
predict(pred_data, uncertain = FALSE) %>%
get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122. 9027 16948
#> 2 2020-05-16 59 weekend 10625. 9299. 12140. 7562 14199
#> 3 2020-05-17 60 weekend 10262. 8956. 11759. 7304 13715
#> 4 2020-05-18 61 monday 13840. 11749. 16303. 9852 18494
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450. 7855 14748
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066. 7586 14245
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693. 7327 13758
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333. 7076 13289
#> 9 2020-05-23 66 weekend 8330. 7138. 9721. 5928 11134
#> 10 2020-05-24 67 weekend 8045. 6872. 9419. 5725 10754
#> 11 2020-05-25 68 monday 10851. 9048. 13012. 7723 14501
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001. 6157 11564
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694. 5947 11170
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396. 5743 10788
# without prediction intervals
fitted_model %>%
predict(pred_data, add_pi = FALSE) %>%
get_result()
#> [[1]]
#> <trending_prediction> 14 x 6
#> date day weekday estimate lower_ci upper_ci
#> <date> <int> <fct> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122.
#> 2 2020-05-16 59 weekend 10625. 9299. 12140.
#> 3 2020-05-17 60 weekend 10262. 8956. 11759.
#> 4 2020-05-18 61 monday 13840. 11749. 16303.
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450.
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066.
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693.
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333.
#> 9 2020-05-23 66 weekend 8330. 7138. 9721.
#> 10 2020-05-24 67 weekend 8045. 6872. 9419.
#> 11 2020-05-25 68 monday 10851. 9048. 13012.
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001.
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694.
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396.
# bootstraped prediction intervals
fitted_model %>%
predict(pred_data, simulate_pi = TRUE) %>%
get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122. 8943 17141
#> 2 2020-05-16 59 weekend 10625. 9299. 12140. 7502 14550
#> 3 2020-05-17 60 weekend 10262. 8956. 11759. 7031 14116
#> 4 2020-05-18 61 monday 13840. 11749. 16303. 9530 19059
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450. 7619 14946
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066. 7396 14427
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693. 7219 14028
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333. 6946 13532
#> 9 2020-05-23 66 weekend 8330. 7138. 9721. 5694 11563
#> 10 2020-05-24 67 weekend 8045. 6872. 9419. 5608 11247
#> 11 2020-05-25 68 monday 10851. 9048. 13012. 7388 15247
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001. 6011 12209
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694. 5857 11701
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396. 5564 11239
Example of failed model fitting
(model2 <- glm_nb_model(count ~ day + nonexistent))
#> Untrained trending model:
#> glm.nb(formula = count ~ day + nonexistent)
(fitted_model2 <- fit(model2, fitting_data))
#> <trending_fit_tbl> 1 x 3
#> result warnings errors
#> <list> <list> <list>
#> 1 <NULL> <NULL> <chr [1]>
get_result(fitted_model2)
#> [[1]]
#> NULL
get_errors(fitted_model2)
#> [[1]]
#> [1] "object 'nonexistent' not found"
Multiple models
The fit function also works with a list input of multiple models.
models <- list(
simple = lm_model(count ~ day),
glm_poisson = glm_model(count ~ day, family = "poisson"),
glm_negbin = glm_nb_model(count ~ day + weekday),
will_error = glm_nb_model(count ~ day + nonexistant)
)
(fitted_tbl <- fit(models, fitting_data))
#> <trending_fit_tbl> 4 x 4
#> model_name result warnings errors
#> <chr> <named list> <named list> <named list>
#> 1 simple <lm> <NULL> <NULL>
#> 2 glm_poisson <glm> <NULL> <NULL>
#> 3 glm_negbin <negbin> <NULL> <NULL>
#> 4 will_error <NULL> <NULL> <chr [1]>
get_result(fitted_tbl)
#> $simple
#>
#> Call:
#> lm(formula = count ~ day, data = fitting_data)
#>
#> Coefficients:
#> (Intercept) day
#> 69202 -1093
#>
#>
#> $glm_poisson
#>
#> Call: glm(formula = count ~ day, family = "poisson", data = fitting_data)
#>
#> Coefficients:
#> (Intercept) day
#> 11.57014 -0.03822
#>
#> Degrees of Freedom: 42 Total (i.e. Null); 41 Residual
#> Null Deviance: 321600
#> Residual Deviance: 50970 AIC: 51490
#>
#> $glm_negbin
#>
#> Call: glm.nb(formula = count ~ day + weekday, data = fitting_data,
#> init.theta = 39.34595795, link = log)
#>
#> Coefficients:
#> (Intercept) day weekdaymonday weekdayweekend
#> 11.46417 -0.03476 0.19162 -0.14224
#>
#> Degrees of Freedom: 42 Total (i.e. Null); 39 Residual
#> Null Deviance: 395.9
#> Residual Deviance: 43.17 AIC: 850.6
#>
#> $will_error
#> NULL
This can also then be used with predict()
(pred <- predict(fitted_tbl, pred_data))
#> <trending_predict_tbl> 4 x 4
#> model_name result warnings errors
#> <chr> <named list> <named list> <named list>
#> 1 simple <trndng_p [14 × 8]> <NULL> <NULL>
#> 2 glm_poisson <trndng_p [14 × 8]> <NULL> <NULL>
#> 3 glm_negbin <trndng_p [14 × 8]> <NULL> <NULL>
#> 4 will_error <NULL> <NULL> <chr [1]>
get_result(pred)
#> $simple
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 5813. 234. 11392. -13007. 24632.
#> 2 2020-05-16 59 weekend 4720. -1053. 10492. -14158. 23597.
#> 3 2020-05-17 60 weekend 3627. -2341. 9594. -15311. 22565.
#> 4 2020-05-18 61 monday 2534. -3631. 8699. -16467. 21535.
#> 5 2020-05-19 62 rest_of_week 1441. -4922. 7804. -17626. 20508.
#> 6 2020-05-20 63 rest_of_week 348. -6215. 6911. -18786. 19482.
#> 7 2020-05-21 64 rest_of_week -745. -7509. 6020. -19949. 18459.
#> 8 2020-05-22 65 rest_of_week -1838. -8805. 5129. -21114. 17439.
#> 9 2020-05-23 66 weekend -2931. -10101. 4240. -22282. 16420.
#> 10 2020-05-24 67 weekend -4024. -11399. 3352. -23451. 15404.
#> 11 2020-05-25 68 monday -5116. -12697. 2464. -24623. 14390.
#> 12 2020-05-26 69 rest_of_week -6209. -13996. 1578. -25797. 13378.
#> 13 2020-05-27 70 rest_of_week -7302. -15296. 692. -26973. 12369.
#> 14 2020-05-28 71 rest_of_week -8395. -16597. -193. -28152. 11361.
#>
#> $glm_poisson
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 11535. 11484. 11587. 11275 11798
#> 2 2020-05-16 59 weekend 11103. 11052. 11154. 10846 11361
#> 3 2020-05-17 60 weekend 10686. 10636. 10737. 10434 10941
#> 4 2020-05-18 61 monday 10286. 10236. 10336. 10038 10536
#> 5 2020-05-19 62 rest_of_week 9900. 9850. 9950. 9656 10146
#> 6 2020-05-20 63 rest_of_week 9529. 9480. 9578. 9289 9770
#> 7 2020-05-21 64 rest_of_week 9171. 9123. 9220. 8936 9409
#> 8 2020-05-22 65 rest_of_week 8827. 8780. 8876. 8596 9061
#> 9 2020-05-23 66 weekend 8496. 8449. 8544. 8269 8726
#> 10 2020-05-24 67 weekend 8178. 8131. 8225. 7955 8403
#> 11 2020-05-25 68 monday 7871. 7825. 7917. 7652 8092
#> 12 2020-05-26 69 rest_of_week 7576. 7531. 7621. 7361 7793
#> 13 2020-05-27 70 rest_of_week 7292. 7247. 7337. 7081 7505
#> 14 2020-05-28 71 rest_of_week 7018. 6974. 7063. 6811 7228
#>
#> $glm_negbin
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122. 8107 18870
#> 2 2020-05-16 59 weekend 10625. 9299. 12140. 6618 16223
#> 3 2020-05-17 60 weekend 10262. 8956. 11759. 6373 15714
#> 4 2020-05-18 61 monday 13840. 11749. 16303. 8363 21784
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450. 6962 16638
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066. 6701 16124
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693. 6450 15626
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333. 6208 15145
#> 9 2020-05-23 66 weekend 8330. 7138. 9721. 5079 12992
#> 10 2020-05-24 67 weekend 8045. 6872. 9419. 4889 12588
#> 11 2020-05-25 68 monday 10851. 9048. 13012. 6439 17388
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001. 5326 13366
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694. 5126 12955
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396. 4933 12558
#>
#> $will_error
#> NULL