Example
To illustrate the trend fitting functionality of i2extras we will use the simulated Ebola Virus Disease (EVD) outbreak data from the outbreaks package.
Loading relevant packages and data
library(outbreaks)
library(i2extras)
#> Loading required package: incidence2
#> Loading required package: grates
library(ggplot2)
raw_dat <- ebola_sim_clean$linelist
For this example we will restrict ourselves to the first 20 weeks of data:
dat <- incidence(
raw_dat,
date_index = "date_of_onset",
interval = "week",
groups = "gender"
)[1:20, ]
dat
#> # incidence: 20 x 4
#> # count vars: date_of_onset
#> # groups: gender
#> date_index gender count_variable count
#> * <isowk> <fct> <chr> <int>
#> 1 2014-W15 f date_of_onset 1
#> 2 2014-W16 m date_of_onset 1
#> 3 2014-W17 f date_of_onset 4
#> 4 2014-W17 m date_of_onset 1
#> 5 2014-W18 f date_of_onset 4
#> 6 2014-W19 f date_of_onset 9
#> 7 2014-W19 m date_of_onset 3
#> 8 2014-W20 f date_of_onset 7
#> 9 2014-W20 m date_of_onset 10
#> 10 2014-W21 f date_of_onset 8
#> 11 2014-W21 m date_of_onset 7
#> 12 2014-W22 f date_of_onset 9
#> 13 2014-W22 m date_of_onset 10
#> 14 2014-W23 f date_of_onset 13
#> 15 2014-W23 m date_of_onset 10
#> 16 2014-W24 f date_of_onset 7
#> 17 2014-W24 m date_of_onset 14
#> 18 2014-W25 f date_of_onset 15
#> 19 2014-W25 m date_of_onset 15
#> 20 2014-W26 f date_of_onset 12
plot(dat, angle = 45, border_colour = "white")
Modeling incidence
We can use fit_curve()
to fit the data with either a poisson or negative binomial regression model. The output from this will be a nested object with class incidence2_fit
which has methods available for both automatic plotting and the calculation of growth (decay) rates and doubling (halving) times.
out <- fit_curve(dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 2 × 9
#> count_varia…¹ gender data model estimates fitti…² fitti…³ predi…⁴ predi…⁵
#> <chr> <fct> <list<t> <lis> <list> <list> <list> <list> <list>
#> 1 date_of_onset f [11 × 2] <glm> <trndng_p> <NULL> <NULL> <NULL> <NULL>
#> 2 date_of_onset m [9 × 2] <glm> <trndng_p> <NULL> <NULL> <NULL> <NULL>
#> # … with abbreviated variable names ¹count_variable, ²fitting_warning,
#> # ³fitting_error, ⁴prediction_warning, ⁵prediction_error
plot(out, angle = 45, border_colour = "white")
growth_rate(out)
#> # A tibble: 2 × 10
#> count_varia…¹ gender model r r_lower r_upper growt…² time time_…³ time_…⁴
#> <chr> <fct> <lis> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
#> 1 date_of_onset f <glm> 0.137 0.0698 0.206 doubli… 5.07 3.36 9.93
#> 2 date_of_onset m <glm> 0.240 0.146 0.341 doubli… 2.89 2.03 4.75
#> # … with abbreviated variable names ¹count_variable, ²growth_or_decay,
#> # ³time_lower, ⁴time_upper
To unnest the data we can use unnest()
(a function that has been reexported from the tidyr package.
unnest(out, estimates)
#> # A tibble: 20 × 15
#> count_v…¹ gender data model count date_…² estim…³ lower…⁴ upper…⁵ lower…⁶
#> <chr> <fct> <list<t> <lis> <int> <isowk> <dbl> <dbl> <dbl> <dbl>
#> 1 date_of_… f [11 × 2] <glm> 1 2014-W… 3.27 1.90 5.61 0
#> 2 date_of_… f [11 × 2] <glm> 4 2014-W… 4.30 2.83 6.52 0
#> 3 date_of_… f [11 × 2] <glm> 4 2014-W… 4.93 3.44 7.06 0
#> 4 date_of_… f [11 × 2] <glm> 9 2014-W… 5.65 4.15 7.68 1
#> 5 date_of_… f [11 × 2] <glm> 7 2014-W… 6.47 4.99 8.40 1
#> 6 date_of_… f [11 × 2] <glm> 8 2014-W… 7.42 5.92 9.31 2
#> 7 date_of_… f [11 × 2] <glm> 9 2014-W… 8.51 6.90 10.5 2
#> 8 date_of_… f [11 × 2] <glm> 13 2014-W… 9.75 7.88 12.1 3
#> 9 date_of_… f [11 × 2] <glm> 7 2014-W… 11.2 8.82 14.2 4
#> 10 date_of_… f [11 × 2] <glm> 15 2014-W… 12.8 9.72 16.9 4
#> 11 date_of_… f [11 × 2] <glm> 12 2014-W… 14.7 10.6 20.4 5
#> 12 date_of_… m [9 × 2] <glm> 1 2014-W… 2.01 1.02 3.95 0
#> 13 date_of_… m [9 × 2] <glm> 1 2014-W… 2.56 1.43 4.59 0
#> 14 date_of_… m [9 × 2] <glm> 3 2014-W… 4.13 2.73 6.24 0
#> 15 date_of_… m [9 × 2] <glm> 10 2014-W… 5.25 3.75 7.36 1
#> 16 date_of_… m [9 × 2] <glm> 7 2014-W… 6.67 5.07 8.79 1
#> 17 date_of_… m [9 × 2] <glm> 10 2014-W… 8.48 6.69 10.8 2
#> 18 date_of_… m [9 × 2] <glm> 10 2014-W… 10.8 8.50 13.7 3
#> 19 date_of_… m [9 × 2] <glm> 14 2014-W… 13.7 10.4 18.0 5
#> 20 date_of_… m [9 × 2] <glm> 15 2014-W… 17.4 12.4 24.4 6
#> # … with 5 more variables: upper_pi <dbl>, fitting_warning <list>,
#> # fitting_error <list>, prediction_warning <list>, prediction_error <list>,
#> # and abbreviated variable names ¹count_variable, ²date_index, ³estimate,
#> # ⁴lower_ci, ⁵upper_ci, ⁶lower_pi
fit_curve()
also works nicely with grouped incidence2
objects. In this situation, we return a nested tibble with some additional columns that indicate whether there has been a warning or error during the fitting or prediction stages.
grouped_dat <- incidence(
raw_dat,
date_index = "date_of_onset",
interval = "week",
groups = "hospital"
)[1:120, ]
grouped_dat
#> # incidence: 120 x 4
#> # count vars: date_of_onset
#> # groups: hospital
#> date_index hospital count_variable count
#> * <isowk> <fct> <chr> <int>
#> 1 2014-W15 Military Hospital date_of_onset 1
#> 2 2014-W16 Connaught Hospital date_of_onset 1
#> 3 2014-W17 NA date_of_onset 2
#> 4 2014-W17 other date_of_onset 3
#> 5 2014-W18 NA date_of_onset 1
#> 6 2014-W18 Connaught Hospital date_of_onset 1
#> 7 2014-W18 Princess Christian Maternity Hospital (PCMH) date_of_onset 1
#> 8 2014-W18 Rokupa Hospital date_of_onset 1
#> 9 2014-W19 NA date_of_onset 1
#> 10 2014-W19 Connaught Hospital date_of_onset 3
#> # … with 110 more rows
out <- fit_curve(grouped_dat, model = "poisson", alpha = 0.05)
out
#> # A tibble: 6 × 9
#> count_vari…¹ hospi…² data model estimates fitti…³ fitti…⁴ predi…⁵ predi…⁶
#> <chr> <fct> <list<t> <lis> <list> <list> <list> <list> <list>
#> 1 date_of_ons… Connau… [22 × 2] <glm> <trndng_p> <NULL> <NULL> <NULL> <NULL>
#> 2 date_of_ons… Milita… [21 × 2] <glm> <trndng_p> <NULL> <NULL> <NULL> <NULL>
#> 3 date_of_ons… other [20 × 2] <glm> <trndng_p> <NULL> <NULL> <NULL> <NULL>
#> 4 date_of_ons… Prince… [17 × 2] <glm> <trndng_p> <NULL> <NULL> <NULL> <NULL>
#> 5 date_of_ons… Rokupa… [18 × 2] <glm> <trndng_p> <NULL> <NULL> <NULL> <NULL>
#> 6 date_of_ons… NA [22 × 2] <glm> <trndng_p> <NULL> <NULL> <NULL> <NULL>
#> # … with abbreviated variable names ¹count_variable, ²hospital,
#> # ³fitting_warning, ⁴fitting_error, ⁵prediction_warning, ⁶prediction_error
# plot with a prediction interval but not a confidence interval
plot(out, ci = FALSE, pi=TRUE, angle = 45, border_colour = "white")
growth_rate(out)
#> # A tibble: 6 × 10
#> count_vari…¹ hospi…² model r r_lower r_upper growt…³ time time_…⁴ time_…⁵
#> <chr> <fct> <lis> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
#> 1 date_of_ons… Connau… <glm> 0.197 0.177 0.217 doubli… 3.53 3.20 3.92
#> 2 date_of_ons… Milita… <glm> 0.173 0.147 0.200 doubli… 4.00 3.46 4.71
#> 3 date_of_ons… other <glm> 0.170 0.141 0.200 doubli… 4.09 3.46 4.92
#> 4 date_of_ons… Prince… <glm> 0.142 0.101 0.188 doubli… 4.87 3.70 6.87
#> 5 date_of_ons… Rokupa… <glm> 0.178 0.133 0.228 doubli… 3.89 3.04 5.22
#> 6 date_of_ons… NA <glm> 0.184 0.164 0.205 doubli… 3.77 3.39 4.24
#> # … with abbreviated variable names ¹count_variable, ²hospital,
#> # ³growth_or_decay, ⁴time_lower, ⁵time_upper
We provide helper functions, is_ok()
, is_warning()
and is_error()
to help filter the output as necessary.
out <- fit_curve(grouped_dat, model = "negbin", alpha = 0.05)
is_warning(out)
#> # A tibble: 5 × 7
#> count_variable hospital data model estimates fitti…¹ predi…²
#> <chr> <fct> <list<t> <list> <list> <list> <list>
#> 1 date_of_onset Connaught Hospital [22 × 2] <negbin> <trndng_p> <chr> <NULL>
#> 2 date_of_onset other [20 × 2] <negbin> <trndng_p> <chr> <NULL>
#> 3 date_of_onset Princess Christia… [17 × 2] <negbin> <trndng_p> <chr> <NULL>
#> 4 date_of_onset Rokupa Hospital [18 × 2] <negbin> <trndng_p> <chr> <NULL>
#> 5 date_of_onset NA [22 × 2] <negbin> <trndng_p> <chr> <NULL>
#> # … with abbreviated variable names ¹fitting_warning, ²prediction_warning
unnest(is_warning(out), fitting_warning)
#> # A tibble: 10 × 7
#> count_variable hospital data model estimates fitti…¹ predi…²
#> <chr> <fct> <list<t> <list> <list> <chr> <list>
#> 1 date_of_onset Connaught Hospit… [22 × 2] <negbin> <trndng_p> iterat… <NULL>
#> 2 date_of_onset Connaught Hospit… [22 × 2] <negbin> <trndng_p> iterat… <NULL>
#> 3 date_of_onset other [20 × 2] <negbin> <trndng_p> iterat… <NULL>
#> 4 date_of_onset other [20 × 2] <negbin> <trndng_p> iterat… <NULL>
#> 5 date_of_onset Princess Christi… [17 × 2] <negbin> <trndng_p> iterat… <NULL>
#> 6 date_of_onset Princess Christi… [17 × 2] <negbin> <trndng_p> iterat… <NULL>
#> 7 date_of_onset Rokupa Hospital [18 × 2] <negbin> <trndng_p> iterat… <NULL>
#> 8 date_of_onset Rokupa Hospital [18 × 2] <negbin> <trndng_p> iterat… <NULL>
#> 9 date_of_onset NA [22 × 2] <negbin> <trndng_p> iterat… <NULL>
#> 10 date_of_onset NA [22 × 2] <negbin> <trndng_p> iterat… <NULL>
#> # … with abbreviated variable names ¹fitting_warning, ²prediction_warning
Rolling average
We can add a rolling average, across current and previous intervals, to an incidence2
object with the add_rolling_average()
function:
ra <- add_rolling_average(grouped_dat, n = 2L) # group observations with the 2 prior
plot(ra, border_colour = "white", angle = 45) +
geom_line(aes(x = date_index, y = rolling_average))
#> Warning: Removed 1 row containing missing values (`geom_line()`).