|>
+ pr_death_counts ggplot(aes(date, deaths)) +
+ geom_point()
Warning: Removed 4 rows containing missing values (`geom_point()`).
+<- pr_death_counts |> filter(date < make_date(2018, 4, 1)) dat
diff --git a/33-smoothing.qmd b/33-smoothing.qmd index 4e925cf..5c600bc 100644 --- a/33-smoothing.qmd +++ b/33-smoothing.qmd @@ -574,4 +574,60 @@ $$ head(pr_death_counts) ``` +Explore the data and notice we should remove missing or incomplete values +```{r} +pr_death_counts |> + ggplot(aes(date, deaths)) + + geom_point() + +dat <- pr_death_counts |> filter(date < make_date(2018, 4, 1)) +``` + + +We start by fitting a seasonal model. We will fit the model to 2015-2016 and then obtain residulas for 2017-2018 + +```{r} +dat <- mutate(dat, tt = as.numeric(date - make_date(2017, 9, 20))) |> + filter(!is.na(deaths)) |> + mutate(x1 = sin(2*pi*tt/365), x2 = cos(2*pi*tt/365), + x3 = sin(4*pi*tt/365), x4 = cos(4*pi*tt/365)) + +seasonal_fit <- lm(deaths ~ x1 + x2 + x3 + x4, data = filter(dat, year(date) < 2017)) +``` + +Check the fit + +```{r} +with(dat, plot(tt, deaths)) +s <- predict(seasonal_fit, newdata = dat) +lines(dat$tt, s, col = 2, lwd = 2) +``` + + +Let's now fit smooth curve to the residuals + +```{r} +dat <- mutate(dat, resid = deaths - s) +fit <- loess(resid ~ tt, span = 1/20, degree = 1, data = dat) +fit <- predict(fit, se = TRUE) +dat <- mutate(dat, f = fit$fit) |> + mutate(lower = f - qnorm(0.995)*fit$se.fit, upper = f + qnorm(.995)*fit$se.fit) +dat |> ggplot(aes(x = date)) + + geom_point(aes(y = resid)) + + geom_line(aes(y = f), color = "red") +``` + +Is there some evidence of indirect effect after the first week after landfall? + +```{r} +dat |> + filter(year(date) >= 2017) |> + ggplot(aes(date, f, ymin = lower, ymax = upper)) + + geom_ribbon(alpha = 0.5) + + geom_line(color = "red") + + geom_hline(yintercept = 0) + + theme_bw() +``` + +Note that this is only an exploratory analysis. A formal analysis would take into account changing demographics and perhaps fit a model with a discontinuity on the day the hurricane made landfall. \ No newline at end of file diff --git a/docs/33-smoothing.html b/docs/33-smoothing.html index dc3e2cb..88fe72f 100644 --- a/docs/33-smoothing.html +++ b/docs/33-smoothing.html @@ -835,6 +835,65 @@
head(pr_death_counts)
Explore the data and notice we should remove missing or incomplete values
+|>
+ pr_death_counts ggplot(aes(date, deaths)) +
+ geom_point()
Warning: Removed 4 rows containing missing values (`geom_point()`).
+<- pr_death_counts |> filter(date < make_date(2018, 4, 1)) dat
We start by fitting a seasonal model. We will fit the model to 2015-2016 and then obtain residulas for 2017-2018
+<- mutate(dat, tt = as.numeric(date - make_date(2017, 9, 20))) |>
+ dat filter(!is.na(deaths)) |>
+ mutate(x1 = sin(2*pi*tt/365), x2 = cos(2*pi*tt/365),
+ x3 = sin(4*pi*tt/365), x4 = cos(4*pi*tt/365))
+
+<- lm(deaths ~ x1 + x2 + x3 + x4, data = filter(dat, year(date) < 2017)) seasonal_fit
Check the fit
+with(dat, plot(tt, deaths))
+<- predict(seasonal_fit, newdata = dat)
+ s lines(dat$tt, s, col = 2, lwd = 2)
Let’s now fit smooth curve to the residuals
+<- mutate(dat, resid = deaths - s)
+ dat <- loess(resid ~ tt, span = 1/20, degree = 1, data = dat)
+ fit <- predict(fit, se = TRUE)
+ fit <- mutate(dat, f = fit$fit) |>
+ dat mutate(lower = f - qnorm(0.995)*fit$se.fit, upper = f + qnorm(.995)*fit$se.fit)
+ |> ggplot(aes(x = date)) +
+ dat geom_point(aes(y = resid)) +
+ geom_line(aes(y = f), color = "red")
Is there some evidence of indirect effect after the first week after landfall?
+|>
+ dat filter(year(date) >= 2017) |>
+ ggplot(aes(date, f, ymin = lower, ymax = upper)) +
+ geom_ribbon(alpha = 0.5) +
+ geom_line(color = "red") +
+ geom_hline(yintercept = 0) +
+ theme_bw()
Note that this is only an exploratory analysis. A formal analysis would take into account changing demographics and perhaps fit a model with a discontinuity on the day the hurricane made landfall.
diff --git a/docs/33-smoothing_files/figure-html/unnamed-chunk-5-1.png b/docs/33-smoothing_files/figure-html/unnamed-chunk-5-1.png new file mode 100644 index 0000000..3f6e478 Binary files /dev/null and b/docs/33-smoothing_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/33-smoothing_files/figure-html/unnamed-chunk-7-1.png b/docs/33-smoothing_files/figure-html/unnamed-chunk-7-1.png new file mode 100644 index 0000000..53223f9 Binary files /dev/null and b/docs/33-smoothing_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/33-smoothing_files/figure-html/unnamed-chunk-8-1.png b/docs/33-smoothing_files/figure-html/unnamed-chunk-8-1.png new file mode 100644 index 0000000..3a5e025 Binary files /dev/null and b/docs/33-smoothing_files/figure-html/unnamed-chunk-8-1.png differ diff --git a/docs/33-smoothing_files/figure-html/unnamed-chunk-9-1.png b/docs/33-smoothing_files/figure-html/unnamed-chunk-9-1.png new file mode 100644 index 0000000..b11cc3c Binary files /dev/null and b/docs/33-smoothing_files/figure-html/unnamed-chunk-9-1.png differ diff --git a/docs/search.json b/docs/search.json index 6f18769..f3ec34e 100644 --- a/docs/search.json +++ b/docs/search.json @@ -1915,6 +1915,6 @@ "href": "33-smoothing.html#case-study-estimating-indirect-effects-of-natural-dissasters", "title": "34 Smoothing", "section": "34.7 Case Study: Estimating indirect effects of Natural Dissasters", - "text": "34.7 Case Study: Estimating indirect effects of Natural Dissasters\n\nhead(pr_death_counts)" + "text": "34.7 Case Study: Estimating indirect effects of Natural Dissasters\n\nhead(pr_death_counts)\n\nExplore the data and notice we should remove missing or incomplete values\n\npr_death_counts |>\n ggplot(aes(date, deaths)) +\n geom_point()\n\nWarning: Removed 4 rows containing missing values (`geom_point()`).\n\n\n\n\ndat <- pr_death_counts |> filter(date < make_date(2018, 4, 1))\n\nWe start by fitting a seasonal model. We will fit the model to 2015-2016 and then obtain residulas for 2017-2018\n\ndat <- mutate(dat, tt = as.numeric(date - make_date(2017, 9, 20))) |>\n filter(!is.na(deaths)) |>\n mutate(x1 = sin(2*pi*tt/365), x2 = cos(2*pi*tt/365),\n x3 = sin(4*pi*tt/365), x4 = cos(4*pi*tt/365)) \n\nseasonal_fit <- lm(deaths ~ x1 + x2 + x3 + x4, data = filter(dat, year(date) < 2017))\n\nCheck the fit\n\nwith(dat, plot(tt, deaths))\ns <- predict(seasonal_fit, newdata = dat)\nlines(dat$tt, s, col = 2, lwd = 2)\n\n\n\n\nLet’s now fit smooth curve to the residuals\n\ndat <- mutate(dat, resid = deaths - s)\nfit <- loess(resid ~ tt, span = 1/20, degree = 1, data = dat)\nfit <- predict(fit, se = TRUE)\ndat <- mutate(dat, f = fit$fit) |>\n mutate(lower = f - qnorm(0.995)*fit$se.fit, upper = f + qnorm(.995)*fit$se.fit)\ndat |> ggplot(aes(x = date)) +\n geom_point(aes(y = resid)) +\n geom_line(aes(y = f), color = \"red\")\n\n\n\n\nIs there some evidence of indirect effect after the first week after landfall?\n\ndat |> \n filter(year(date) >= 2017) |>\n ggplot(aes(date, f, ymin = lower, ymax = upper)) +\n geom_ribbon(alpha = 0.5) +\n geom_line(color = \"red\") +\n geom_hline(yintercept = 0) +\n theme_bw()\n\n\n\n\nNote that this is only an exploratory analysis. A formal analysis would take into account changing demographics and perhaps fit a model with a discontinuity on the day the hurricane made landfall." } ] \ No newline at end of file diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 2272081..a6a6562 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -150,6 +150,6 @@