diff --git a/.nojekyll b/.nojekyll index 0639c2c1..01e1f02e 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -fad23f16 \ No newline at end of file +b5eb6433 \ No newline at end of file diff --git a/FAQ.html b/FAQ.html index 08aa9d04..1a27d84e 100644 --- a/FAQ.html +++ b/FAQ.html @@ -414,14 +414,19 @@

Install pystan

I missed some deadline or wasn’t able to do some part of the course

2 Simulation warm-up

Here is the function to simulate and plot observations from a hierarchical data-generating process.

-
hierarchical_sim <- function(group_pop_mean,
-                             between_group_sd,
-                             within_group_sd,
-                             n_groups,
-                             n_obs_per_group
-                             ) {
-  # Generate group means
-  group_means <- rnorm(
-    n = n_groups,
-    mean = group_pop_mean,
-    sd = between_group_sd
-  )
-
-  # Generate observations
-
-  ## Create an empty vector for observations
-  y <- numeric()
-  ## Create a vector for the group identifier
-  group <- rep(1:n_groups, each = n_obs_per_group)
-  
-  for (j in 1:n_groups) {
-    ### Generate one group observations
-    group_y <- rnorm(
-      n = n_obs_per_group,
-      mean = group_means[j],
-      sd = within_group_sd
-    )
-    ### Append the group observations to the vector
-    y <- c(y, group_y)
-  }
-
-  # Combine into a data frame
-  data <- data.frame(
-    group = factor(group),
-    y = y
-  )
-
-  # Plot the data
-  ggplot(data, aes(x = y, y = group)) +
-    geom_point() +
-    geom_vline(xintercept = group_pop_mean, linetype = "dashed")
-}
+
# Simulate observations from a two-level hierarchical normal model and plot
# them.
#
# Arguments:
#   group_pop_mean   - mean of the distribution the group means are drawn from
#   between_group_sd - standard deviation of the group means
#   within_group_sd  - standard deviation of the observations within a group
#   n_groups         - number of groups to simulate
#   n_obs_per_group  - number of observations drawn for each group
#
# Returns a ggplot object with one row of points per group and a dashed
# vertical line at group_pop_mean.
hierarchical_sim <- function(group_pop_mean,
+                             between_group_sd,
+                             within_group_sd,
+                             n_groups,
+                             n_obs_per_group
+                             ) {
+  # Generate group means
+  group_means <- rnorm(
+    n = n_groups,
+    mean = group_pop_mean,
+    sd = between_group_sd
+  )
+
+  # Generate observations
+
+  ## Create a vector for the group identifier
+  ## (seq_len() also handles n_groups = 0, where 1:n_groups would be c(1, 0))
+  group <- rep(seq_len(n_groups), each = n_obs_per_group)
+
+  ## Draw all observations in one vectorised call: each draw uses the mean of
+  ## the group it belongs to, in the same order as a group-by-group loop
+  y <- rnorm(
+    n = length(group),
+    mean = group_means[group],
+    sd = within_group_sd
+  )
+
+  # Combine into a data frame
+  data <- data.frame(
+    group = factor(group),
+    y = y
+  )
+
+  # Plot the data; the ggplot2 functions are namespaced so the function also
+  # works when library(ggplot2) has not been attached
+  ggplot2::ggplot(data, ggplot2::aes(x = y, y = group)) +
+    ggplot2::geom_point() +
+    ggplot2::geom_vline(xintercept = group_pop_mean, linetype = "dashed")
+}

Example using the function:

-
hierarchical_sim(
-  group_pop_mean = 50,
-  between_group_sd = 5,
-  within_group_sd = 1,
-  n_groups = 10,
-  n_obs_per_group = 5
-  )
+
# Example: 10 groups of 5 observations each, group means spread (sd 5) around
# a population mean of 50, observations spread (sd 1) around their group mean.
hierarchical_sim(
+  group_pop_mean = 50,
+  between_group_sd = 5,
+  within_group_sd = 1,
+  n_groups = 10,
+  n_obs_per_group = 5
+  )
Error in ggplot(data, aes(x = y, y = group)): could not find function "ggplot"
@@ -534,53 +512,53 @@

3 Sleep deprivation

Below is some code for fitting a brms model. This model is a simple pooled model. You will need to fit a hierarchical model as explained in the assignment, but this code should help you get started.

Load the dataset

-
data(sleepstudy, package = "lme4")
+
# Load the sleepstudy data set shipped with the lme4 package (lme4 must be installed).
data(sleepstudy, package = "lme4")
Error in find.package(package, lib.loc, verbose = verbose): there is no package called 'lme4'

Specify the formula and observation family:

-
sleepstudy_pooled_formula <- bf(
-  Reaction ~ 1 + Days,
-  family = "gaussian",
-  center = FALSE
-)
+
# Pooled model: Reaction is regressed on an intercept and Days with a Gaussian
# observation family.
# NOTE(review): center = FALSE presumably makes the intercept an ordinary
# class "b" coefficient, so a prior can be set on coef = "Intercept" — confirm
# against the brms documentation.
sleepstudy_pooled_formula <- bf(
+  Reaction ~ 1 + Days,
+  family = "gaussian",
+  center = FALSE
+)

We can see the parameters and default priors with

-
get_prior(pooled_formula, data = sleepstudy)
+
# List the parameters of the pooled model together with their default priors.
get_prior(sleepstudy_pooled_formula, data = sleepstudy)

We can then specify the priors:

-
(sleepstudy_pooled_priors <- c(
-  prior(
-    normal(400, 100),
-    class = "b",
-    coef = "Intercept"
-  ),
-  prior(
-    normal(0, 50),
-    class = "b",
-    coef = "Days"
-  ),
-  prior(
-    normal(0, 50),
-    class = "sigma"
-  )
-))
+
# Priors for the pooled model: normal(400, 100) on the intercept coefficient,
# normal(0, 50) on the Days coefficient and normal(0, 50) on sigma.
# The outer parentheses make R print the prior object after assigning it.
(sleepstudy_pooled_priors <- c(
+  prior(
+    normal(400, 100),
+    class = "b",
+    coef = "Intercept"
+  ),
+  prior(
+    normal(0, 50),
+    class = "b",
+    coef = "Days"
+  ),
+  prior(
+    normal(0, 50),
+    class = "sigma"
+  )
+))

And then fit the model:

-
sleepstudy_pooled_fit <- brm(
-  formula = pooled_formula,
-  prior = pooled_priors,
-  data = sleepstudy
-)
+
# Fit the pooled model using the formula and priors defined in the previous
# steps (sleepstudy_pooled_formula and sleepstudy_pooled_priors).
sleepstudy_pooled_fit <- brm(
+  formula = sleepstudy_pooled_formula,
+  prior = sleepstudy_pooled_priors,
+  data = sleepstudy
+)

We can inspect the model fit:

-
summary(pooled_fit)
+
# Print the posterior summary of the fitted pooled model.
summary(sleepstudy_pooled_fit)

@@ -588,64 +566,64 @@

4 School calendar

Meta-analysis models can be fit in brms. When the standard error is known, the se() function can be used to specify it.

The dataset dat.konstantopoulos2011 has the observations for the school calendar intervention meta-analysis.

-
data(dat.konstantopoulos2011, package = "metadat")
+
# Load the school calendar meta-analysis data set from the metadat package.
data(dat.konstantopoulos2011, package = "metadat")

As mentioned in the assignment instructions, a unique identifier for school needs to be created by combining the district and school:

-
schoolcalendar_data <- dat.konstantopoulos2011 |>
-  dplyr::mutate(
-    school = factor(school),
-    district = factor(district),
-    district_school = interaction(district, school, drop = TRUE, sep = "_")
-  )
+
# Convert school and district to factors and build district_school, a factor
# with one level per observed district/school pair (labels look like "1_1").
# drop = TRUE discards combinations that do not occur in the data.
schoolcalendar_data <- dat.konstantopoulos2011 |>
+  dplyr::mutate(
+    school = factor(school),
+    district = factor(district),
+    district_school = interaction(district, school, drop = TRUE, sep = "_")
+  )

Then the models can be fit

-
schoolcalendar_pooled_formula <- bf(
-  formula = yi | se(sqrt(vi)) ~ 1,
-  family = "gaussian"
-)  
-
-schoolcalendar_pooled_fit <- brm(
-  formula = schoolcalendar_pooled_formula,
-  data = schoolcalendar_data
-)
+
# Pooled meta-analysis model: an intercept-only Gaussian model for the effect
# yi, with the standard error of each observation given as sqrt(vi) via se().
# The model is then fitted to schoolcalendar_data with brm().
schoolcalendar_pooled_formula <- bf(
+  formula = yi | se(sqrt(vi)) ~ 1,
+  family = "gaussian"
+)  
+
+schoolcalendar_pooled_fit <- brm(
+  formula = schoolcalendar_pooled_formula,
+  data = schoolcalendar_data
+)

Predictions for a new school can be made using the posterior_epred function:

-
new_school <- data.frame(
-  school = factor(1),
-  district = factor(1),
-  district_school = factor("1_1"),
-  vi = 0 # the expectation of the prediction is not affected by the sampling variance, so this can be any number
-)
-  
-
-schoolcalendar_post_epred <- posterior_epred(
-    schoolcalendar_pooled_fit,
-    newdata = new_school,
-    allow_new_levels = TRUE
-  )
+
# Build a one-row data frame describing a school to predict for, then compute
# draws of the expected value of the posterior predictive distribution for it.
# NOTE(review): allow_new_levels = TRUE presumably permits grouping levels that
# were not present when the model was fitted — confirm in the brms docs.
new_school <- data.frame(
+  school = factor(1),
+  district = factor(1),
+  district_school = factor("1_1"),
+  vi = 0 # the expectation of the prediction is not affected by the sampling variance, so this can be any number
+)
+  
+
+schoolcalendar_post_epred <- posterior_epred(
+    schoolcalendar_pooled_fit,
+    newdata = new_school,
+    allow_new_levels = TRUE
+  )

It can be helpful to plot the posterior estimates. Here is a function that will do this:

-
plot_school_posteriors <- function(fit, dataset) {
-  tidybayes::add_predicted_draws(dataset, fit) |>
-    ggplot(
-      aes(
-        x = .prediction,
-        y = interaction(district, school, sep = ", ", lex.order = TRUE))) +
-    tidybayes::stat_halfeye() +
-    ylab("District, school") +
-    xlab("Posterior effect")
-}
+
# Plot posterior predictive draws for every district/school combination.
#
# Arguments:
#   fit     - fitted model passed on to tidybayes::add_predicted_draws()
#   dataset - data frame with district and school columns to predict for
#
# Returns a ggplot object with one half-eye summary per district/school pair.
plot_school_posteriors <- function(fit, dataset) {
+  # ggplot2 functions are namespaced (like the tidybayes ones) so the function
+  # also works when library(ggplot2) has not been attached
+  tidybayes::add_predicted_draws(dataset, fit) |>
+    ggplot2::ggplot(
+      ggplot2::aes(
+        x = .prediction,
+        y = interaction(district, school, sep = ", ", lex.order = TRUE))) +
+    tidybayes::stat_halfeye() +
+    ggplot2::ylab("District, school") +
+    ggplot2::xlab("Posterior effect")
+}

And can be used as follows:

-
plot_school_posteriors(
-  fit = schoolcalendar_pooled_fit,
-  dataset = school_calendar_data
-)
+
# Plot the pooled fit for the schools in schoolcalendar_data (the data frame
# created in the earlier step).
plot_school_posteriors(
+  fit = schoolcalendar_pooled_fit,
+  dataset = schoolcalendar_data
+)
@@ -1137,277 +1115,277 @@

4 School calendar diff --git a/search.json b/search.json index 9b8b848b..37306e23 100644 --- a/search.json +++ b/search.json @@ -277,7 +277,7 @@ "href": "FAQ.html#i-missed-some-deadline-or-wasnt-able-to-do-some-part-of-the-course", "title": "Bayesian Data Analysis course - FAQ", "section": "I missed some deadline or wasn’t able to do some part of the course", - "text": "I missed some deadline or wasn’t able to do some part of the course\n\nI missed the deadline to register for the course in Sisu. Can I join the course?\n\nYes, just register in MyCourses.\n\nI missed the deadline for the assignment. Can you accept my late submission?\n\nOpen MyCourses Quizzes are automatically submitted at the deadline time\nIf you miss the FeedbackFruits deadline first time it’s used due technical problems, but you send the pdf to one of the TAs few minutes after the deadline it can accepted once. As the recommended submission time is before 4pm on Friday, you have in general 56 hours extra hours for submission and several few minute late submissions is not likely just due to the technical problems.\n\nI was not able to do one of the assignments because [some personal problem]. Can I do some extra work?\n\nThings happen and you don’t need to tell the course staff your personal reasons (especially you shouldn’t tell any health issue details). Everyone gets a second change in period III. In period III there is just one submission deadline, but otherwise the procedure is the same (ie. you need to return all the assignments). If you submitted the project work in autumn you don’t need to re-submit it if you re-submit assignments.\n\nI missed the deadline to register project group. Can I still register?\n\nYes. Those who registered early are allowed to choose the presentation slots first.\n\nMy group member a) disappeared, b) doesn’t do anything, c) is annoying. 
Can I continue with the project alone.\n\nFirst we hope you can resolve the issue, but if nothing works, then you can continue the project work alone.\n\nI was not able a) to do the project or b) to give a presentation because [some personal problem]. Can a) I submit it later, b) present later.\n\nThings happen and you don’t need to tell the course staff your personal reasons (especially you shouldn’t tell any health issue details). Everyone gets a second change in period III. In period III there is second project submission deadline and presentation slots. If you are happy with your assignment score, you don’t need to re-submit assignments if you submit the project work in period III." + "text": "I missed some deadline or wasn’t able to do some part of the course\n\nCan I combine results from assignments, project, presentation, and e-exam made in different periods / years?\n\nYes.\n\nI missed the deadline to register for the course in Sisu. Can I join the course?\n\nYes, just register in MyCourses and contact student services so that they add you to Sisu, too.\n\nI missed the deadline for the assignment. Can you accept my late submission?\n\nOpen MyCourses Quizzes are automatically submitted at the deadline time \n\nI was not able to do one of the assignments because [some personal problem]. Can I do some extra work?\n\nThings happen and you don’t need to tell the course staff your personal reasons (especially you shouldn’t tell any health issue details). Everyone gets a second chance in period III. In period III there is just one submission deadline, but otherwise the procedure is the same (ie. you need to return all the assignments). If you submitted the project work in autumn you don’t need to re-submit it if you re-submit assignments.\n\nI missed the deadline to register project group. Can I still register?\n\nYes. 
Those who registered early are allowed to choose the presentation slots first.\n\nMy group member a) disappeared, b) doesn’t do anything, c) is annoying. Can I continue with the project alone.\n\nFirst we hope you can resolve the issue, but if nothing works, then you can continue the project work alone.\n\nI was not able a) to do the project or b) to give a presentation because [some personal problem]. Can a) I submit it later, b) present later.\n\nThings happen and you don’t need to tell the course staff your personal reasons (especially you shouldn’t tell any health issue details). Everyone gets a second change in period III. In period III there is second project submission deadline and presentation slots. If you are happy with your assignment score, you don’t need to re-submit assignments if you submit the project work in period III." }, { "objectID": "FAQ.html#recommended-courses-after-bayesian-data-analysis", diff --git a/sitemap.xml b/sitemap.xml index fcdb96e7..fe284749 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,50 +2,50 @@ https://avehtari.github.io/BDA_course_Aalto/ta_info_gsu.html - 2024-11-12T08:43:13.753Z + 2024-11-12T10:31:47.737Z https://avehtari.github.io/BDA_course_Aalto/project.html - 2024-11-12T08:43:13.305Z + 2024-11-12T10:31:47.289Z https://avehtari.github.io/BDA_course_Aalto/gsu2023.html - 2024-11-12T08:43:13.305Z + 2024-11-12T10:31:47.289Z https://avehtari.github.io/BDA_course_Aalto/assignments_gsu.html - 2024-11-12T08:43:13.257Z + 2024-11-12T10:31:47.241Z https://avehtari.github.io/BDA_course_Aalto/FAQ.html - 2024-11-12T08:43:13.257Z + 2024-11-12T10:31:47.241Z https://avehtari.github.io/BDA_course_Aalto/Aalto2024.html - 2024-11-12T08:43:13.257Z + 2024-11-12T10:31:47.241Z https://avehtari.github.io/BDA_course_Aalto/Aalto2023.html - 2024-11-12T08:43:13.257Z + 2024-11-12T10:31:47.241Z https://avehtari.github.io/BDA_course_Aalto/BDA3_notes.html - 2024-11-12T08:43:13.257Z + 2024-11-12T10:31:47.241Z 
https://avehtari.github.io/BDA_course_Aalto/assignments.html - 2024-11-12T08:43:13.257Z + 2024-11-12T10:31:47.241Z https://avehtari.github.io/BDA_course_Aalto/demos.html - 2024-11-12T08:43:13.257Z + 2024-11-12T10:31:47.241Z https://avehtari.github.io/BDA_course_Aalto/index.html - 2024-11-12T08:43:13.305Z + 2024-11-12T10:31:47.289Z https://avehtari.github.io/BDA_course_Aalto/project_gsu.html - 2024-11-12T08:43:13.305Z + 2024-11-12T10:31:47.289Z