Skip to content

Commit

Permalink
moved model comparison with LOO back to lecture 9
Browse files Browse the repository at this point in the history
  • Loading branch information
avehtari committed Nov 4, 2024
1 parent d2e85a7 commit bd61bde
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 324 deletions.
Binary file modified slides/BDA_lecture_8b.pdf
Binary file not shown.
326 changes: 2 additions & 324 deletions slides/BDA_lecture_8b.tex
Original file line number Diff line number Diff line change
Expand Up @@ -319,10 +319,10 @@
\item See also \url{https://users.aalto.fi/~ave/modelselection/CV-FAQ.html}

\end{list2}
\item 7.3 Model comparison based on predictive performance\\
\end{itemize}
Next week
\begin{itemize}
\item 7.3 Model comparison based on predictive performance\\
\item 7.4 Model comparison using Bayes factors\\
\item 7.5 Continuous model expansion / sensitivity analysis
\item {\color{gray}7.5 Example (may be skipped)}
Expand Down Expand Up @@ -1149,330 +1149,7 @@
\end{list2}}
\end{frame}
\begin{frame}{Model comparison and selection}
% Roadmap slide: splits the model-comparison material between this
% lecture (elpd_diff with its SE) and the next one.
Today
\begin{list1}
\item Model comparison and selection (elpd\_diff, se)
\end{list1}
Next lecture
\begin{list1}
\item {\footnotesize Related methods (WAIC, *IC, BF)}
\item Hypothesis testing
\item Potential overfitting
\item Model expansion and averaging
\end{list1}
\end{frame}
\begin{frame}[fragile]{Student retention -- Posterior predictive distributions}
% Posterior predictive distributions for the two competing models;
% figures were produced elsewhere (with tidybayes, per the subtitle).
\framesubtitle{with \texttt{tidybayes}}
\vspace{-0.75\baselineskip}
Latent hierarchical linear model\\
\hspace{-7mm}
% Fixed-height minipages keep the two stacked figures aligned.
\begin{minipage}[t][3.6cm][t]{1.0\linewidth}
\includegraphics[height=3.6cm]{student_retention_lbinom_preds.pdf}
\end{minipage}
\vspace{-0.25\baselineskip}
Latent hierarchical linear model + spline\\
\hspace{-7mm}
\begin{minipage}[t][3.6cm][t]{1.0\linewidth}
\includegraphics[height=3.6cm]{student_retention_sbinom_preds.pdf}
\end{minipage}
\end{frame}
\begin{frame}[fragile]{Student retention -- Marginal PPC}
% Marginal posterior predictive (density overlay) checks for both
% models; the subtitle documents the call that generated the figures.
\framesubtitle{\texttt{pp\_check(fit, ndraws=100)}}
\vspace{-0.75\baselineskip}
Latent hierarchical linear model\\
\begin{minipage}[t][3.6cm][t]{1.0\linewidth}
\includegraphics[height=3.6cm]{student_retention_lbinom_ppc_dens_overlay.pdf}
\end{minipage}
\vspace{-0.5\baselineskip}
Latent hierarchical linear model + spline\\
\begin{minipage}[t][3.6cm][t]{1.0\linewidth}
\includegraphics[height=3.6cm]{student_retention_sbinom_ppc_dens_overlay.pdf}
\end{minipage}
\end{frame}
\begin{frame}[fragile]{Student retention -- LOO intervals}
% Leave-one-out predictive intervals (each interval computed without
% the corresponding observation) for both models.
\vspace{-0.5\baselineskip}
LOO predictive intervals -- latent hierarchical linear\\
\hspace{-7mm}
\begin{minipage}[t][3.6cm][t]{1.0\linewidth}
\includegraphics[height=3.6cm]{student_retention_lbinom_ppc_loo_intervals.pdf}
\end{minipage}
\vspace{-0.5\baselineskip}
LOO predictive intervals -- latent hierarchical linear + spline\\
\hspace{-7mm}
\begin{minipage}[t][3.6cm][t]{1.0\linewidth}
\includegraphics[height=3.6cm]{student_retention_sbinom_ppc_loo_intervals.pdf}
\end{minipage}
\end{frame}
\begin{frame}[fragile]{Student retention -- LOO-PIT checking}
% LOO-PIT quantile-quantile calibration checks for both models;
% subtitle documents the pp_check call used.
\framesubtitle{\texttt{pp\_check(fit, type = "loo\_pit\_qq", ndraws=4000)}}
\vspace{-0.5\baselineskip}
LOO-PIT check -- latent hierarchical linear\\
\hspace{-7mm}
\begin{minipage}[t][3.6cm][t]{1.0\linewidth}
\includegraphics[height=3.6cm]{student_retention_lbinom_ppc_loo_pit_qq.pdf}
\end{minipage}
\vspace{-0.5\baselineskip}
LOO-PIT check -- latent hierarchical linear + spline\\
\hspace{-7mm}
\begin{minipage}[t][3.6cm][t]{1.0\linewidth}
\includegraphics[height=3.6cm]{student_retention_sbinom_ppc_loo_pit_qq.pdf}
\end{minipage}
\end{frame}
\begin{frame}[fragile]{Student retention -- $R^2$}
% LOO-R^2 comparison: fit4 = latent hierarchical linear,
% fit6 = latent hierarchical linear + spline.
Latent hierarchical linear vs. latent hierarchical linear + spline
\begin{minted}[fontsize=\footnotesize]{text}
> loo_R2(fit4) |> round(digits=2)
Estimate Est.Error Q2.5 Q97.5
R2 0.92 0.02 0.88 0.95
> loo_R2(fit6) |> round(digits=2)
Estimate Est.Error Q2.5 Q97.5
R2 0.97 0.01 0.95 0.98
\end{minted}
% Caveat shown on the slide: R^2 assesses only the predictive mean,
% not the whole predictive distribution.
$R^2$ measures the goodness of the mean of the predictive
distribution
\vspace{4\baselineskip}
{\color{gray}\footnotesize \href{https://doi.org/10.1080/00031305.2018.1549100}{Gelman, Goodrich, Gabry, and Vehtari (2019). R-squared for Bayesian regression models. \textit{The American Statistician}, 73(3):307-309.}}
\end{frame}
\begin{frame}[fragile]{Student retention -- log score -- elpd }
\vspace{-\baselineskip}
% Defines elpd_loo and reveals (via overlays) the pointwise log
% predictive densities for the spline model; their sum (-141.7)
% matches the loo(fit6) output shown later.
\begin{itemize}
\item information theoretical goodness of the whole distribution
\item elpd = expected log predictive density (probability)
\item elpd\_loo = estimated with LOO predictive densities / probs\\
% Fix: summation index must match the data index used in the summand
% (was \sum_{n=1}^N over terms indexed by i).
$\sum_{i=1}^N \log p(y_i | x_i, x_{-i}, y_{-i})$
\end{itemize}
% \vspace{-0.5\baselineskip}
% LOO predictive intervals -- latent hierarchical linear\\
% \hspace{-7mm}
% \begin{minipage}[t][3.6cm][t]{1.0\linewidth}
% \includegraphics[height=3.6cm]{student_retention_sbinom_ppc_loo_intervals.pdf}
% \end{minipage}
%\vspace{-0.5\baselineskip}
\only<2->{
LOO predictive intervals -- latent hierarchical linear + spline\\
\begin{minipage}[t][3.6cm][t]{1.0\linewidth}
\hspace{-9mm}
\includegraphics[height=3.6cm]{student_retention_sbinom_ppc_loo_intervals.pdf}
\end{minipage}
}
\vspace{-1.75\baselineskip}
\only<3->{\fontsize{6.95}{9}\selectfont {~~~~~-8.4 -5.6 -2.9 -2.9 -2.8 -3.0 -4.0 -3.2 -3.9 -3.2 -3.4 -3.2 -2.9 -3.9 -3.4 -3.4 -3.2 -2.7 -2.8 -3.1\\
~~~~~~~~~-2.5 -2.8 -2.9 -3.4 -5.4 -3.7 -3.1 -3.3 -3.5 -3.2 -3.5 -3.5 -6.6 -3.8 -3.7 -3.4 -2.5 -2.8 -2.9 -3.3\\
}}
% Fix: keep the negative value inside math mode so it typesets with a
% true minus sign (was "$\sum = $ -141.7", giving a text hyphen).
\uncover<4->{\footnotesize $\sum = -141.7$}
\end{frame}
\begin{frame}[fragile]{Student retention -- elpd\_loo}
% loo() summaries for both models: the spline model (fit6) has higher
% elpd_loo and smaller effective number of parameters (p_loo).
% NOTE(review): highlightlines=6 exceeds the 5 lines of minted content
% shown here -- possibly stale after output was shortened; the
% elpd_loo row is line 4. Confirm the intended highlighted line.
Latent hierarchical linear + spline
\begin{minted}[fontsize=\footnotesize,highlightlines=6]{text}
> loo(fit6)
Computed from 4000 by 40 log-likelihood matrix
Estimate SE
elpd_loo -141.7 7.2
p_loo 10.9 2.5
\end{minted}
\pause
Latent hierarchical linear
\begin{minted}[fontsize=\footnotesize,highlightlines=6]{text}
> loo(fit4)
Computed from 4000 by 40 log-likelihood matrix
Estimate SE
elpd_loo -184.3 17.3
p_loo 24.3 5.8
\end{minted}
\end{frame}
\begin{frame}[fragile]{Student retention -- log score -- elpd }
\vspace{-\baselineskip}
% Side-by-side pointwise log predictive densities for both models;
% the sums reproduce the elpd_loo values reported by loo().
{
{\small LOO predictive intervals -- latent hierarchical linear}\\
\begin{minipage}[t][2.8cm][t]{1.2\linewidth}
\hspace{-9mm}
\includegraphics[height=2.8cm,trim=0 40 0 0,clip]{student_retention_lbinom_ppc_loo_intervals.pdf}
\end{minipage}
}
\vspace{-1.75\baselineskip}
{\fontsize{6.7}{8}\selectfont {~~~~-15.7 -7.6 -3.9 -2.9 -6.7 -4.2 -2.9 -3.1 -12.9 -4.7 -3.3 -3.4 -9.0 -3.0 -3.3 -3.2 -8.2 -2.8 -3.2 -3.0\\
~~~~~~~~~-2.9 -3.3 -3.0 -4.6 -4.3 -3.3 -3.0 -4.0 -3.0 -5.6 -3.6 -5.4 -4.9 -3.6 -3.9 -5.2 -2.7 -3.7 -3.0 -4.1
}}
% Fix: negative value moved inside math mode so it typesets with a
% true minus sign (was "$\sum = $ -184.3", giving a text hyphen).
{\scriptsize $\sum = -184.3$}
{
{\small LOO predictive intervals -- latent hierarchical linear + spline}\\
\begin{minipage}[t][2.8cm][t]{1.2\linewidth}
\hspace{-9mm}
\includegraphics[height=2.8cm,trim=0 40 0 0,clip]{student_retention_sbinom_ppc_loo_intervals.pdf}
\end{minipage}
}
\vspace{-1.75\baselineskip}
{\fontsize{6.7}{8}\selectfont {~~~~~-8.4 -5.6 -2.9 -2.9 -2.8 -3.0 -4.0 -3.2 -3.9 -3.2 -3.4 -3.2 -2.9 -3.9 -3.4 -3.4 -3.2 -2.7 -2.8 -3.1\\
~~~~~~~~~-2.5 -2.8 -2.9 -3.4 -5.4 -3.7 -3.1 -3.3 -3.5 -3.2 -3.5 -3.5 -6.6 -3.8 -3.7 -3.4 -2.5 -2.8 -2.9 -3.3
}}
% Same fix for the spline model's sum.
{\scriptsize $\sum = -141.7$}
\end{frame}
\begin{frame}[fragile]{Student retention -- elpd\_loo}
% Builds up the pointwise elpd_loo comparison of fit4 vs fit6 with
% beamer overlays: <+> shows one figure per step, <+-> keeps the last
% histogram visible while the summary statistics appear one by one.
\vspace{-0.7\baselineskip}
\hspace{-5mm}Latent hierarchical linear (fit4) vs latent hierarchical linear + spline (fit6)
\only<+>{\hspace{-5mm}\includegraphics[height=7cm]{student_retention_loo_pointwise_scatter.pdf}}
\only<+>{\hspace{-5mm}\includegraphics[height=7cm]{student_retention_loo_pointwise_diff_scatter.pdf}}
\only<+>{\hspace{-5mm}\includegraphics[height=7cm]{student_retention_loo_pointwise_diff_histogram_1.pdf}}
\only<+>{\hspace{-5mm}\includegraphics[height=7cm]{student_retention_loo_pointwise_diff_histogram_2.pdf}}
\only<+->{\hspace{-5mm}\includegraphics[height=7cm]{student_retention_loo_pointwise_diff_histogram_3.pdf}}
\only<+->{
\begin{minipage}[t][4cm][t]{3.2cm}
\vspace{-10.5\baselineskip}
% Summary statistics of the 40 pointwise differences: SE of the mean
% is sd/sqrt(40); SE of the sum is sd*sqrt(40) (= se_diff 14.3).
mean $\approx 1.07$\\
\only<+->{sd $\approx 2.26$\\}
\only<+->{SE = sd/$\sqrt{40}\approx 0.36$\\}
\only<+->{\\sum $\approx 42.6$\\}
\only<+->{SE = sd$*\sqrt{40}\approx 14.3$\\}
\end{minipage}
}
\end{frame}
\begin{frame}[fragile]{Student retention -- elpd\_loo}
% Recap of the individual loo() results (grayed out), followed by
% loo_compare(), which reports elpd_diff and se_diff directly,
% relative to the best model (fit6 on the top row with 0.0 0.0).
{\color{gray}
Latent hierarchical linear + spline
\begin{minted}[fontsize=\footnotesize,highlightlines=3]{text}
> loo(fit6)
Estimate SE
elpd_loo -141.7 7.2
p_loo 10.9 2.5
\end{minted}
Latent hierarchical linear
\begin{minted}[fontsize=\footnotesize,highlightlines=3]{text}
> loo(fit4)
Estimate SE
elpd_loo -184.3 17.3
p_loo 23.8 5.7
\end{minted}
}
% NOTE(review): p_loo for fit4 reads 23.8/5.7 here but 24.3/5.8 on the
% earlier loo(fit4) slide -- presumably from different runs; verify
% and make the two slides consistent.
\begin{minted}[fontsize=\footnotesize,highlightlines={2-4}]{text}
> loo_compare(loo(fit4), loo(fit6))
elpd_diff se_diff
fit6 0.0 0.0
fit4 -42.6 14.3
\end{minted}
\end{frame}
\begin{frame}{LOO difference uncertainty estimate (SE) reliability}
\vspace{-0.2\baselineskip}
% Three situations in which the normal approximation for the LOO
% difference SE can fail, following Sivula et al. (2022); details
% revealed stepwise with overlays.
\begin{list1}
\item[1.] The models make very similar predictions
\begin{list2}
% Fix: the <4 / >4 thresholds concern the elpd *difference* between
% models (elpd_diff), not the elpd_loo of a single model, per
% Sivula et al. (2022) and the CV-FAQ cited earlier on this slide set.
\item<2-> if $|\mbox{elpd\_diff}|<4$, SE is not reliable, but the
difference is small anyway
\item<2-> selecting a ``wrong'' model has small cost
\item<2-> in nested case, the skewness favors the simpler model
\end{list2}
\item[2.] The models are misspecified with outliers in the data
\begin{list2}
\item<3-> in nested case, the bias favors the simpler model
\item<3-> model checking and model extension to avoid misspecified
models (Bayesian workflow)
\end{list2}
\item[3.] The number of observations is small
\begin{list2}
\item<4-> in nested case the skewness favors the simpler model
\item<4-> any inference with small $n$ is difficult
\item<4-> if $|\mbox{elpd\_diff}|>4$, model is well specified,
and $n>100$ then the normal approximation is good
\end{list2}
\end{list1}
{\color{gray}\footnotesize Sivula, Magnusson, Matamoros, and Vehtari (2022). Uncertainty in Bayesian leave-one-out cross-validation based model comparison. \textit{\href{https://arxiv.org/abs/2008.10296v3}{arXiv:2008.10296v3}}.}
\end{frame}
\begin{frame}{Log score and elpd\_loo}
% Motivation: why the log score is used for comparison despite being
% hard to interpret on its own.
\begin{itemize}
\item Log score is not easily interpretable
\item but is information theoretically good utility for the goodness
of the whole distribution
\item and thus is useful in model comparison
\end{itemize}
\end{frame}
\begin{frame}{Log score and elpd\_loo}
% Interpreting elpd values: in the discrete case exp(elpd) is a
% predictive probability, which can be benchmarked against guessing
% uniformly over the observed data range [121,310].
\begin{itemize}
\item Interpretation in discrete case
\begin{itemize}
\item log probability
\end{itemize}
\item<2-> For example
\begin{itemize}
\item $\frac{1}{N}\sum_{n=1}^N\exp(\mathrm{elpd}_{\mathrm{loo},n}) \approx 4\%$ probability that we predict the
observed value
\item<3-> compare to guessing uniformly from the data range [121,310] having
$1/(310-121+1) \approx 0.5\%$ probability \only<4->{(log score -210)}
\end{itemize}
\item<5-> Interpretation in continuous case
\begin{itemize}
\item can be compared to a simple reference distribution
\end{itemize}
\end{itemize}
\end{frame}
% \begin{frame}[fragile]{Student retention -- loo computation}
Expand Down Expand Up @@ -1502,6 +1179,7 @@
\frametitle{Next week}
\begin{itemize}
\item Model comparison with LOO-CV
\item When is cross-validation applicable?
\begin{list2}
\item data generating mechanisms and prediction tasks
Expand Down

0 comments on commit bd61bde

Please sign in to comment.