From 87b10b163398bbbdb7d6c2fdb8ddec11bc0ae4b3 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Fri, 15 Mar 2024 10:32:37 +1000 Subject: [PATCH 01/13] [WIP] PIC documentation --- doc/src/crappy-pic.rst | 164 ++++++++++++++++++++++++++++++++++ doc/src/index.rst | 2 + doc/src/sparse-gp-details.rst | 79 ++++++++++++++++ 3 files changed, 245 insertions(+) create mode 100644 doc/src/crappy-pic.rst diff --git a/doc/src/crappy-pic.rst b/doc/src/crappy-pic.rst new file mode 100644 index 00000000..0005dc14 --- /dev/null +++ b/doc/src/crappy-pic.rst @@ -0,0 +1,164 @@ +################################################# +PIC math scratchpad +################################################# + +.. _crappy-pic: + +------------------------------------------------------------------- +Relationship between :math:`\sigma_\cancel{B}` and :math:`\sigma_f` +------------------------------------------------------------------- + +It seems like what we need first of all is some way to get the PITC posterior variance based only on :math:`\cancel{B}` using only the full PITC posterior and calculations that scale only with :math:`|B|`. I'm not totally finished working out how :math:`(\sigma^2_{*})^{\cancel{B} \cancel{B}}` fits into the whole posterior :math:`(\sigma^2_*)^{PIC}` + +Observe that since :math:`\mathbf{\Lambda}` is a block-diagonal matrix, its inverse is too, so when it's in the middle of a quadratic form, the cross terms disappear. As far as I can tell, this is the only place we can additively separate :math:`\mathbf{Q}_{\cancel{B} \cancel{B}}` and :math:`\mathbf{K}_{B B}` without explicit cross terms. Start with the usual application of Woodbury's lemma to the PITC posterior covariance: + +.. math:: + + \newcommand{Lam}{\mathbf{\Lambda}} + \newcommand{Kuu}{\mathbf{K}_{uu}} + \newcommand{Kuf}{\mathbf{K}_{uf}} + \newcommand{Kfu}{\mathbf{K}_{fu}} + (\sigma_*^2) &= K_* - \mathbf{Q}_{* f} \left(\mathbf{K}_{fu} \mathbf{K}_{uu}^{-1} \mathbf{K}_{uf} + \mathbf{\Lambda} \right)^{-1} \mathbf{Q}_{f *} \\ + &= K_* - \mathbf{K}_{*u} \Kuu^{-1} \Kuf \left(\Kfu \Kuu^{-1} \Kuf + \Lam\right)^{-1} \Kfu \Kuu^{-1} \mathbf{K}_{u*} \\ + &= K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \mathbf{K}_{u*} + +Now break up :math:`\Kuf` and :math:`\Lam`: + +.. math:: + + \Kuf &= \begin{bmatrix} \mathbf{K}_{u \cancel{B}} & \mathbf{K}_{u B} \end{bmatrix} \\ + \Lam &= \begin{bmatrix} \Lam_{\cancel{B}} & \mathbf{0} \\ \mathbf{0} & \Lam_{B} \end{bmatrix} \\ + +so that + +.. math:: + + \Kuf \Lam^{-1} \Kfu &= \begin{bmatrix} \mathbf{K}_{u \cancel{B}} & \mathbf{K}_{u B} \end{bmatrix} + \begin{bmatrix} \Lam_{\cancel{B}}^{-1} & \mathbf{0} \\ \mathbf{0} & \Lam_{B}^{-1} \end{bmatrix} + \begin{bmatrix} \mathbf{K}_{\cancel{B} u} \\ \mathbf{K}_{B u} \end{bmatrix} \\ + &= \mathbf{K}_{u \cancel{B}} \Lam_{\cancel{B}}^{-1} \mathbf{K}_{\cancel{B} u} + + \mathbf{K}_{u B} \Lam_{B}^{-1} \mathbf{K}_{B u} \\ + \mathbf{K}_{u \cancel{B}} \Lam_{\cancel{B}}^{-1} \mathbf{K}_{\cancel{B} u} &= \Kuf \Lam^{-1} \Kfu - \mathbf{K}_{u B} \Lam_{B}^{-1} \mathbf{K}_{B u} + +This last one is the inverse term in the training-data dependent part of the PITC posterior covariance using only the training points in :math:`\cancel{B}`, which I think is what we want to combine with the dense posterior from the training points in :math:`B`: + +.. 
math:: + (\sigma_*^2)^{\cancel{B} \cancel{B}} = K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \left( \Kuu + \mathbf{K}_{u \cancel{B}} \Lam_{\cancel{B}}^{-1} \mathbf{K}_{\cancel{B} u} \right)^{-1} \mathbf{K}_{u*} + +which we can write in terms of only the full PITC prior and the prior for group :math:`B`: + +.. math:: + (\sigma_*^2)^{\cancel{B} \cancel{B}} = K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \left( \Kuu + \Kuf \Lam^{-1} \Kfu - \mathbf{K}_{u B} \Lam_{B}^{-1} \mathbf{K}_{B u} \right)^{-1} \mathbf{K}_{u*} + +If you set :math:`B = \emptyset`, removing the correction for the training points in block :math:`B` entirely, you have the original PITC posterior covariance based on the full set of training points :math:`f`, as expected. Now we only have to compute :math:`\Kuf \Lam^{-1} \Kfu` (the term that scales with the full training set :math:`f`) once, and nothing that scales with :math:`|\cancel{B}|`. This still involves an :math:`O(M^3)` decomposition for each group :math:`B`. I have been assuming our group size :math:`|B|` is significantly smaller than the number of inducing points :math:`M`, so it's not great, but an interesting thing happens if we use the "downdate" form of Woodbury's lemma in the opposite direction to the usual way: + +.. math:: + \left(A - B C^{-1} B^T \right)^{-1} &= A^{-1} + A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\ + +Letting :math:`\mathbf{A} = \Kuu + \Kuf \Lam^{-1} \Kfu` (playing the part of :math:`A` in Woodbury's lemma): + +.. math:: + + (\sigma_*^2)^{\cancel{B} \cancel{B}} &= K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \left( \mathbf{A}^{-1} + \mathbf{A}^{-1} \mathbf{K}_{u B} \left(\Lam_{B} - \mathbf{K}_{B u} \mathbf{A}^{-1} \mathbf{K}_{u B}\right)^{-1} \mathbf{K}_{B u} \mathbf{A}^{-1} \right) \mathbf{K}_{u*} \\ + &= K_* - \mathbf{K}_{*u} \Kuu^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \mathbf{A}^{-1} \mathbf{K}_{u*} + \mathbf{K}_{*u} \mathbf{A}^{-1} \mathbf{K}_{u B} \left(\Lam_{B} - \mathbf{K}_{B u} \mathbf{A}^{-1} \mathbf{K}_{u B}\right)^{-1} \mathbf{K}_{B u} \mathbf{A}^{-1} \mathbf{K}_{u*} \\ + +I find the use of Woodbury's lemma to downdate a bit uncomfortable -- does this work with real symmetric matrices? But we would only need to decompose :math:`\mathbf{A}` (and also :math:`\Kuu`) at a cost of :math:`O(M^3)` once; the inversion to be calculated per group costs :math:`O(|B|^3)`. I also don't immediately spot a slick QR decomposition way to do this per-group term; it's pretty symmetrical. The only thing that jumps out is precomputing :math:`\mathbf{K}_{B u} \mathbf{A}^{-\frac{1}{2}}`. + +---------------------------------------------- +Derivation of Woodbury's Lemma for Differences +---------------------------------------------- + +This is specifically for symmetric matrices, to convince myself the downdate formula is legit. We would like to compute a rank-:math:`k` downdate to the inverse of a matrix :math:`A`. + +.. math:: + \left(A - B C^{-1} B^T \right)^{-1} &= A^{-1} + A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\ + +To be extra clear, let's say :math:`A \in \mathbb{R}^{n \times n}`, :math:`B \in \mathbb{R}^{n \times k}`, :math:`C \in \mathbb{R}^{k \times k}`. We put the relevant matrices into a block matrix :math:`M \in \mathbb{R}^{m \times m}` for :math:`m = n + k`, and label the conforming blocks of its inverse: + +.. 
math::
+    M &= \begin{pmatrix} A & B \\ B^T & C\end{pmatrix} \\
+    M^{-1} &= \begin{pmatrix} W & X \\ X^T & Y\end{pmatrix} \\
+    M M^{-1} &= \begin{pmatrix} \mathbf{I} & \mathbf{0} \\ \mathbf{0} & \mathbf{I} \end{pmatrix} \\
+    M^{-1} M &= \begin{pmatrix} \mathbf{I} & \mathbf{0} \\ \mathbf{0} & \mathbf{I} \end{pmatrix}
+
+The blocks of :math:`M M^{-1}` yield the following equations:
+
+.. math::
+    AW + BX^T &= \mathbf{I} \\
+    AX + BY &= \mathbf{0} \\
+    B^T W + CX^T &= \mathbf{0} \\
+    B^T X + CY &= \mathbf{I} \\
+
+Rearrange the middle two:
+
+.. math::
+    X &= -A^{-1} B Y \\
+    X^T &= -C^{-1} B^T W
+
+If we do the same for :math:`M^{-1} M`:
+
+.. math::
+    X &= -W B C^{-1} \\
+    X^T &= -Y B^T A^{-1}
+
+These blocks are equal:
+
+.. math::
+    C^{-1} B^T W &= Y B^T A^{-1} \\
+    A^{-1} B Y &= W B C^{-1} \\
+
+Now substitute the middle two equations from :math:`M M^{-1}` into the first and last equations:
+
+.. math::
+    W - A^{-1} B C^{-1} B^T W &= A^{-1} \\
+    \left(I - A^{-1} B C^{-1} B^T\right) W &= A^{-1} \\
+    -C^{-1} B^T A^{-1} B Y + Y &= C^{-1} \\
+    \left(I - C^{-1} B^T A^{-1} B\right) Y &= C^{-1} \\
+
+Assuming :math:`\left(I - A^{-1} B C^{-1} B^T\right)` and :math:`\left(I - C^{-1} B^T A^{-1} B\right)` are invertible, rearrange:
+
+.. math::
+    W &= \left(I - A^{-1} B C^{-1} B^T\right)^{-1} A^{-1} \\
+    &= \left(A - B C^{-1} B^T\right)^{-1} \\
+    Y &= \left(I - C^{-1} B^T A^{-1} B\right)^{-1} C^{-1} \\
+    &= \left(C - B^T A^{-1} B\right)^{-1}
+
+Now use equality of the off-diagonal blocks from the two ways to multiply :math:`M` and :math:`M^{-1}` above (don't you wish Sphinx would number equations?) and substitute:
+
+.. math::
+    C^{-1} B^T \left(A - B C^{-1} B^T\right)^{-1} &= \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\
+    \left(A - B C^{-1} B^T\right)^{-1} B C^{-1} &= A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} \\
+
+Let's look just at the term :math:`\left(A - B C^{-1} B^T\right)` and right-multiply by :math:`-A^{-1}`:
+
+.. math::
+    \left(A - B C^{-1} B^T\right) \left(-A^{-1}\right) &= -\mathbf{I} + B C^{-1} B^T A^{-1} \\
+    \mathbf{I} + \left(A - B C^{-1} B^T\right) \left(-A^{-1}\right) &= B C^{-1} B^T A^{-1}
+
+Now let's return to the previous equation and right-multiply by :math:`B^T A^{-1}`:
+
+.. math::
+    \left(A - B C^{-1} B^T\right)^{-1} B C^{-1} B^T A^{-1} &= A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\
+
+Substitute the previous result for :math:`B C^{-1} B^T A^{-1}`:
+
+.. math::
+    \left(A - B C^{-1} B^T\right)^{-1} \left( \mathbf{I} + \left(A - B C^{-1} B^T\right) \left(-A^{-1}\right) \right) &= A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\
+    \left(A - B C^{-1} B^T\right)^{-1} - A^{-1} &= A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \\
+    \left(A - B C^{-1} B^T \right)^{-1} &= A^{-1} + A^{-1} B \left(C - B^T A^{-1} B\right)^{-1} B^T A^{-1} \blacksquare
+
+which is the difference form of Woodbury's lemma, or a rank-:math:`k` downdate of :math:`A^{-1}`. The main assumption seems to be that :math:`\left(I - A^{-1} B C^{-1} B^T\right)` and :math:`\left(I - C^{-1} B^T A^{-1} B\right)` are invertible.
+
+------------------------
+Blockwise inversion of S
+------------------------
+
+I tried to invert :math:`\mathbf{S}` blockwise; it didn't work out but I'm leaving it in here just to look back at:
+
+.. 
math:: + \newcommand{VV}{\mathbf{V}} + \mathbf{U} &= \mathbf{Q}_{* \cancel{B}} \mathbf{S}^{-1}_{\cancel{B} B} \VV_{B *} \\ + &= \mathbf{Q}_{* \cancel{B}} \left( \mathbf{Q}_{\cancel{B} \cancel{B}}^{-1} \mathbf{Q}_{\cancel{B} B} \left(\mathbf{Q}_{B B} - \mathbf{Q}_{B \cancel{B}} \mathbf{Q}_{\cancel{B} \cancel{B}}^{-1} \mathbf{Q}_{\cancel{B} B}\right)^{-1} \right) \VV_{B *} + + +How are we going to avoid the :math:`O(|\cancel{B}|^3)` inversion for :math:`\mathbf{Q}_{\cancel{B} \cancel{B}}`? Also we are going to get cross-terms with both :math:`\mathbf{Q}_{* \cancel{B}}` and :math:`\mathbf{Q}_{B B}` involved, but maybe they are OK because they are only :math:`O(|\cancel{B}| |B|)`? \ No newline at end of file diff --git a/doc/src/index.rst b/doc/src/index.rst index e6176d7c..d82bfc21 100644 --- a/doc/src/index.rst +++ b/doc/src/index.rst @@ -97,6 +97,8 @@ and plot the results (though this'll require a numerical python environment), custom-models references + crappy-pic + ######### Credit diff --git a/doc/src/sparse-gp-details.rst b/doc/src/sparse-gp-details.rst index ce5bab0a..c358ee6e 100644 --- a/doc/src/sparse-gp-details.rst +++ b/doc/src/sparse-gp-details.rst @@ -338,3 +338,82 @@ However, we found that while the posterior mean predictions were numerically sta You should be able to find this implementation and details using git history. +-------------------- +PIC +-------------------- + +The way I started partitioning the covariance term into blocks is as follows: + +.. math:: + + (\sigma_*^2)^{PIC} &= K_* - \tilde{\mathbf{K}}^{PIC}_{*f} \left[ \tilde{\mathbf{K}}^{PITC}_{ff} \right]^{-1} \tilde{\mathbf{K}}^{PIC}_{f*} + \sigma^2 \\ + &= K_* - \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{K}_{* B} \end{bmatrix} \left(\mathbf{Q}_{ff} - \mathtt{blkdiag}(\mathbf{K}_{ff} - \mathbf{Q}_{ff})\right)^{-1} \begin{bmatrix} \mathbf{Q}_{\cancel{B} *} \\ \mathbf{K}_{B *} \end{bmatrix} \\ + &= K_* - \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{K}_{* B} \end{bmatrix} \left(\mathbf{K}_{fu} \mathbf{K}_{uu}^{-1} \mathbf{K}_{uf} + \mathbf{\Lambda} \right)^{-1} \begin{bmatrix} \mathbf{Q}_{\cancel{B} *} \\ \mathbf{K}_{B *} \end{bmatrix} + +The problem with doing this for PIC covariance (vs. PIC mean and PITC) is that we can't left-multiply the whole thing by :math:`\mathbf{K}_{uu}^{-1} \mathbf{K}_{uf}` (which in those instances leads to applying Woodbury's lemma to reduce the inverse to the size of the number of inducing points :math:`M`) because :math:`\mathbf{K}_{*B}` is not a low-rank approximation using the inducing points. We can instead break up the inverse term into blocks: + +.. math:: + \newcommand{VV}{\mathbf{V}} + (\sigma_*^2)^{PIC} &= K_* - + \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{K}_{* B}\end{bmatrix} + \begin{bmatrix} \mathbf{Q}_{\cancel{B} \cancel{B}} & \mathbf{Q}_{\cancel{B} B} \\ \mathbf{Q}_{B \cancel{B}} & \mathbf{Q}_{B B} \end{bmatrix}^{-1} + \begin{bmatrix} \mathbf{Q}_{\cancel{B} *} \\ \mathbf{K}_{B *}\end{bmatrix} + +If we substitute :math:`\mathbf{K}_{* B} = \mathbf{Q}_{* B} + \VV_{* B}` as with the mean, it doesn't work out nicely: + +.. 
math:: + (\sigma_*^2)^{PIC} &= K_* - + \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{Q}_{* B} + \VV_{* B} \end{bmatrix} + \underbrace{\begin{bmatrix} \mathbf{Q}_{\cancel{B} \cancel{B}} & \mathbf{Q}_{\cancel{B} B} \\ \mathbf{Q}_{B \cancel{B}} & \mathbf{Q}_{B B} \end{bmatrix}^{-1}}_{\mathbf{S}^{-1}} + \begin{bmatrix} \mathbf{Q}_{\cancel{B} *} \\ \mathbf{Q}_{B *} + \VV_{B *}\end{bmatrix} \\ + &= K_* - \mathbf{Q}_{**}^{PITC} - \underbrace{\mathbf{Q}_{* \cancel{B}} \mathbf{S}^{-1}_{\cancel{B} B} \VV_{B *}}_{\mathbf{U}} - \mathbf{U}^T - \mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *} + +Now we have 3 correction terms to apply to the posterior PITC covariance. The best thing I can think of is to apply Woodbury's lemma, but in the opposite direction to usual: + +.. math:: + \newcommand{Lam}{\mathbf{\Lambda}} + \newcommand{Kuu}{\mathbf{K}_{uu}} + \newcommand{Kuf}{\mathbf{K}_{uf}} + \newcommand{Kfu}{\mathbf{K}_{fu}} + \mathbf{S}^{-1} &= \left(\Kfu \Kuu^{-1} \Kuf + \Lam\right)^{-1} \\ + &= \Lam^{-1} - \Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} + +which involves decomposing the block-diagonal matrix :math:`\Lam` with blocks of size :math:`|B|` and a matrix the size :math:`M` of the inducing point set. In practice after we precompute terms, we have a sequence of triangular factors that we can subset as needed to pick out :math:`B` and :math:`\cancel{B}`. (Confusingly, one of these useful decompositions is the QR decomposition of the unrelated matrix :math:`B` in the PITC derivation above.) + +.. math:: + \mathbf{U} &= \mathbf{Q}_{* \cancel{B}} \mathbf{S}^{-1}_{\cancel{B} B} \VV_{B *} \\ + &= \mathbf{Q}_{* \cancel{B}} \left( \Lam^{-1} - \Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} \right)_{\cancel{B} B} \VV_{B *} + +:math:`\Lam^{-1}_{\cancel{B} B}` is zero by the definition of a block-diagonal matrix. + +.. math:: + \mathbf{U} &= - \mathbf{Q}_{* \cancel{B}} \left(\Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} \right)_{\cancel{B} B} \VV_{B *} \\ + +This looks appealingly like we could just keep combining instances of the QR decomposition of :math:`B`, but that would leave out the :math:`\mathbf{K}_{uu}` part. + +The open question is how to efficiently distill this into various correction terms for each group that don't require operations that scale with :math:`\cancel{B}`, since the paper promises :math:`O((|B| + M)^2)` for predictive covariances after precomputation. In principle, using sparse vectors / matrices for the :math:`*` target components, combined with Eigen's expression templates, should bring the complexity of some of these repeated solves for mostly empty vectors (for :math:`B`) down to :math:`O(|B|)`, and likewise for inducing points. + +At this point, our cross-terms are preceded by :math:`Q_{*\cancel{B}}`, which expands to :math:`K_{*u} K_{uu}^{-1} K_{u\cancel{B}}`. So actually we should be able to precompute everything except :math:`K_{*u}`, leaving prediction-time computations to scale with :math:`M`! + +So for the variance, we must precompute: + + - :math:`P^TLDL^TP = \Lam` + - :math:`\Lam^{-1} \Kfu` + - :math:`L_{uu} L_{uu}^T = \Kuu` + - :math:`QRP^T = B = \begin{bmatrix}\Lam^{-\frac{1}{2}}\Kfu \\ L_{uu} \end{bmatrix}` such that :math:`\mathbf{S}^{-1} = \left(B^T B\right)^{-1}`, and blocks :math:`\mathbf{S}^{-1}_{a b}` can be got by choosing the right rows / columns with which to do permutations and back-substitutions. 
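+
+As a concrete reference, here is a minimal standalone Eigen sketch of this shared precomputation (hypothetical names, with a plain Cholesky factor of :math:`\Lam` standing in for the pivoted :math:`LDL^T` above; this is a sketch, not the albatross implementation):
+
+.. code-block:: cpp
+
+   #include <Eigen/Dense>
+
+   // Shared precomputation for the variance terms.  K_fu is N x M,
+   // K_uu is M x M and Lambda is the N x N block-diagonal noise covariance.
+   void precompute_variance_terms(const Eigen::MatrixXd &K_fu,
+                                  const Eigen::MatrixXd &K_uu,
+                                  const Eigen::MatrixXd &Lambda) {
+     const Eigen::LLT<Eigen::MatrixXd> Lambda_llt(Lambda);
+     const Eigen::LLT<Eigen::MatrixXd> K_uu_llt(K_uu);
+
+     // G = Lambda^{-1} K_fu, reused for every group.
+     const Eigen::MatrixXd G = Lambda_llt.solve(K_fu);
+
+     // B stacks the whitened K_fu on top of an upper-triangular square root
+     // of K_uu, so that B^T B = K_uf Lambda^{-1} K_fu + K_uu.
+     Eigen::MatrixXd B(K_fu.rows() + K_uu.rows(), K_uu.cols());
+     B.topRows(K_fu.rows()) = Lambda_llt.matrixL().solve(K_fu);
+     B.bottomRows(K_uu.rows()) = K_uu_llt.matrixU().toDenseMatrix();
+
+     // One QR decomposition of B; per-group blocks are later obtained via
+     // permutations and triangular back-substitutions against R.
+     const Eigen::ColPivHouseholderQR<Eigen::MatrixXd> B_qr(B);
+     (void)G;
+     (void)B_qr;
+   }
+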
+ +then for each group :math:`B`: + + - :math:`\mathbf{W}_B = \Kuu^{-1} \mathbf{K}_{u \cancel{B}} \mathbf{S}_{\cancel{B} B}^{-1}` + - :math:`\mathbf{Y}_B = \Kuu^{-1} \mathbf{K}_{u B}` + +and at prediction time, we must compute: + + - :math:`\mathbf{K}_{* B}`, :math:`O(|B|)` + - :math:`\mathbf{K}_{* u}`, :math:`O(M)` + - :math:`\mathbf{Q}_{* B} = \mathbf{K}_{* u} \mathbf{Y}_B`, :math:`O(M^2)` with the existing decomposition + - :math:`\mathbf{V}_{* B} = \mathbf{K}_{* B} - \mathbf{Q}_{* B}`, :math:`O(|B|^2)` + - :math:`\mathbf{Q}_{**}^{PITC}` as with PITC + - :math:`\mathbf{U} = \mathbf{K}_{* u} \mathbf{W}_B \mathbf{V}_{B *}`, :math:`O(M + |B|)`? + - :math:`\mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *}`, :math:`O(|B|^2)` + From 85abdbf7fb101efe474949f80c18a3dc4c6aa825 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 4 Apr 2024 09:55:18 +1000 Subject: [PATCH 02/13] More documentation --- doc/src/sparse-gp-details.rst | 48 ++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/doc/src/sparse-gp-details.rst b/doc/src/sparse-gp-details.rst index c358ee6e..f70dafe4 100644 --- a/doc/src/sparse-gp-details.rst +++ b/doc/src/sparse-gp-details.rst @@ -398,22 +398,58 @@ At this point, our cross-terms are preceded by :math:`Q_{*\cancel{B}}`, which ex So for the variance, we must precompute: - :math:`P^TLDL^TP = \Lam` - - :math:`\Lam^{-1} \Kfu` + - :math:`\mathbf{G} = \Lam^{-1} \Kfu` - :math:`L_{uu} L_{uu}^T = \Kuu` - - :math:`QRP^T = B = \begin{bmatrix}\Lam^{-\frac{1}{2}}\Kfu \\ L_{uu} \end{bmatrix}` such that :math:`\mathbf{S}^{-1} = \left(B^T B\right)^{-1}`, and blocks :math:`\mathbf{S}^{-1}_{a b}` can be got by choosing the right rows / columns with which to do permutations and back-substitutions. + - :math:`QRP^T = B = \begin{bmatrix}\Lam^{-\frac{1}{2}}\Kfu \\ L_{uu} \end{bmatrix}` such that :math:`\mathbf{S}^{-1} = \Lam^{-1} - \mathbf{G} \left(B^T B\right)^{-1} \mathbf{G}^T`, and blocks :math:`\mathbf{S}^{-1}_{a b}` can be got by choosing the right rows / columns with which to do permutations and back-substitutions. + +For the mean, we compute the information vector :math:`v` as in PITC. then for each group :math:`B`: - :math:`\mathbf{W}_B = \Kuu^{-1} \mathbf{K}_{u \cancel{B}} \mathbf{S}_{\cancel{B} B}^{-1}` - :math:`\mathbf{Y}_B = \Kuu^{-1} \mathbf{K}_{u B}` + - :math:`v_b = \Lam_{B B}^{-1} \left( y_B - \mathbf{K}_{B u} v \right)` -and at prediction time, we must compute: +Given that we have already computed :math:`\Kfu`, we can use :math:`\mathbf{K}_{u B}` and :math:`\mathbf{K}_{u \cancel{B}}` efficiently in Eigen using sparse matrices with a single entry per nonzero row or column to be used. + +Then at prediction time, we must compute: - :math:`\mathbf{K}_{* B}`, :math:`O(|B|)` - :math:`\mathbf{K}_{* u}`, :math:`O(M)` - :math:`\mathbf{Q}_{* B} = \mathbf{K}_{* u} \mathbf{Y}_B`, :math:`O(M^2)` with the existing decomposition - - :math:`\mathbf{V}_{* B} = \mathbf{K}_{* B} - \mathbf{Q}_{* B}`, :math:`O(|B|^2)` + - :math:`\VV_{* B} = \mathbf{K}_{* B} - \mathbf{Q}_{* B}`, :math:`O(|B|^2)` - :math:`\mathbf{Q}_{**}^{PITC}` as with PITC - - :math:`\mathbf{U} = \mathbf{K}_{* u} \mathbf{W}_B \mathbf{V}_{B *}`, :math:`O(M + |B|)`? - - :math:`\mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *}`, :math:`O(|B|^2)` + - :math:`\mathbf{U} = \mathbf{K}_{* u} \mathbf{W}_B \VV_{B *}`, :math:`O(M + |B|)`? 
+ - :math:`\VV_{* B} \mathbf{S}^{-1}_{B B} \VV_{B *}`, :math:`O(|B|^2)`
+
+To compute :math:`\mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *}`, we form a (mostly zero) column of :math:`\mathbf{V}` for each feature, break the two terms of :math:`\mathbf{S}^{-1}` into symmetric parts, multiply by :math:`\mathbf{V}` and subtract, here in excruciating notational detail:
+
+.. math::
+    \VV_{* B} \mathbf{S}^{-1} \VV_{B *} &= \VV_{* B} \left( \Lam^{-1} - \Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} \right)_{B B} \VV_{B *} \\
+    &= \VV_{* B} \left( \left(P_\Lam L_\Lam^{-T} D_\Lam^{-\frac{1}{2}}\right) \underbrace{\left(D_\Lam^{-\frac{1}{2}} L_\Lam^{-1} P_\Lam^T\right)}_{Z_\Lam} - \mathbf{G} (B^T B)^{-1} \mathbf{G}^T \right) \VV_{B *} \\
+    &= \VV_{* B} \left( \mathbf{Z}_\Lam^T \mathbf{Z}_\Lam - \mathbf{G} (P_u R_u^{-1} R_u^{-T} P_u^T) \mathbf{G}^T \right) \VV_{B *} \\
+    &= \VV_{* B} \left( \mathbf{Z}_\Lam^T \mathbf{Z}_\Lam - \mathbf{G} (P_u R_u^{-1}) \underbrace{(R_u^{-T} P_u^T) \mathbf{G}^T}_{\mathbf{Z}_u} \right) \VV_{B *} \\
+    &= \VV_{* B} \left( \mathbf{Z}_\Lam^T \mathbf{Z}_\Lam - \mathbf{Z}_u^T \mathbf{Z}_u \right) \VV_{B *} \\
+    &= \VV_{* B} \mathbf{Z}_\Lam^T \underbrace{\mathbf{Z}_\Lam \VV_{B *}}_{\mathbf{\xi}_\Lam} - \VV_{* B} \mathbf{Z}_u^T \underbrace{\mathbf{Z}_u \VV_{B *}}_{\mathbf{\xi}_u} \\
+    &= \mathbf{\xi}_\Lam^T \mathbf{\xi}_\Lam - \mathbf{\xi}_u^T \mathbf{\xi}_u \\
+
+Note that the left-hand (subscript :math:`\Lam`) term is the decomposition of a block-diagonal matrix, so it will only contain cross-terms between features corresponding to the same local block. This calculation can be done blockwise. The right-hand (subscript :math:`u`) term projects the features through the local block of the training dataset and then through the inducing points, so the cross-terms are not in general sparse, and this calculation involves a lot more careful indexing.
+
+---------------------------------------
+Joint predictive distributions with PIC
+---------------------------------------
+
+This confused me mightily on several occasions, though it seems simple in retrospect, so I'm adding a note here.
+
+When you want to predict a multivariate distribution for a set of features :math:`* \in \mathbb{R}^{p}` where :math:`p > 1` and not all features are in the same local group, what happens with block indexing and cross-covariance?
+
+To compute :math:`\mathbf{U}`, the PIC covariance corrections are multiplied on the right by :math:`\VV_{B *} \in \mathbb{R}^{N \times p}`, which has nonzero rows only for the local group. This means you have :math:`p` columns; columns for features in the same group share the same nonzero rows, and they do not overlap nonzero rows with columns from other groups. Computing :math:`\mathbf{W}_B = \Kuu^{-1} \mathbf{K}_{u \cancel{B}} \mathbf{S}^{-1}_{\cancel{B} B}` puzzled me because for different groups :math:`D`, :math:`E` corresponding to the features to be predicted :math:`*_{d}`, :math:`*_{e}`, the nonzero columns (rows) of :math:`\mathbf{K}_{u \cancel{D}}` (:math:`\mathbf{S}^{-1}_{\cancel{D} D}`) overlap with those of :math:`\mathbf{K}_{u \cancel{E}}` (:math:`\mathbf{S}^{-1}_{\cancel{E} E}`). Can we actually compute this joint predictive distribution using the independent :math:`\mathbf{W}_B` blocks we precomputed, or do we have to account for this overlap somehow? 
+ +I think you don't -- you just end up with :math:`p` columns, each from a different prediction feature, and each of these gets projected through the inducing points in its own :math:`\mathbf{W}_B` to all of the prediction points (rows) on the left-hand :math:`\mathbf{K}_{* u}`. The approach is to stack up columns for :math:`*_d` and :math:`*_e`: + +.. math:: + \mathbf{U} \in \mathbb{R}^{p \times p} &= \mathbf{K}_{* u} \underbrace{\begin{bmatrix} \overbrace{\mathbf{W}_D \VV_{D, *_{d}}}^{\in \mathbb{R}^{M \times |d|}} & \overbrace{\mathbf{W}_E \VV_{E, *_{e}}}^{\in \mathbb{R}^{M \times |e|}} \end{bmatrix}}_{\in \mathbb{R}^{M \times p}} \\ + +or more generally, just produce one column for each feature with the appropriate :math:`\mathbf{W}` and :math:`\VV` terms. + +In the case of the symmetric component :math:`\mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *}`, the same confusion does not arise because the nonlocal data (:math:`\cancel{B}`) doesn't enter the equation; there is no overlap in nonzero rows between columns for different local blocks. \ No newline at end of file From 44a99fd2ba6fb8931c93d536f3f6a40269347452 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 11 Apr 2024 14:15:17 +1000 Subject: [PATCH 03/13] Doc update with fix for U / W issue --- doc/src/sparse-gp-details.rst | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/doc/src/sparse-gp-details.rst b/doc/src/sparse-gp-details.rst index f70dafe4..288aece6 100644 --- a/doc/src/sparse-gp-details.rst +++ b/doc/src/sparse-gp-details.rst @@ -366,7 +366,7 @@ If we substitute :math:`\mathbf{K}_{* B} = \mathbf{Q}_{* B} + \VV_{* B}` as with \begin{bmatrix} \mathbf{Q}_{* \cancel{B}} & \mathbf{Q}_{* B} + \VV_{* B} \end{bmatrix} \underbrace{\begin{bmatrix} \mathbf{Q}_{\cancel{B} \cancel{B}} & \mathbf{Q}_{\cancel{B} B} \\ \mathbf{Q}_{B \cancel{B}} & \mathbf{Q}_{B B} \end{bmatrix}^{-1}}_{\mathbf{S}^{-1}} \begin{bmatrix} \mathbf{Q}_{\cancel{B} *} \\ \mathbf{Q}_{B *} + \VV_{B *}\end{bmatrix} \\ - &= K_* - \mathbf{Q}_{**}^{PITC} - \underbrace{\mathbf{Q}_{* \cancel{B}} \mathbf{S}^{-1}_{\cancel{B} B} \VV_{B *}}_{\mathbf{U}} - \mathbf{U}^T - \mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *} + &= K_* - \mathbf{Q}_{**}^{PITC} - \underbrace{\mathbf{Q}_{* f} \mathbf{S}^{-1}_{f B} \VV_{B *}}_{\mathbf{U}} - \mathbf{U}^T - \mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *} Now we have 3 correction terms to apply to the posterior PITC covariance. The best thing I can think of is to apply Woodbury's lemma, but in the opposite direction to usual: @@ -381,19 +381,14 @@ Now we have 3 correction terms to apply to the posterior PITC covariance. The b which involves decomposing the block-diagonal matrix :math:`\Lam` with blocks of size :math:`|B|` and a matrix the size :math:`M` of the inducing point set. In practice after we precompute terms, we have a sequence of triangular factors that we can subset as needed to pick out :math:`B` and :math:`\cancel{B}`. (Confusingly, one of these useful decompositions is the QR decomposition of the unrelated matrix :math:`B` in the PITC derivation above.) .. math:: - \mathbf{U} &= \mathbf{Q}_{* \cancel{B}} \mathbf{S}^{-1}_{\cancel{B} B} \VV_{B *} \\ - &= \mathbf{Q}_{* \cancel{B}} \left( \Lam^{-1} - \Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} \right)_{\cancel{B} B} \VV_{B *} - -:math:`\Lam^{-1}_{\cancel{B} B}` is zero by the definition of a block-diagonal matrix. - -.. 
math:: - \mathbf{U} &= - \mathbf{Q}_{* \cancel{B}} \left(\Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} \right)_{\cancel{B} B} \VV_{B *} \\ + \mathbf{U} &= \mathbf{Q}_{* f} \mathbf{S}^{-1}_{f B} \VV_{B *} \\ + &= \mathbf{Q}_{* f} \left( \Lam^{-1} - \Lam^{-1} \Kfu \left( \Kuu + \Kuf \Lam^{-1} \Kfu \right)^{-1} \Kuf \Lam^{-1} \right)_{f B} \VV_{B *} This looks appealingly like we could just keep combining instances of the QR decomposition of :math:`B`, but that would leave out the :math:`\mathbf{K}_{uu}` part. The open question is how to efficiently distill this into various correction terms for each group that don't require operations that scale with :math:`\cancel{B}`, since the paper promises :math:`O((|B| + M)^2)` for predictive covariances after precomputation. In principle, using sparse vectors / matrices for the :math:`*` target components, combined with Eigen's expression templates, should bring the complexity of some of these repeated solves for mostly empty vectors (for :math:`B`) down to :math:`O(|B|)`, and likewise for inducing points. -At this point, our cross-terms are preceded by :math:`Q_{*\cancel{B}}`, which expands to :math:`K_{*u} K_{uu}^{-1} K_{u\cancel{B}}`. So actually we should be able to precompute everything except :math:`K_{*u}`, leaving prediction-time computations to scale with :math:`M`! +At this point, our cross-terms are preceded by :math:`Q_{* f}`, which expands to :math:`K_{*u} K_{uu}^{-1} K_{u f}`. So actually we should be able to precompute everything except :math:`K_{*u}`, leaving prediction-time computations to scale with :math:`M`! So for the variance, we must precompute: @@ -401,12 +396,12 @@ So for the variance, we must precompute: - :math:`\mathbf{G} = \Lam^{-1} \Kfu` - :math:`L_{uu} L_{uu}^T = \Kuu` - :math:`QRP^T = B = \begin{bmatrix}\Lam^{-\frac{1}{2}}\Kfu \\ L_{uu} \end{bmatrix}` such that :math:`\mathbf{S}^{-1} = \Lam^{-1} - \mathbf{G} \left(B^T B\right)^{-1} \mathbf{G}^T`, and blocks :math:`\mathbf{S}^{-1}_{a b}` can be got by choosing the right rows / columns with which to do permutations and back-substitutions. + - :math:`\mathbf{W} = \Kuu^{-1} \mathbf{K}_{u f} \mathbf{S}^{-1}` For the mean, we compute the information vector :math:`v` as in PITC. then for each group :math:`B`: - - :math:`\mathbf{W}_B = \Kuu^{-1} \mathbf{K}_{u \cancel{B}} \mathbf{S}_{\cancel{B} B}^{-1}` - :math:`\mathbf{Y}_B = \Kuu^{-1} \mathbf{K}_{u B}` - :math:`v_b = \Lam_{B B}^{-1} \left( y_B - \mathbf{K}_{B u} v \right)` @@ -435,21 +430,6 @@ To compute :math:`\mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *}`, we f Note that the left-hand (subscript :math:`\Lam`) term is the decomposition of a block-diagonal matrix, so it will only contain cross-terms between features corresponding to the same local block. This calculation can be done blockwise. The right-hand (subscript :math:`u`) term projects the features through the local block of the training dataset and then through the inducing points, so the cross-terms are not in general sparse, and this calculation involves a lot more careful indexing. ---------------------------------------- -Joint predictive distributions with PIC ---------------------------------------- - -This confused me mightily on several occasions, though it seems simple in retrospect, so I'm adding a note here. 
- -When you want to predict a multivariate distribution for a set of features :math:`* \in \mathbb{R}^{p}` where :math:`p > 1` and not all features are in the same local group, what happens with block indexing and cross-covariance? - -To compute :math:`\mathbf{U}`, on the right, the PIC covariance corrections are multiplied by :math:`\VV_{B *} \in \mathbb{R}^{N \times p}`, with only the rows in the local group nonzero. This means you have :math:`p` columns, which are identical within groups and do not overlap nonzero rows with columns from other groups. Computing :math:`\mathbf{W}_B = \Kuu^{-1} \mathbf{K}_{u \cancel{B}} \mathbf{S}^{-1}_{\cancel{B} B}` puzzled me because for different groups :math:`D`, :math:`E` corresponding to the features to be predicted :math:`*_{d}`, :math:`*_{e}`, the nonzero columns (rows) of :math:`\mathbf{K}_{u \cancel{D}}` (:math:`\mathbf{S}^{-1}_{\cancel{D} D}`) overlap with those of :math:`\mathbf{K}_{u \cancel{E}}` (:math:`\mathbf{S}^{-1}_{\cancel{E} E}`). Can we actually compute this joint predictive distribution using the independent :math:`\mathbf{W}_B` blocks we precomputed, or do we have to account for this overlap somehow? - -I think you don't -- you just end up with :math:`p` columns, each from a different prediction feature, and each of these gets projected through the inducing points in its own :math:`\mathbf{W}_B` to all of the prediction points (rows) on the left-hand :math:`\mathbf{K}_{* u}`. The approach is to stack up columns for :math:`*_d` and :math:`*_e`: - -.. math:: - \mathbf{U} \in \mathbb{R}^{p \times p} &= \mathbf{K}_{* u} \underbrace{\begin{bmatrix} \overbrace{\mathbf{W}_D \VV_{D, *_{d}}}^{\in \mathbb{R}^{M \times |d|}} & \overbrace{\mathbf{W}_E \VV_{E, *_{e}}}^{\in \mathbb{R}^{M \times |e|}} \end{bmatrix}}_{\in \mathbb{R}^{M \times p}} \\ - -or more generally, just produce one column for each feature with the appropriate :math:`\mathbf{W}` and :math:`\VV` terms. +The same breakdown of :math:`\mathbf{S}^{-1}` can be used to compute :math:`\mathbf{W}` during the fit step. -In the case of the symmetric component :math:`\mathbf{V}_{* B} \mathbf{S}^{-1}_{B B} \mathbf{V}_{B *}`, the same confusion does not arise because the nonlocal data (:math:`\cancel{B}`) doesn't enter the equation; there is no overlap in nonzero rows between columns for different local blocks. \ No newline at end of file +The notation :math:`\mathbf{W}_B` indicates that the relevant columns of :math:`\mathbf{W}` are used on the right-hand side. This is mathematically equivalent to making :math:`\VV_{B *}` have dimension :math:`N \times p` and be zero outside block :math:`B`. Computationally the right factor must be a sparse object to preserve the desired asymptotics. 
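+
+A minimal standalone Eigen sketch of such a sparse selection factor (hypothetical helper name; a sketch of the idea, not the albatross implementation):
+
+.. code-block:: cpp
+
+   #include <Eigen/Dense>
+   #include <Eigen/Sparse>
+
+   // N x |B| selector with one unit entry per column; multiplying a dense
+   // matrix by it on the right picks out the columns belonging to block B
+   // without touching the other N - |B| columns.
+   Eigen::SparseMatrix<double> block_selector(Eigen::Index n,
+                                              Eigen::Index start_row,
+                                              Eigen::Index block_size) {
+     Eigen::SparseMatrix<double> cols_B(n, block_size);
+     cols_B.reserve(block_size);
+     for (Eigen::Index i = 0; i < block_size; ++i) {
+       cols_B.insert(start_row + i, i) = 1.;
+     }
+     cols_B.makeCompressed();
+     return cols_B;
+   }
+
+   // e.g. K_uB = K_uf * block_selector(N, start_row, block_size) costs
+   // O(M |B|) rather than O(M N).
+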
\ No newline at end of file From 2e161ec4bcae5eeb2259869add63f10ff718d669 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 11 Apr 2024 14:16:15 +1000 Subject: [PATCH 04/13] WIP: PIC --- BUILD.bazel | 3 + include/albatross/SparseGP | 3 + include/albatross/src/models/pic_gp.hpp | 1370 +++++++++++++++++ .../albatross/src/models/sparse_common.hpp | 92 ++ include/albatross/src/models/sparse_gp.hpp | 71 - tests/test_pic_gp.cc | 650 ++++++++ 6 files changed, 2118 insertions(+), 71 deletions(-) create mode 100644 include/albatross/src/models/pic_gp.hpp create mode 100644 include/albatross/src/models/sparse_common.hpp create mode 100644 tests/test_pic_gp.cc diff --git a/BUILD.bazel b/BUILD.bazel index c607ec11..74e2d111 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -127,8 +127,10 @@ cc_library( "include/albatross/src/models/gp.hpp", "include/albatross/src/models/least_squares.hpp", "include/albatross/src/models/null_model.hpp", + "include/albatross/src/models/pic_gp.hpp", "include/albatross/src/models/ransac.hpp", "include/albatross/src/models/ransac_gp.hpp", + "include/albatross/src/models/sparse_common.hpp", "include/albatross/src/models/sparse_gp.hpp", "include/albatross/src/samplers/callbacks.hpp", "include/albatross/src/samplers/ensemble.hpp", @@ -227,6 +229,7 @@ swift_cc_test( "tests/test_models.cc", "tests/test_models.h", "tests/test_parameter_handling_mixin.cc", + "tests/test_pic_gp.cc", "tests/test_prediction.cc", "tests/test_radial.cc", "tests/test_random_utils.cc", diff --git a/include/albatross/SparseGP b/include/albatross/SparseGP index 2a26aab2..0f14113e 100644 --- a/include/albatross/SparseGP +++ b/include/albatross/SparseGP @@ -18,11 +18,14 @@ #include #include "GP" +// #include "Common" #include "linalg/Utils" #include #include #include +#include #include +#include #endif diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp new file mode 100644 index 00000000..62078b97 --- /dev/null +++ b/include/albatross/src/models/pic_gp.hpp @@ -0,0 +1,1370 @@ +/* + * Copyright (C) 2024 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef INCLUDE_ALBATROSS_MODELS_PIC_GP_H_ +#define INCLUDE_ALBATROSS_MODELS_PIC_GP_H_ + +namespace albatross { + +template +class PICGaussianProcessRegression; + +template +struct PICGPFit {}; + +template +using PICGroupIndexType = + typename std::result_of::type; + +template struct PICGroup { + // Row indices are into the original set of features into the data + // used to form S. 
+ Eigen::Index initial_row; + Eigen::Index block_size; + Eigen::Index block_index; + albatross::RegressionDataset dataset; +}; + +template +using PICGroupMap = std::map, + PICGroup>; + +template +struct Fit> { + using PermutationIndices = Eigen::Matrix; + using GroupIndexType = PICGroupIndexType; + using GroupMap = PICGroupMap; + + std::vector train_features; + std::vector inducing_features; + Eigen::SerializableLDLT train_covariance; + Eigen::MatrixXd sigma_R; + Eigen::PermutationMatrixX P; + std::vector mean_w; + Eigen::MatrixXd W; + // std::vector covariance_W; + std::vector covariance_Y; + Eigen::MatrixXd Z; + BlockDiagonalLDLT A_ldlt; + GroupMap measurement_groups; + Eigen::VectorXd information; + Eigen::Index numerical_rank; + + // debug stuff + std::vector covariance_Ynot; + Eigen::SerializableLDLT K_PITC_ldlt; + std::vector> cols_Bs; + std::vector> cols_Cs; + + Fit(){}; + + Fit(const std::vector &features_, + const std::vector &inducing_features_, + const Eigen::SerializableLDLT &train_covariance_, + const Eigen::MatrixXd &sigma_R_, const Eigen::PermutationMatrixX &P_, + const std::vector &mean_w_, const Eigen::MatrixXd &W_, + const std::vector &covariance_Y_, + const Eigen::MatrixXd &Z_, const BlockDiagonalLDLT &A_ldlt_, + const GroupMap &measurement_groups_, const Eigen::VectorXd &information_, + Eigen::Index numerical_rank_, + const std::vector &covariance_Ynot_, + const Eigen::SerializableLDLT &K_PITC_ldlt_, + const std::vector> cols_Bs_, + const std::vector> cols_Cs_) + : train_features(features_), inducing_features(inducing_features_), + train_covariance(train_covariance_), sigma_R(sigma_R_), P(P_), + mean_w(mean_w_), W(W_), covariance_Y(covariance_Y_), Z(Z_), + A_ldlt(A_ldlt_), measurement_groups(measurement_groups_), + information(information_), numerical_rank(numerical_rank_), + // debug stuff + covariance_Ynot(covariance_Ynot_), K_PITC_ldlt(K_PITC_ldlt_), + cols_Bs(cols_Bs_), cols_Cs(cols_Cs_) {} + + void shift_mean(const Eigen::VectorXd &mean_shift) { + ALBATROSS_ASSERT(mean_shift.size() == information.size()); + information += train_covariance.solve(mean_shift); + } + + bool operator==( + const Fit> + &other) const { + return (train_features == other.train_features && + inducing_features == other.inducing_features && + train_covariance == other.train_covariance && + sigma_R == other.sigma_R && P.indices() == other.P.indices() && + mean_w == other.mean_w && W == W && + covariance_Y == other.covariance_Y && Z == other.Z && + A_ldlt == other.A_ldlt && + measurement_groups == other.measurement_groups && + information == other.information && + numerical_rank == other.numerical_rank && + // Debug stuff + covariance_Ynot == other.covariance_Ynot && + K_PITC_ldlt == other.K_PITC_ldlt && cols_Bs == other.cols_Bs && + cols_Cs == other.cols_Cs); + } +}; + +/* + * This class implements an approximation technique for Gaussian processes + * which relies on an assumption that all observations are independent (or + * groups of observations are independent) conditional on a set of inducing + * points. 
The method is based off: + * + * [1] Sparse Gaussian Processes using Pseudo-inputs + * Edward Snelson, Zoubin Ghahramani + * http://www.gatsby.ucl.ac.uk/~snelson/SPGP_up.pdf + * + * Though the code uses notation closer to that used in this (excellent) + * overview of these methods: + * + * [2] A Unifying View of Sparse Approximate Gaussian Process Regression + * Joaquin Quinonero-Candela, Carl Edward Rasmussen + * http://www.jmlr.org/papers/volume6/quinonero-candela05a/quinonero-candela05a.pdf + * + * Very broadly speaking this method starts with a prior over the observations, + * + * [f] ~ N(0, K_ff) + * + * where K_ff(i, j) = covariance_function(features[i], features[j]) and f + * represents the function value. + * + * It then uses a set of inducing points, u, and makes some assumptions about + * the conditional distribution: + * + * [f|u] ~ N(K_fu K_uu^-1 u, K_ff - Q_ff) + * + * Where Q_ff = K_fu K_uu^-1 K_uf represents the variance in f that is + * explained by u. + * + * For FITC (Fully Independent Training Contitional) the assumption is that + * K_ff - Qff is diagonal, for PITC (Partially Independent Training Conditional) + * that it is block diagonal. These assumptions lead to an efficient way of + * inferring the posterior distribution for some new location f*, + * + * [f*|f=y] ~ N(K_*u S K_uf A^-1 y, K_** - Q_** + K_*u S K_u*) + * + * Where S = (K_uu + K_uf A^-1 K_fu)^-1 and A = diag(K_ff - Q_ff) and "diag" + * may mean diagonal or block diagonal. Regardless we end up with O(m^2n) + * complexity instead of O(n^3) of direct Gaussian processes. (Note that in [2] + * S is called sigma and A is lambda.) + * + * Of course, the implementation details end up somewhat more complex in order + * to improve numerical stability. Here we use an approach based off the QR + * decomposition which is described in + * + * Stable and Efficient Gaussian Process Calculations + * http://www.jmlr.org/papers/volume10/foster09a/foster09a.pdf + * + * A more detailed (but more likely to be out of date) description of + * the details can be found on the albatross documentation. A short + * description follows. It starts by setting up the Sparse Gaussian process + * covariances + * + * [f|u] ~ N(K_fu K_uu^-1 u, K_ff - Q_ff) + * + * We then set, + * + * A = K_ff - Q_ff + * = K_ff - K_fu K_uu^-1 K_uf + * + * which can be thought of as the covariance in the training data which + * is not be explained by the inducing points. The fundamental + * assumption in these sparse Gaussian processes is that A is sparse, in + * this case block diagonal. 
+ * + * We then build a matrix B and use its QR decomposition (with pivoting P) + * + * B = |A^-1/2 K_fu| = |Q_1| R P^T + * |K_uu^{T/2} | |Q_2| + * + * After which we can get the information vector (see _fit_impl) + * + * v = (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1 y + * = (B^T B) B^T A^-1/2 y + * = P R^-1 Q_1^T A^-1/2 y + * + * and can make predictions for new locations (see _predict_impl), + * + * [f*|f=y] ~ N(K_*u S K_uf A^-1 y, K_** - Q_** + K_*u S K_u*) + * ~ N(m, C) + * + * where we have + * + * m = K_*u S K_uf A^-1 y + * = K_*u v + * + * and + * + * C = K_** - Q_** + K_*u S K_u* + * + * using + * + * Q_** = K_*u K_uu^-1 K_u* + * = (K_uu^{-1/2} K_u*)^T (K_uu^{-1/2} K_u*) + * = Q_sqrt^T Q_sqrt + * and + * + * K_*u S K_u* = K_*u (K_uu + K_uf A^-1 K_fu)^-1 K_u* + * = K_*u (B^T B)^-1 K_u* + * = K_*u (P R R^T P^T)^-1 K_u* + * = (P R^-T K_u*)^T (P R^-T K_u*) + * = S_sqrt^T S_sqrt + */ +template +class PICGaussianProcessRegression + : public GaussianProcessBase> { + InducingPointStrategy inducing_point_strategy_; + +public: + using Base = GaussianProcessBase< + CovFunc, MeanFunc, + PICGaussianProcessRegression>; + + template + using GroupIndexType = PICGroupIndexType; + + template + using GroupMap = PICGroupMap; + + PICGaussianProcessRegression() : Base() { initialize_params(); }; + + PICGaussianProcessRegression(const CovFunc &covariance_function, + const MeanFunc &mean_function) + : Base(covariance_function, mean_function) { + initialize_params(); + }; + PICGaussianProcessRegression(CovFunc &&covariance_function, + MeanFunc &&mean_function) + : Base(std::move(covariance_function), std::move(mean_function)) { + initialize_params(); + }; + + PICGaussianProcessRegression( + const CovFunc &covariance_function, const MeanFunc &mean_function, + const GrouperFunction &independent_group_function, + const InducingPointStrategy &inducing_point_strategy, + const std::string &model_name) + : Base(covariance_function, mean_function, model_name), + inducing_point_strategy_(inducing_point_strategy), + independent_group_function_(independent_group_function) { + initialize_params(); + }; + PICGaussianProcessRegression(CovFunc &&covariance_function, + MeanFunc &&mean_function, + GrouperFunction &&independent_group_function, + InducingPointStrategy &&inducing_point_strategy, + const std::string &model_name) + : Base(std::move(covariance_function), std::move(mean_function), + model_name), + inducing_point_strategy_(std::move(inducing_point_strategy)), + independent_group_function_(std::move(independent_group_function)) { + initialize_params(); + }; + + void initialize_params() { + measurement_nugget_ = { + details::DEFAULT_NUGGET, + LogScaleUniformPrior(PARAMETER_EPSILON, PARAMETER_MAX)}; + inducing_nugget_ = {details::DEFAULT_NUGGET, + LogScaleUniformPrior(PARAMETER_EPSILON, PARAMETER_MAX)}; + } + + ParameterStore get_params() const override { + auto params = map_join(this->mean_function_.get_params(), + this->covariance_function_.get_params()); + params[details::measurement_nugget_name()] = measurement_nugget_; + params[details::inducing_nugget_name()] = inducing_nugget_; + return params; + } + + void set_param(const std::string &name, const Parameter ¶m) override { + if (name == details::measurement_nugget_name()) { + measurement_nugget_ = param; + return; + } else if (name == details::inducing_nugget_name()) { + inducing_nugget_ = param; + return; + } + const bool success = set_param_if_exists_in_any( + name, param, &this->covariance_function_, &this->mean_function_); + ALBATROSS_ASSERT(success); + } + + 
template + auto + _update_impl(const Fit> &old_fit, + const std::vector &features, + const MarginalDistribution &targets) const { + + BlockDiagonalLDLT A_ldlt; + Eigen::SerializableLDLT K_uu_ldlt; + Eigen::MatrixXd K_fu; + Eigen::VectorXd y; + Eigen::SerializableLDLT K_PITC_ldlt; + compute_internal_components(old_fit.train_features, features, targets, + &A_ldlt, &K_uu_ldlt, &K_fu, &y, &K_PITC_ldlt); + + const Eigen::Index n_old = old_fit.sigma_R.rows(); + const Eigen::Index n_new = A_ldlt.rows(); + const Eigen::Index k = old_fit.sigma_R.cols(); + Eigen::MatrixXd B = Eigen::MatrixXd::Zero(n_old + n_new, k); + + ALBATROSS_ASSERT(n_old == k); + + // Form: + // B = |R_old P_old^T| = |Q_1| R P^T + // |A^{-1/2} K_fu| |Q_2| + for (Eigen::Index i = 0; i < old_fit.permutation_indices.size(); ++i) { + const Eigen::Index &pi = old_fit.permutation_indices.coeff(i); + B.col(pi).topRows(i + 1) = old_fit.sigma_R.col(i).topRows(i + 1); + } + B.bottomRows(n_new) = A_ldlt.sqrt_solve(K_fu); + const auto B_qr = QRImplementation::compute(B, Base::threads_.get()); + + // Form: + // y_aug = |R_old P_old^T v_old| + // |A^{-1/2} y | + ALBATROSS_ASSERT(old_fit.information.size() == n_old); + Eigen::VectorXd y_augmented(n_old + n_new); + for (Eigen::Index i = 0; i < old_fit.permutation_indices.size(); ++i) { + y_augmented[i] = + old_fit.information[old_fit.permutation_indices.coeff(i)]; + } + y_augmented.topRows(n_old) = + old_fit.sigma_R.template triangularView() * + y_augmented.topRows(n_old); + + y_augmented.bottomRows(n_new) = A_ldlt.sqrt_solve(y, Base::threads_.get()); + const Eigen::VectorXd v = B_qr->solve(y_augmented); + + Eigen::MatrixXd R = get_R(*B_qr); + if (B_qr->rank() < B_qr->cols()) { + // Inflate the diagonal of R in an attempt to avoid singularity + R.diagonal() += + Eigen::VectorXd::Constant(B_qr->cols(), details::cSparseRNugget); + } + using FitType = + Fit>; + return FitType(old_fit.train_features, old_fit.train_covariance, R, + get_P(*B_qr), v, B_qr->rank()); + } + + // Here we create the QR decomposition of: + // + // B = |A^-1/2 K_fu| = |Q_1| R P^T + // |K_uu^{T/2} | |Q_2| + // + // which corresponds to the inverse square root of Sigma + // + // Sigma = (B^T B)^-1 + std::unique_ptr + compute_sigma_qr(const Eigen::SerializableLDLT &K_uu_ldlt, + const BlockDiagonalLDLT &A_ldlt, + const Eigen::MatrixXd &K_fu) const { + Eigen::MatrixXd B(A_ldlt.rows() + K_uu_ldlt.rows(), K_uu_ldlt.rows()); + B.topRows(A_ldlt.rows()) = A_ldlt.sqrt_solve(K_fu); + B.bottomRows(K_uu_ldlt.rows()) = K_uu_ldlt.sqrt_transpose(); + return QRImplementation::compute(B, Base::threads_.get()); + }; + + template < + typename FeatureType, + std::enable_if_t< + has_call_operator::value, int> = 0> + auto _fit_impl(const std::vector &features, + const MarginalDistribution &targets) const { + // Determine the set of inducing points, u. 
+ const auto u = + inducing_point_strategy_(this->covariance_function_, features); + ALBATROSS_ASSERT(u.size() > 0 && "Empty inducing points!"); + // std::cerr << "features: " << features.size() << std::endl; + // std::cerr << "u: " << u.size() << std::endl; + + BlockDiagonalLDLT A_ldlt; + Eigen::SerializableLDLT K_uu_ldlt; + Eigen::MatrixXd K_fu; + Eigen::VectorXd y; + GroupMap measurement_groups; + Eigen::SerializableLDLT K_PITC_ldlt; + compute_internal_components(u, features, targets, &A_ldlt, &K_uu_ldlt, + &K_fu, &y, &measurement_groups, &K_PITC_ldlt); + auto B_qr = compute_sigma_qr(K_uu_ldlt, A_ldlt, K_fu); + + // To make a prediction, we will need to compute cross-terms with + // sub-blocks of S^-1, where + // + // S^-1 = A^-1 - A^-1 K_fu (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1 + // \____________________________________________/ + // Z^T Z + // + // The inverse part of the right-hand term of this is B_qr above, + // and we already have A_ldlt, so we can form Z. + // + // Now to compute sub-blocks of S^-1, we just index into Z^T Z + // appropriately, and if the term is diagonal, also solve using + // the blocks of A^-1. + const Eigen::MatrixXd Z = sqrt_solve(*B_qr, A_ldlt.solve(K_fu).transpose()); + // get_R(*B_qr).template triangularView().transpose().solve( + // get_P(*B_qr).transpose() * A_ldlt.solve(K_fu).transpose()); + // std::cout << "Z (" << Z.rows() << "x" << Z.cols() << "):\n" + // << Z << std::endl; + + Eigen::VectorXd y_augmented = Eigen::VectorXd::Zero(B_qr->rows()); + y_augmented.topRows(y.size()) = A_ldlt.sqrt_solve(y, Base::threads_.get()); + const Eigen::VectorXd v = B_qr->solve(y_augmented); + + using InducingPointFeatureType = typename std::decay::type; + + const Eigen::MatrixXd W = + K_uu_ldlt.solve((A_ldlt.solve(K_fu) - Z.transpose() * Z * K_fu).transpose()); + // std::cout << "W (" << W.rows() << "x" << W.cols() << "):\n" + // << W.format(Eigen::FullPrecision) << std::endl; + + const Eigen::MatrixXd W2 = + K_uu_ldlt.solve(K_PITC_ldlt.solve(K_fu).transpose()); + // std::cout << "W2 (" << W2.rows() << "x" << W2.cols() << "):\n" + // << W2.format(Eigen::FullPrecision) << std::endl; + + // TODO(@peddie): a lot of this can be batched + std::vector covariance_Ynot(measurement_groups.size()); + std::vector covariance_Y(measurement_groups.size()); + std::vector mean_w(measurement_groups.size()); + std::vector> cols_Bs(measurement_groups.size()); + std::vector> cols_Cs(measurement_groups.size()); + const auto block_start_indices = A_ldlt.block_to_row_map(); + + const auto precompute_block = + [this, &A_ldlt, &features, &y, &K_fu, &v, &Z, &K_uu_ldlt, &mean_w, + &covariance_Y, &covariance_Ynot, &cols_Bs, + &cols_Cs](std::size_t block_number, Eigen::Index start_row) -> void { + // const std::size_t block_number = block_start.first; + // const Eigen::Index start_row = block_start.second; + const Eigen::Index block_size = A_ldlt.blocks[block_number].rows(); + // K_fu is already computed, so we can form K_uB and K_uA by + // appropriate use of sparse indexing matrices and avoid an O(N + // M) operation. + // + // This nonsense would be a lot more straightforward with Eigen + // 3.4's slicing and indexing API. 
+ Eigen::SparseMatrix cols_B(features.size(), block_size); + cols_B.reserve(block_size); + Eigen::SparseMatrix cols_C(features.size(), + features.size() - block_size); + cols_C.reserve(features.size() - block_size); + + for (Eigen::Index i = 0; i < block_size; ++i) { + cols_B.insert(start_row + i, i) = 1.; + } + cols_B.makeCompressed(); + + // std::cout << "block " << block_number << " -- start_row: " << start_row + // << "; block_size: " << block_size << std::endl; + // std::cout << "cols_B (" << cols_B.rows() << "x" << cols_B.cols() << "):\n" + // << Eigen::MatrixXd(cols_B) << std::endl; + + for (Eigen::Index i = 0, j = 0; i < features.size();) { + if (i == start_row) { + i += block_size; + continue; + } + cols_C.insert(i, j) = 1.; + i++; + j++; + } + cols_C.makeCompressed(); + + Eigen::Index col = 0; + for (Eigen::Index k = 0; k < cols_C.outerSize(); ++k) { + for (decltype(cols_C)::InnerIterator it(cols_C, k); it; ++it) { + assert(it.col() == col++); + } + } + + cols_Bs[block_number] = cols_B; + cols_Cs[block_number] = cols_C; + + // std::cout << "cols_C (" << cols_C.rows() << "x" << cols_C.cols() << "):\n" + // << Eigen::MatrixXd(cols_C) << std::endl; + // v_b \in R^b = A_BB^-1 (y_b - K_Bu v) + Eigen::MatrixXd ydiff_b = cols_B.transpose() * (y - K_fu * v); + // std::cerr << "ydiff_b: " << ydiff_b.rows() << " x " << ydiff_b.cols() + // << std::endl; + // std::cerr << "cols_B: " << cols_B.rows() << " x " << cols_B.cols() + // << std::endl; + // std::cerr << "A_ldlt: " << A_ldlt.rows() << " x " << A_ldlt.cols() + // << " in " << A_ldlt.blocks.size() << " blocks: { "; + // for (const auto &block : A_ldlt.blocks) { + // std::cerr << block.rows() << ", "; + // } + // std::cerr << " }" << std::endl; + const Eigen::MatrixXd mean_w_full = + A_ldlt.blocks[block_number].solve(ydiff_b); + + if (A_ldlt.blocks.size() > 1) { + // std::cout << "K_fu (" << K_fu.rows() << "x" << K_fu.cols() << + // "):\n" + // << K_fu << std::endl; + const Eigen::MatrixXd KufC = K_fu.transpose() * cols_C; + // std::cout << "KufC (" << KufC.rows() << "x" << KufC.cols() << "):\n" + // << KufC << std::endl; + // const Eigen::MatrixXd ZC = Z * cols_C; + // // std::cout << "ZC.transpose() (" << ZC.transpose().rows() << "x" + // // << ZC.transpose().cols() << "):\n" + // // << ZC.transpose() << std::endl; + + // const Eigen::MatrixXd KuZT = KufC * ZC.transpose(); + // const Eigen::MatrixXd KuZTZ = KuZT * Z; + // const Eigen::MatrixXd KuZTZB = -KuZTZ * cols_B; + // covariance_W[block_number] = K_uu_ldlt.solve(KuZTZB); + // std::cout << "covariance_W[" << block_number << "]:\n" + // << covariance_W[block_number].format(Eigen::FullPrecision) + // << std::endl; + covariance_Ynot[block_number] = K_uu_ldlt.solve(KufC); + // covariance_W.emplace_back(K_uu_ldlt.solve( + // (-(K_fu.transpose() * cols_C) * Z.transpose() * Z) * + // cols_B)); + } + + // std::cout << "mean_w_full (" << mean_w_full.rows() << "x" + // << mean_w_full.cols() << "):\n" + // << mean_w_full << std::endl; + // const Eigen::MatrixXd mean_w_block = cols_B.transpose() * + // mean_w_full; std::cout << "mean_w_block (" << mean_w_block.rows() + // << "x" + // << mean_w_block.cols() << "):\n" + // << mean_w_block << std::endl; + // mean_w.emplace_back(cols_B.transpose() * + // A_ldlt.blocks[block_number].solve(ydiff_b)); + mean_w[block_number] = mean_w_full; + // Y \in R^(u x b) = K_uu^-1 K_uB + covariance_Y[block_number] = K_uu_ldlt.solve(K_fu.transpose() * cols_B); + // std::cout << "covariance_Y[" << block_number << "]:\n" + // << 
covariance_Y[block_number].format(Eigen::FullPrecision) + // << std::endl; + // W \in R^(u x b) = K_uu^-1 K_uC S_CB^-1 + // = K_uu^-1 K_uC (A^-1 - Z^T Z) + // [A^-1 is block diagonal; C and B are disjoint] + // = K_uu^-1 K_uC (- Z^T Z) + }; + + // for (const auto &block_start : block_start_indices) { + // const std::size_t block_number = block_start.first; + // const Eigen::Index start_row = block_start.second; + // const Eigen::Index block_size = A_ldlt.blocks[block_number].rows(); + // // K_fu is already computed, so we can form K_uB and K_uA by + // // appropriate use of sparse indexing matrices and avoid an O(N + // // M) operation. + // // + // // This nonsense would be a lot more straightforward with Eigen + // // 3.4's slicing and indexing API. + // Eigen::SparseMatrix cols_B(features.size(), block_size); + // cols_B.reserve(block_size); + // Eigen::SparseMatrix cols_C(features.size(), + // features.size() - block_size); + // cols_C.reserve(features.size() - block_size); + + // for (Eigen::Index i = 0; i < block_size; ++i) { + // cols_B.insert(start_row + i, i) = 1.; + // } + // cols_B.makeCompressed(); + + // // std::cout << "cols_B (" << cols_B.rows() << "x" << cols_B.cols() << + // "):\n" + // // << Eigen::MatrixXd(cols_B) << std::endl; + + // // std::cout << "start_row: " << start_row << "; block_size: " << + // block_size + // // << std::endl; + // for (Eigen::Index i = 0, j = 0; i < features.size();) { + // if (i == start_row) { + // i += block_size; + // continue; + // } + // cols_C.insert(i, j) = 1.; + // i++; + // j++; + // } + // cols_C.makeCompressed(); + + // Eigen::Index col = 0; + // for (Eigen::Index k = 0; k < cols_C.outerSize(); ++k) { + // for (decltype(cols_C)::InnerIterator it(cols_C, k); it; ++it) { + // assert(it.col() == col++); + // } + // } + // // std::cout << "cols_C (" << cols_C.rows() << "x" << cols_C.cols() << + // "):\n" + // // << Eigen::MatrixXd(cols_C) << std::endl; + // // v_b \in R^b = A_BB^-1 (y_b - K_Bu v) + // Eigen::MatrixXd ydiff_b = cols_B.transpose() * (y - K_fu * v); + // // std::cerr << "ydiff_b: " << ydiff_b.rows() << " x " << + // ydiff_b.cols() + // // << std::endl; + // // std::cerr << "cols_B: " << cols_B.rows() << " x " << cols_B.cols() + // // << std::endl; + // // std::cerr << "A_ldlt: " << A_ldlt.rows() << " x " << A_ldlt.cols() + // // << " in " << A_ldlt.blocks.size() << " blocks: { "; + // // for (const auto &block : A_ldlt.blocks) { + // // std::cerr << block.rows() << ", "; + // // } + // // std::cerr << " }" << std::endl; + // const Eigen::MatrixXd mean_w_full = + // A_ldlt.blocks[block_number].solve(ydiff_b); + // // std::cout << "mean_w_full (" << mean_w_full.rows() << "x" + // // << mean_w_full.cols() << "):\n" + // // << mean_w_full << std::endl; + // // const Eigen::MatrixXd mean_w_block = cols_B.transpose() * + // mean_w_full; + // // std::cout << "mean_w_block (" << mean_w_block.rows() << "x" + // // << mean_w_block.cols() << "):\n" + // // << mean_w_block << std::endl; + // // mean_w.emplace_back(cols_B.transpose() * + // // A_ldlt.blocks[block_number].solve(ydiff_b)); + // mean_w.push_back(mean_w_full); + // // Y \in R^(u x b) = K_uu^-1 K_uB + // covariance_Y.emplace_back(K_uu_ldlt.solve(K_fu.transpose() * cols_B)); + // // W \in R^(u x b) = K_uu^-1 K_uC S_CB^-1 + // // = K_uu^-1 K_uC (A^-1 - Z^T Z) + // // [A^-1 is block diagonal; C and B are disjoint] + // // = K_uu^-1 K_uC (- Z^T Z) + + // if (A_ldlt.blocks.size() > 1) { + // // std::cout << "K_fu (" << K_fu.rows() << "x" << K_fu.cols() << + // "):\n" + // // << 
K_fu << std::endl; + // const Eigen::MatrixXd KufC = K_fu.transpose() * cols_C; + // // std::cout << "KufC (" << KufC.rows() << "x" << KufC.cols() << + // "):\n" + // // << KufC << std::endl; + // const Eigen::MatrixXd ZC = Z * cols_C; + // // std::cout << "ZC.transpose() (" << ZC.transpose().rows() << "x" + // // << ZC.transpose().cols() << "):\n" + // // << ZC.transpose() << std::endl; + + // const Eigen::MatrixXd KuZT = KufC * ZC.transpose(); + // const Eigen::MatrixXd KuZTZ = KuZT * Z; + // const Eigen::MatrixXd KuZTZB = -KuZTZ * cols_B; + // covariance_W.emplace_back(K_uu_ldlt.solve(KuZTZB)); + // // covariance_W.emplace_back(K_uu_ldlt.solve( + // // (-(K_fu.transpose() * cols_C) * Z.transpose() * Z) * cols_B)); + // } + // } + + apply_map(block_start_indices, precompute_block, Base::threads_.get()); + + using FitType = + Fit>; + return FitType(features, u, K_uu_ldlt, get_R(*B_qr), get_P(*B_qr), mean_w, + W, covariance_Y, Z, A_ldlt, measurement_groups, v, + B_qr->rank(), covariance_Ynot, K_PITC_ldlt, cols_Bs, + cols_Cs); + } + + template + auto fit_from_prediction(const std::vector &new_inducing_points, + const JointDistribution &prediction_) const { + FitModel>> + output(*this, + Fit>()); + Fit> &new_fit = + output.get_fit(); + + new_fit.train_features = new_inducing_points; + + const Eigen::MatrixXd K_zz = + this->covariance_function_(new_inducing_points, Base::threads_.get()); + new_fit.train_covariance = Eigen::SerializableLDLT(K_zz); + + // We're going to need to take the sqrt of the new covariance which + // could be extremely small, so here we add a small nugget to avoid + // numerical instability + JointDistribution prediction(prediction_); + prediction.covariance.diagonal() += Eigen::VectorXd::Constant( + cast::to_index(prediction.size()), 1, details::DEFAULT_NUGGET); + new_fit.information = new_fit.train_covariance.solve(prediction.mean); + + // Here P is the posterior covariance at the new inducing points. If + // we consider the case where we rebase and then use the resulting fit + // to predict the new inducing points then we see that the predictive + // covariance (see documentation above) would be,: + // + // C = K_zz - Q_zz + K_zz Sigma K_zz + // + // We can use this, knowing that at the inducing points K_zz = Q_zz, to + // derive our updated Sigma, + // + // C = K_zz - K_zz + K_zz Sigma K_zz + // C = K_zz Sigma K_zz + // Sigma = K_zz^-1 C K_zz^-1 + // + // And since we need to store Sigma in sqrt form we get, + // + // Sigma = (B_z^T B_z)^-1 + // = K_zz^-1 C K_zz^-1 + // + // So by setting: + // + // B_z = C^{-1/2} K_z + // + // We can then compute and store the QR decomposition of B + // as we do in a normal fit. + const Eigen::SerializableLDLT C_ldlt(prediction.covariance); + const Eigen::MatrixXd sigma_inv_sqrt = C_ldlt.sqrt_solve(K_zz); + const auto B_qr = QRImplementation::compute(sigma_inv_sqrt, nullptr); + + new_fit.permutation_indices = get_P(*B_qr); + new_fit.sigma_R = get_R(*B_qr); + new_fit.numerical_rank = B_qr->rank(); + + return output; + } + + // This is included to allow the SparseGP to be compatible with fits + // generated using a standard GP. 
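+  // The `using Base::_predict_impl;` just below pulls the base-class
+  // prediction overloads into scope so they are not hidden by the
+  // PIC-specific ones that follow.  Those PIC overloads handle, in order,
+  // the posterior mean, the marginal variances and the full joint
+  // distribution; the mean and marginal versions appear unchanged from the
+  // PITC sparse case, and only the joint version applies the extra PIC
+  // correction, which needs to know which training block each prediction
+  // feature falls into.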
+ using Base::_predict_impl; + + template + Eigen::VectorXd _predict_impl( + const std::vector &features, + const Fit> + &sparse_gp_fit, + PredictTypeIdentity &&) const { + const auto cross_cov = + this->covariance_function_(sparse_gp_fit.train_features, features); + Eigen::VectorXd mean = + gp_mean_prediction(cross_cov, sparse_gp_fit.information); + this->mean_function_.add_to(features, &mean); + return mean; + } + + template + MarginalDistribution _predict_impl( + const std::vector &features, + const Fit> + &sparse_gp_fit, + PredictTypeIdentity &&) const { + const auto cross_cov = + this->covariance_function_(sparse_gp_fit.train_features, features); + Eigen::VectorXd mean = + gp_mean_prediction(cross_cov, sparse_gp_fit.information); + this->mean_function_.add_to(features, &mean); + + Eigen::VectorXd marginal_variance(cast::to_index(features.size())); + for (Eigen::Index i = 0; i < marginal_variance.size(); ++i) { + marginal_variance[i] = this->covariance_function_( + features[cast::to_size(i)], features[cast::to_size(i)]); + } + + const Eigen::MatrixXd Q_sqrt = + sparse_gp_fit.train_covariance.sqrt_solve(cross_cov); + const Eigen::VectorXd Q_diag = + Q_sqrt.cwiseProduct(Q_sqrt).array().colwise().sum(); + marginal_variance -= Q_diag; + + const Eigen::MatrixXd S_sqrt = sqrt_solve( + sparse_gp_fit.sigma_R, sparse_gp_fit.permutation_indices, cross_cov); + const Eigen::VectorXd S_diag = + S_sqrt.cwiseProduct(S_sqrt).array().colwise().sum(); + marginal_variance += S_diag; + + return MarginalDistribution(mean, marginal_variance); + } + + // template + // auto find_group(const FitType &fit, const FeatureType &feature) const { + // const auto group = fit.measurement_groups.find( + // independent_group_function_(without_measurement(feature))); + // if (group == fit.measurement_groups.end()) { + // std::cerr << "Group mapping failure for feature '" + // << without_measurement(feature) << "' (group index '" + // << independent_group_function_(without_measurement(feature)) + // << "')!" << std::endl; + // assert(group != fit.measurement_groups.end() && + // "TODO(@peddie): the group function in a PIC GP model must cover + // " "the entire feature domain in any fit."); + // } + // return group; + // } + // template + // using InducingFeatureType = + // typename std::decay &)>::type::value_type>::type; + + // typename std::decay( + // std::declval(), std::declval()))>::type; + + // template + // using InducingFeatureType = + // typename std::decay(), + // std::declval + // &>()))::value_type>:: type; + + template + JointDistribution _predict_impl( + const std::vector &features, + const Fit, + // typename std::result_of &)>::type, + FitFeatureType>> &sparse_gp_fit, + // const Fit, + // FitFeatureType>> &sparse_gp_fit, + PredictTypeIdentity &&) const { + + const auto find_group = [this, &sparse_gp_fit](const auto &feature) { + const auto group = sparse_gp_fit.measurement_groups.find( + independent_group_function_(without_measurement(feature))); + if (group == sparse_gp_fit.measurement_groups.end()) { + std::cerr << "Group mapping failure for feature '" + << without_measurement(feature) << "' (group index '" + << independent_group_function_(without_measurement(feature)) + << "')!" 
<< std::endl; + assert(group != sparse_gp_fit.measurement_groups.end() && + "TODO(@peddie): the group function in a PIC GP model must " + "cover the entire feature domain in any fit."); + } + return group; + }; + // using CalculatedInducingFeatureType = + // typename std::decay &)>::type::value_type>::type; + // static_assert( + // std::is_same::value && + // "A fitted PIC model must be able to compute the covariance between + // its " "inducing point feature type and the feature type to be + // predicted."); + // K_up + const Eigen::MatrixXd cross_cov = + this->covariance_function_(sparse_gp_fit.inducing_features, features); + const Eigen::MatrixXd prior_cov = this->covariance_function_(features); + + Eigen::MatrixXd WV = Eigen::MatrixXd::Zero( + sparse_gp_fit.inducing_features.size(), features.size()); + + Eigen::VectorXd mean_correction(features.size()); + std::vector feature_to_block; + for (Eigen::Index j = 0; j < features.size(); ++j) { + const auto group = find_group(features[j]); + // const auto group = sparse_gp_fit.measurement_groups.find( + // independent_group_function_(without_measurement(features[j]))); + // assert(group != sparse_gp_fit.measurement_groups.end() && + // "TODO(@peddie): the group function in a PIC GP model must + // cover " "the entire feature domain in any fit."); + feature_to_block.push_back( + std::distance(sparse_gp_fit.measurement_groups.begin(), group)); + if (sparse_gp_fit.A_ldlt.blocks.size() > 1) { + const std::vector fvec = {features[j]}; + + const Eigen::VectorXd features_cov = + this->covariance_function_(group->second.dataset.features, fvec); + // std::cout << "Feature " << j << "(" << without_measurement(features[j]) + // << ") in group '" << group->first << "' (" + // << group->second.block_size << ": " + // << group->second.initial_row << " -> " + // << group->second.initial_row + group->second.block_size << ")" + // << std::endl; + // std::cout << "inducing_points: " + // << sparse_gp_fit.inducing_features.size() << std::endl; + // std::cout << "cross_cov (" << cross_cov.rows() << "x" + // << cross_cov.cols() << "):\n" + // << cross_cov << std::endl; + // std::cout << "features_cov (" << features_cov.rows() << "x" + // << features_cov.cols() << "):\n" + // << features_cov << std::endl; + // std::cout + // << "sparse_gp_fit.covariance_Y[group->second.block_index] (" + // << sparse_gp_fit.covariance_Y[group->second.block_index].rows() + // << "x" + // << sparse_gp_fit.covariance_Y[group->second.block_index].cols() + // << "):\n" + // << sparse_gp_fit.covariance_Y[group->second.block_index] + // << std::endl; + const Eigen::VectorXd kpuy = + cross_cov.transpose().row(j) * + sparse_gp_fit.covariance_Y[group->second.block_index]; + // std::cout << "kpuy (" << kpuy.rows() << "x" << kpuy.cols() << "):\n" + // << kpuy << std::endl; + const Eigen::VectorXd Vbp = features_cov - kpuy; + mean_correction[j] = + Vbp.dot(sparse_gp_fit.mean_w[group->second.block_index]); + const Eigen::VectorXd wvj = + sparse_gp_fit.W * sparse_gp_fit.cols_Bs[feature_to_block[j]] * Vbp; + WV.col(j) = wvj; + + // WV.col(j) = + // sparse_gp_fit.covariance_W[group->second.block_index] * + // (this->covariance_function_(group->second.dataset.features, fvec) + // - + // cross_cov.transpose() * + // sparse_gp_fit.covariance_Y[group->second.block_index]); + } + } + + // std::cout << "WV (" << WV.rows() << "x" << WV.cols() << "):\n" + // << WV << std::endl; + + // std::cout << "mean_correction (" << mean_correction.size() + // << "): " << mean_correction.transpose() << std::endl; + + 
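+    // VSV appears to assemble the V^T S^-1 V part of the PIC correction,
+    // built pairwise over prediction features: V_row_p / V_column_p are the
+    // covariances of each feature against its own training block with the
+    // inducing-point (Q) contribution removed.  When two features share a
+    // block, their A_BB^-1-weighted inner product is added; the Z-projected
+    // outer term is then subtracted for every pair.  Only the lower
+    // triangle is filled in the loop and mirrored afterwards via
+    // triangularView.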
Eigen::MatrixXd VSV(features.size(), features.size()); + + for (Eigen::Index row = 0; row < VSV.rows(); ++row) { + for (Eigen::Index col = 0; col <= row; ++col) { + const auto row_group = find_group(features[row]); + const auto column_group = find_group(features[col]); + // const auto row_group = sparse_gp_fit.measurement_groups.find( + // independent_group_function_(without_measurement(features[row]))); + // assert(row_group != sparse_gp_fit.measurement_groups.end() && + // "TODO(@peddie): the group function in a PIC GP model must + // cover " "the entire feature domain in any fit."); + + // const auto column_group = sparse_gp_fit.measurement_groups.find( + // independent_group_function_(without_measurement(features[col]))); + // assert(column_group != sparse_gp_fit.measurement_groups.end() && + // "TODO(@peddie): the group function in a PIC GP model must + // cover " "the entire feature domain in any fit."); + + // TODO(@peddie): these are K, not V! + const Eigen::VectorXd Q_row_p = + cross_cov.transpose().row(row) * + sparse_gp_fit.covariance_Y[row_group->second.block_index]; + const std::vector row_fvec = {features[row]}; + const Eigen::VectorXd V_row_p = + this->covariance_function_(row_group->second.dataset.features, + row_fvec) - + Q_row_p; + const Eigen::VectorXd Q_column_p = + cross_cov.transpose().row(col) * + sparse_gp_fit.covariance_Y[column_group->second.block_index]; + const std::vector column_fvec = {features[col]}; + const Eigen::VectorXd V_column_p = + this->covariance_function_(column_group->second.dataset.features, + column_fvec) - + Q_column_p; + + VSV(row, col) = 0.; + + assert(row < feature_to_block.size()); + assert(col < feature_to_block.size()); + if (feature_to_block[row] == feature_to_block[col]) { + VSV(row, col) = + sparse_gp_fit.A_ldlt.blocks[feature_to_block[row]] + .sqrt_solve(V_row_p) + .col(0) + .dot(sparse_gp_fit.A_ldlt.blocks[feature_to_block[col]] + .sqrt_solve(V_column_p) + .col(0)); + // std::cout << "VSV(" << row << ", " << col << ") same block (" + // << feature_to_block[row] << "):\n" + // << VSV(row, col) << std::endl; + } + + const Eigen::MatrixXd rowblock = sparse_gp_fit.Z.block( + 0, row_group->second.initial_row, sparse_gp_fit.Z.rows(), + row_group->second.block_size); + const Eigen::MatrixXd columnblock = sparse_gp_fit.Z.block( + 0, column_group->second.initial_row, sparse_gp_fit.Z.rows(), + column_group->second.block_size); + + // std::cout << "VSV(" << row << ", " << col << "):\n" + // << "rowblock (" << rowblock.rows() << "x" << rowblock.cols() + // << "):\n" + // << rowblock << "\ncolblock (" << columnblock.rows() << "x" + // << columnblock.cols() << "):\n" + // << columnblock << "\nV_row_p (" << V_row_p.size() + // << "): " << V_row_p.transpose() << "\nV_column_p (" + // << V_column_p.size() << "): " << V_column_p.transpose() + // << "\nvalue: " + // << (rowblock * V_row_p).dot(columnblock * V_column_p) + // << std::endl; + + VSV(row, col) -= (rowblock * V_row_p).dot(columnblock * V_column_p); + } + } + + VSV.triangularView() = VSV.transpose(); + + // std::cout << "VSV (" << VSV.rows() << "x" << VSV.cols() << "):\n" + // << VSV << std::endl; + const Eigen::MatrixXd U = cross_cov.transpose() * WV; + + // std::cout << "U (" << U.rows() << "x" << U.cols() << "):\n" + // << U << std::endl; + + const Eigen::MatrixXd S_sqrt = + sqrt_solve(sparse_gp_fit.sigma_R, sparse_gp_fit.P, cross_cov); + + const Eigen::MatrixXd Q_sqrt = + sparse_gp_fit.train_covariance.sqrt_solve(cross_cov); + + const Eigen::MatrixXd max_explained = Q_sqrt.transpose() * 
Q_sqrt; + const Eigen::MatrixXd unexplained = S_sqrt.transpose() * S_sqrt; + + const Eigen::MatrixXd pitc_covariance = + prior_cov - max_explained + unexplained; + + // std::cout << "pitc_covariance (" << pitc_covariance.rows() << "x" + // << pitc_covariance.cols() << "):\n" + // << pitc_covariance << std::endl; + + const Eigen::MatrixXd pic_correction = U + U.transpose() + VSV; + + // std::cout << "pic_correction (" << pic_correction.rows() << "x" + // << pic_correction.cols() << "):\n" + // << pic_correction << std::endl; + + JointDistribution pred(cross_cov.transpose() * sparse_gp_fit.information + + mean_correction, + pitc_covariance - pic_correction); + + this->mean_function_.add_to(features, &pred.mean); + + // Debug comparison + + // Eigen::MatrixXd K_pic(sparse_gp_fit.train_features.size(), + // features.size()); for (std::size_t i = 0; i < features.size(); ++i) { + // const auto group = find_group(features[i]); + // K_pic.col(i) = sparse_gp_fit.cols_Cs[feature_to_block[i]] * + // cross_cov.transpose() * + // sparse_gp_fit.covariance_Ynot[feature_to_block[i]]; + // std::cout << "Ynot: K_pic.col(" << i << "): " << + // K_pic.col(i).transpose() + // << std::endl; + // K_pic.col(i).segment(group->second.initial_row, + // group->second.block_size) = + // this->covariance_function_(std::vector{features[i]}, + // group->second.dataset.features); + // std::cout << "K*: K_pic.col(" << i << "): " << K_pic.col(i).transpose() + // << std::endl; + // } + + // std::cout << "K_pic (" << K_pic.rows() << "x" << K_pic.cols() << "):\n" + // << K_pic << std::endl; + + // const Eigen::MatrixXd K_pic_pitc = + // sparse_gp_fit.K_PITC_ldlt.sqrt_solve(K_pic); + // JointDistribution alt_pred(cross_cov.transpose() * + // sparse_gp_fit.information, + // prior_cov - K_pic_pitc.transpose() * + // K_pic_pitc); + + // std::cout << "alt covariance (" << alt_pred.covariance.rows() << "x" + // << alt_pred.covariance.cols() << "):\n" + // << alt_pred.covariance << std::endl; + + return pred; + } + + template + double log_likelihood(const RegressionDataset &dataset) const { + const auto u = + inducing_point_strategy_(this->covariance_function_, dataset.features); + + BlockDiagonalLDLT A_ldlt; + Eigen::SerializableLDLT K_uu_ldlt; + Eigen::MatrixXd K_fu; + Eigen::VectorXd y; + compute_internal_components(u, dataset.features, dataset.targets, &A_ldlt, + &K_uu_ldlt, &K_fu, &y); + const auto B_qr = compute_sigma_qr(K_uu_ldlt, A_ldlt, K_fu); + // The log likelihood for y ~ N(0, K) is: + // + // L = 1/2 (n log(2 pi) + log(|K|) + y^T K^-1 y) + // + // where in our case we have + // K = A + Q_ff + // and + // Q_ff = K_fu K_uu^-1 K_uf + // + // First we get the determinant, |K|: + // + // |K| = |A + Q_ff| + // = |K_uu + K_uf A^-1 K_fu| |K_uu^-1| |A| + // = |P R^T Q^T Q R P^T| |K_uu^-1| |A| + // = |R^T R| |K_uu^-1| |A| + // = |R|^2 |A| / |K_uu| + // + // Where the first equality comes from the matrix determinant lemma. 
+ // https://en.wikipedia.org/wiki/Matrix_determinant_lemma#Generalization + // + // After which we can take the log: + // + // log(|K|) = 2 log(|R|) + log(|A|) - log(|K_uu|) + // + const double log_det_a = A_ldlt.log_determinant(); + + const double log_det_r = + B_qr->matrixR().diagonal().array().cwiseAbs().log().sum(); + const double log_det_K_uu = K_uu_ldlt.log_determinant(); + const double log_det = log_det_a + 2 * log_det_r - log_det_K_uu; + + // q = y^T K^-1 y + // = y^T (A + Q_ff)^-1 y + // = y^T (A^-1 - A^-1 K_fu (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1) y + // = y^T A^-1 y - y^T A^-1 K_fu (K_uu + K_uf A^-1 K_fu)^-1 K_uf A^-1) y + // = y^T A^-1 y - y^T A^-1 K_fu (R^T R)^-1 K_uf A^-1) y + // = y^T A^-1 y - (R^-T K_uf A^-1 y)^T (R^-T K_uf A^-1 y) + // = y^T y_a - y_b^T y_b + // + // with y_b = R^-T K_uf y_a + const Eigen::VectorXd y_a = A_ldlt.solve(y); + + Eigen::VectorXd y_b = K_fu.transpose() * y_a; + y_b = sqrt_solve(*B_qr, y_b); + + double log_quadratic = y.transpose() * y_a; + log_quadratic -= y_b.transpose() * y_b; + + const double rank = cast::to_double(y.size()); + const double log_dimension = rank * log(2 * M_PI); + + return -0.5 * (log_det + log_quadratic + log_dimension) + + this->prior_log_likelihood(); + } + + InducingPointStrategy get_inducing_point_strategy() const { + return inducing_point_strategy_; + } + +private: + // This method takes care of a lot of the common book keeping required to + // setup the Sparse Gaussian Process problem. Namely, we want to get from + // possibly unordered features to a structured representation + // in the form K_ff = A_ff + Q_ff where Q_ff = K_fu K_uu^-1 K_uf and + // A_ff is block diagonal and is formed by subtracting Q_ff from K_ff. + // + template + void compute_internal_components( + const std::vector &inducing_features, + const std::vector &out_of_order_features, + const MarginalDistribution &out_of_order_targets, + BlockDiagonalLDLT *A_ldlt, Eigen::SerializableLDLT *K_uu_ldlt, + Eigen::MatrixXd *K_fu, Eigen::VectorXd *y, + GroupMap *measurement_groups, + Eigen::SerializableLDLT *K_PITC_ldlt) const { + + ALBATROSS_ASSERT(A_ldlt != nullptr); + ALBATROSS_ASSERT(K_uu_ldlt != nullptr); + ALBATROSS_ASSERT(K_fu != nullptr); + ALBATROSS_ASSERT(y != nullptr); + + const auto indexer = + group_by(out_of_order_features, independent_group_function_).indexers(); + + const auto out_of_order_measurement_features = + as_measurements(out_of_order_features); + + std::vector reordered_inds; + BlockDiagonal K_ff; + Eigen::Index begin_row_index = 0; + Eigen::Index block_index = 0; + // TODO(@peddie): compute these blocks asynchronously? 
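+    // For each group produced by the grouper: record its starting row,
+    // size and block index along with the corresponding slice of the
+    // dataset in measurement_groups, and append the dense within-group
+    // covariance (plus the per-measurement noise from the targets'
+    // diagonal) as one block of K_ff.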
+ for (const auto &pair : indexer) { + reordered_inds.insert(reordered_inds.end(), pair.second.begin(), + pair.second.end()); + measurement_groups->operator[](pair.first) = PICGroup{ + begin_row_index, static_cast(pair.second.size()), + block_index, + albatross::RegressionDataset( + subset(out_of_order_features, pair.second), + out_of_order_targets.subset(pair.second))}; + // measurement_groups->emplace( + // std::piecewise_construct, std::forward_as_tuple(pair.first), + // std::forward_as_tuple( + // begin_row_index, static_cast(pair.second.size()), + // block_index, + // albatross::RegressionDataset( + // subset(out_of_order_features, pair.second), + // out_of_order_targets.subset(pair.second)))); + auto subset_features = + subset(out_of_order_measurement_features, pair.second); + K_ff.blocks.emplace_back(this->covariance_function_(subset_features)); + K_ff.blocks.back().diagonal() += + subset(out_of_order_targets.covariance.diagonal(), pair.second); + begin_row_index += pair.second.size(); + ++block_index; + } + + const auto features = + subset(out_of_order_measurement_features, reordered_inds); + auto targets = subset(out_of_order_targets, reordered_inds); + *y = targets.mean; + + this->mean_function_.remove_from( + subset(out_of_order_features, reordered_inds), &targets.mean); + + *K_fu = this->covariance_function_(features, inducing_features, + Base::threads_.get()); + + auto K_uu = + this->covariance_function_(inducing_features, Base::threads_.get()); + + K_uu.diagonal() += + inducing_nugget_.value * Eigen::VectorXd::Ones(K_uu.rows()); + + *K_uu_ldlt = K_uu.ldlt(); + // P is such that: + // Q_ff = K_fu K_uu^-1 K_uf + // = K_fu K_uu^-T/2 K_uu^-1/2 K_uf + // = P^T P + const Eigen::MatrixXd P = K_uu_ldlt->sqrt_solve(K_fu->transpose()); + + // We only need the diagonal blocks of Q_ff to get A + BlockDiagonal Q_ff_diag; + Eigen::Index i = 0; + for (const auto &pair : indexer) { + Eigen::Index cols = cast::to_index(pair.second.size()); + auto P_cols = P.block(0, i, P.rows(), cols); + Q_ff_diag.blocks.emplace_back(P_cols.transpose() * P_cols); + i += cols; + } + auto A = K_ff - Q_ff_diag; + + // It's possible that the inducing points will perfectly describe + // some of the data, in which case we need to add a bit of extra + // noise to make sure lambda is invertible. + for (auto &b : A.blocks) { + b.diagonal() += + measurement_nugget_.value * Eigen::VectorXd::Ones(b.rows()); + } + + *A_ldlt = A.ldlt(); + + const Eigen::MatrixXd K_uuuf = K_uu_ldlt->sqrt_solve(K_fu->transpose()); + Eigen::MatrixXd Kfuf = K_uuuf.transpose() * K_uuuf; + Eigen::Index row_offset = 0; + Eigen::Index col_offset = 0; + for (auto &b : A.blocks) { + Kfuf.block(row_offset, col_offset, b.rows(), b.cols()) += b; + row_offset += b.rows(); + col_offset += b.cols(); + } + + // std::cout << "Kfuf (" << Kfuf.rows() << "x" << Kfuf.cols() << "):\n" + // << Kfuf.format(Eigen::FullPrecision) << std::endl; + + *K_PITC_ldlt = Kfuf.ldlt(); + } + + Parameter measurement_nugget_; + Parameter inducing_nugget_; + // InducingPointStrategy inducing_point_strategy_; + GrouperFunction independent_group_function_; +}; + +// rebase_inducing_points takes a Sparse GP which was fit using some set of +// inducing points and creates a new fit relative to new inducing points. +// Note that this will NOT be the equivalent to having fit the model with +// the new inducing points since some information may have been lost in +// the process. 
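+// A hypothetical usage sketch (the variable names and feature values below
+// are illustrative only, not part of the API):
+//
+//   // given some previously trained fit_model and a coarser inducing set:
+//   const std::vector<double> coarse_u = linspace(0., 9., 5);
+//   const auto rebased = rebase_inducing_points(fit_model, coarse_u);
+//   const auto prediction = rebased.predict(test_features).joint();
+//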
+template +auto rebase_inducing_points( + const FitModel>> + &fit_model, + const std::vector &new_inducing_points) { + return fit_model.get_model().fit_from_prediction( + new_inducing_points, fit_model.predict(new_inducing_points).joint()); +} + +template +using SparseQRPicGaussianProcessRegression = + PICGaussianProcessRegression; + +template +auto pic_gp_from_covariance_and_mean( + CovFunc &&covariance_function, MeanFunc &&mean_function, + GrouperFunction &&grouper_function, InducingPointStrategy &&strategy, + const std::string &model_name, + QRImplementation qr __attribute__((unused)) = DenseQRImplementation{}) { + return PICGaussianProcessRegression< + typename std::decay::type, typename std::decay::type, + typename std::decay::type, + typename std::decay::type, + typename std::decay::type>( + std::forward(covariance_function), + std::forward(mean_function), + std::forward(grouper_function), + std::forward(strategy), model_name); +}; + +template +auto pic_gp_from_covariance_and_mean( + CovFunc &&covariance_function, MeanFunc &&mean_function, + GrouperFunction &&grouper_function, const std::string &model_name, + QRImplementation qr = DenseQRImplementation{}) { + return pic_gp_from_covariance_and_mean( + std::forward(covariance_function), + std::forward(mean_function), + std::forward(grouper_function), + StateSpaceInducingPointStrategy(), model_name, qr); +}; + +template +auto pic_gp_from_covariance(CovFunc &&covariance_function, + GrouperFunction &&grouper_function, + InducingPointStrategy &&strategy, + const std::string &model_name, + QRImplementation qr = DenseQRImplementation{}) { + return pic_gp_from_covariance_and_mean( + std::forward(covariance_function), ZeroMean(), + std::forward(grouper_function), + std::forward(strategy), model_name, qr); +}; + +template +auto pic_gp_from_covariance(CovFunc covariance_function, + GrouperFunction grouper_function, + const std::string &model_name, + QRImplementation qr = DenseQRImplementation{}) { + return pic_gp_from_covariance_and_mean< + CovFunc, decltype(ZeroMean()), GrouperFunction, + decltype(StateSpaceInducingPointStrategy()), QRImplementation>( + std::forward(covariance_function), ZeroMean(), + std::forward(grouper_function), + StateSpaceInducingPointStrategy(), model_name, qr); +}; + +} // namespace albatross + +#endif /* INCLUDE_ALBATROSS_MODELS_PIC_GP_H_ */ diff --git a/include/albatross/src/models/sparse_common.hpp b/include/albatross/src/models/sparse_common.hpp new file mode 100644 index 00000000..b5267b99 --- /dev/null +++ b/include/albatross/src/models/sparse_common.hpp @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2024 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef INCLUDE_ALBATROSS_MODELS_SPARSE_COMMON_H_ +#define INCLUDE_ALBATROSS_MODELS_SPARSE_COMMON_H_ + +namespace albatross { + +namespace details { + +constexpr double DEFAULT_NUGGET = 1e-8; + +inline std::string measurement_nugget_name() { return "measurement_nugget"; } + +inline std::string inducing_nugget_name() { return "inducing_nugget"; } + +static constexpr double cSparseRNugget = 1.e-10; + +} // namespace details + +struct UniformlySpacedInducingPoints { + + UniformlySpacedInducingPoints(std::size_t num_points_ = 10) + : num_points(num_points_) {} + + template + std::vector operator()(const CovarianceFunction &cov ALBATROSS_UNUSED, + const std::vector &features) const { + double min = *std::min_element(features.begin(), features.end()); + double max = *std::max_element(features.begin(), features.end()); + return linspace(min, max, num_points); + } + + std::size_t num_points; +}; + +struct StateSpaceInducingPointStrategy { + + template ::value, + int> = 0> + auto operator()(const CovarianceFunction &cov, + const std::vector &features) const { + return cov.state_space_representation(features); + } + + template ::value, + int> = 0> + auto + operator()(const CovarianceFunction &cov ALBATROSS_UNUSED, + const std::vector &features ALBATROSS_UNUSED) const + ALBATROSS_FAIL( + CovarianceFunction, + "Covariance function is missing state_space_representation method, " + "be sure _ssr_impl has been defined for the types concerned"); +}; + +struct SPQRImplementation { + using QRType = Eigen::SPQR>; + + static std::unique_ptr compute(const Eigen::MatrixXd &m, + ThreadPool *threads) { + return SPQR_create(m.sparseView(), threads); + } +}; + +struct DenseQRImplementation { + using QRType = Eigen::ColPivHouseholderQR; + + static std::unique_ptr compute(const Eigen::MatrixXd &m, + ThreadPool *threads + __attribute__((unused))) { + return std::make_unique(m); + } +}; + + +} // namespace albatross + +#endif // INCLUDE_ALBATROSS_MODELS_SPARSE_COMMON_H_ \ No newline at end of file diff --git a/include/albatross/src/models/sparse_gp.hpp b/include/albatross/src/models/sparse_gp.hpp index 36234e4e..90985cb4 100644 --- a/include/albatross/src/models/sparse_gp.hpp +++ b/include/albatross/src/models/sparse_gp.hpp @@ -15,81 +15,10 @@ namespace albatross { -namespace details { - -constexpr double DEFAULT_NUGGET = 1e-8; - -inline std::string measurement_nugget_name() { return "measurement_nugget"; } - -inline std::string inducing_nugget_name() { return "inducing_nugget"; } - -static constexpr double cSparseRNugget = 1.e-10; - -} // namespace details - template class SparseGaussianProcessRegression; -struct UniformlySpacedInducingPoints { - - UniformlySpacedInducingPoints(std::size_t num_points_ = 10) - : num_points(num_points_) {} - - template - std::vector operator()(const CovarianceFunction &cov ALBATROSS_UNUSED, - const std::vector &features) const { - double min = *std::min_element(features.begin(), features.end()); - double max = *std::max_element(features.begin(), features.end()); - return linspace(min, max, num_points); - } - - std::size_t num_points; -}; - -struct StateSpaceInducingPointStrategy { - - template ::value, - int> = 0> - auto operator()(const CovarianceFunction &cov, - const std::vector &features) const { - return cov.state_space_representation(features); - } - - template ::value, - int> = 0> - auto - operator()(const CovarianceFunction &cov ALBATROSS_UNUSED, - const std::vector &features ALBATROSS_UNUSED) const - ALBATROSS_FAIL( - CovarianceFunction, - "Covariance function is 
missing state_space_representation method, " - "be sure _ssr_impl has been defined for the types concerned"); -}; - -struct SPQRImplementation { - using QRType = Eigen::SPQR>; - - static std::unique_ptr compute(const Eigen::MatrixXd &m, - ThreadPool *threads) { - return SPQR_create(m.sparseView(), threads); - } -}; - -struct DenseQRImplementation { - using QRType = Eigen::ColPivHouseholderQR; - - static std::unique_ptr compute(const Eigen::MatrixXd &m, - ThreadPool *threads - __attribute__((unused))) { - return std::make_unique(m); - } -}; - template struct SparseGPFit {}; template struct Fit> { diff --git a/tests/test_pic_gp.cc b/tests/test_pic_gp.cc new file mode 100644 index 00000000..9879b24f --- /dev/null +++ b/tests/test_pic_gp.cc @@ -0,0 +1,650 @@ +/* + * Copyright (C) 2024 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +#include + +// #include "albatross/src/eigen/serializable_ldlt.hpp" +// #include "albatross/src/linalg/block_diagonal.hpp" +#include "test_models.h" +#include +#include + +#include +#include + +namespace albatross { + +struct LeaveOneIntervalOut { + + LeaveOneIntervalOut(){}; + explicit LeaveOneIntervalOut(double group_domain_size_) + : group_domain_size(group_domain_size_){}; + + int operator()(const double &f) const { + return static_cast(floor(f / group_domain_size)); + } + + int operator()(const Measurement &f) const { + return static_cast(floor(f.value / group_domain_size)); + } + + double group_domain_size = 5.; +}; + +TEST(TestPicGP, TestPredictionExists) { + const auto dataset = make_toy_linear_data(); + UniformlySpacedInducingPoints strategy(8); + LeaveOneIntervalOut grouper; + const auto pic = + pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + const auto pic_fit = pic.fit(dataset); + + const auto test_features = linspace(0.1, 9.9, 11); + + const auto pic_pred = + pic_fit.predict_with_measurement_noise(test_features).joint(); + + EXPECT_GT(pic_pred.mean.size(), 0); +} + +static constexpr std::size_t kNumRandomFits = 1000; + +static constexpr std::size_t kMaxSize = 20000; + +TEST(TestPicGP, ComparePrediction) { + std::default_random_engine gen(22); + std::uniform_int_distribution data_sizes{1, kMaxSize}; + std::uniform_int_distribution predict_sizes{1, 200}; + // std::uniform_real_distribution group_domain_sizes{1.e-3, 7.}; + + auto workers = make_shared_thread_pool(8); + + std::ofstream csv_out( + "/home/peddie/orion-engine/third_party/albatross/times.csv"); + + csv_out << "idx,n_train,n_inducing,n_blocks,blocks_used,n_predict,dense_fit_" + "time_ns,dense_pred_" + "time_ns,pic_fit_time_ns,pic_pred_time_ns,mean_err_norm,cov_err_" + "norm\n"; + + for (std::size_t i = 0; i < kNumRandomFits; ++i) { + // static constexpr double kLargestTrainPoint = + // static_cast(kNumTrainPoints) - 1.; + // static constexpr double kSpatialBuffer = 0; + // static constexpr std::size_t kNumBlocks = 5; + + // UniformlySpacedInducingPoints strategy(kNumInducingPoints); + // LeaveOneIntervalOut grouper( + // kLargestTrainPoint / static_cast(kNumBlocks) + 1e-8); + const std::size_t n_train = data_sizes(gen); + 
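+    // Choose the number of inducing points as a random fraction of the
+    // training size (roughly between n_train / 30 and n_train / 5, never
+    // fewer than one) so the low-rank part stays well below the dense
+    // problem size.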
const auto n_train_fraction = [&n_train](double div) { + return std::max(std::size_t{1}, static_cast(ceil( + static_cast(n_train) / div))); + }; + std::uniform_int_distribution inducing_sizes{ + n_train_fraction(30.), n_train_fraction(5.)}; + const std::size_t n_inducing_points = inducing_sizes(gen); + const double largest_training_point = static_cast(n_train) - 1.; + std::uniform_int_distribution num_blocks_dist{2, 50}; + const std::size_t n_blocks = num_blocks_dist(gen); + + const std::size_t n_predict = predict_sizes(gen); + + UniformlySpacedInducingPoints strategy(n_inducing_points); + LeaveOneIntervalOut grouper(largest_training_point / + static_cast(n_blocks)); + auto direct = + gp_from_covariance(make_simple_covariance_function(), "direct"); + auto pic = + pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + pic.set_thread_pool(workers); + + auto dataset = make_toy_linear_data(5, 1, 0.1, n_train); + const auto begin_direct_fit = std::chrono::steady_clock::now(); + auto direct_fit = direct.fit(dataset); + const auto end_direct_fit = std::chrono::steady_clock::now(); + const auto begin_pic_fit = std::chrono::steady_clock::now(); + auto pic_fit = pic.fit(dataset); + const auto end_pic_fit = std::chrono::steady_clock::now(); + + std::set< + std::result_of_t> + train_indices; + std::transform(dataset.features.begin(), dataset.features.end(), + std::inserter(train_indices, train_indices.begin()), + grouper); + auto test_features = linspace(0, largest_training_point, n_predict); + // const std::size_t test_features_before = test_features.size(); + test_features.erase( + std::remove_if(test_features.begin(), test_features.end(), + [&train_indices, &grouper](const auto &f) { + // std::cout << "f: " << f << "; group: " + // << grouper(f) + // << std::endl; + return train_indices.count(grouper(f)) == 0; + }), + test_features.end()); + + if (test_features.empty()) { + continue; + } + + // const std::size_t test_features_after = test_features.size(); + + // std::cout << "train_indices (" << train_indices.size() << "): "; + // for (const auto &index : train_indices) { + // std::cout << index << ", "; + // } + // std::cout << std::endl; + + // std::cout << "test_features, " << test_features_before << " -> " + // << test_features_after << ": "; + // for (const auto &feature : test_features) { + // std::cout << feature << ", "; + // } + // std::cout << std::endl; + const auto begin_direct_pred = std::chrono::steady_clock::now(); + auto direct_pred = + direct_fit.predict_with_measurement_noise(test_features).joint(); + const auto end_direct_pred = std::chrono::steady_clock::now(); + auto pic_pred = + pic_fit.predict_with_measurement_noise(test_features).joint(); + const auto end_pic_pred = std::chrono::steady_clock::now(); + + csv_out << i << ',' << n_train << ',' << n_inducing_points << ',' + << n_blocks << ',' << train_indices.size() << ',' << n_predict + << ',' + << std::chrono::duration_cast( + end_direct_fit - begin_direct_fit) + .count() + << ',' + << std::chrono::duration_cast( + end_direct_pred - begin_direct_pred) + .count() + << ',' + << std::chrono::duration_cast( + end_pic_fit - begin_pic_fit) + .count() + << ',' + << std::chrono::duration_cast( + end_pic_pred - end_direct_pred) + .count() + << ',' << (pic_pred.mean - direct_pred.mean).norm() << ',' + << (pic_pred.covariance - direct_pred.covariance).norm() << '\n'; + + const double pic_error = (pic_pred.mean - direct_pred.mean).norm(); + EXPECT_LT(pic_error, 1e-4); + // << 
"|u|: " << n_inducing_points << "; |f|: " << dataset.size() + // << "; |p|: " << test_features.size() + // << "; B width: " << grouper.group_domain_size + // << "; n_B: " << train_indices.size(); + + // const double pic_cov_error = + // (pic_pred.covariance - direct_pred.covariance).norm(); + // EXPECT_LT(pic_cov_error, 1e-6) + // << "|u|: " << n_inducing_points << "; |f|: " << dataset.size() + // << "; |p|: " << test_features.size() + // << "; B width: " << grouper.group_domain_size + // << "; n_B: " << train_indices.size(); + + if (i % 10 == 0) { + csv_out.flush(); + std::cout << i << std::endl; + } + } + csv_out << std::endl; + csv_out.close(); +} + +template +class BruteForcePIC + : public CovarianceFunction< + BruteForcePIC> { +public: + CovarianceType cov_; + std::vector inducing_points_; + GrouperFunction grouper_; + Eigen::LDLT K_uu_ldlt_; + + BruteForcePIC(const std::vector &inducing_points, + CovarianceType &&cov, GrouperFunction &&grouper) + : cov_{cov}, inducing_points_{inducing_points}, grouper_{grouper}, + K_uu_ldlt_{cov_(inducing_points_, inducing_points_).ldlt()} {} + + template double _call_impl(const X &x, const X &y) const { + if (grouper_(x) == grouper_(y)) { + return cov_(x, y); + } + + Eigen::VectorXd K_xu(inducing_points_.size()); + Eigen::VectorXd K_uy(inducing_points_.size()); + for (Eigen::Index i = 0; + i < static_cast(inducing_points_.size()); ++i) { + K_xu[i] = cov_(x, inducing_points_[i]); + K_uy[i] = cov_(inducing_points_[i], y); + } + // const Eigen::VectorXd K_uy = cov_(inducing_points_, y); + return K_xu.dot(K_uu_ldlt_.solve(K_uy)); + } +}; + +template +auto make_brute_force_pic_covariance( + const std::vector &inducing_points, + CovarianceType &&cov, GrouperFunction &&grouper) { + return BruteForcePIC( + inducing_points, std::forward(cov), + std::forward(grouper)); +} + +TEST(TestPicGP, ScalarEquivalence) { + static constexpr std::size_t kNumTrainPoints = 3; + static constexpr std::size_t kNumTestPoints = 1; + static constexpr std::size_t kNumInducingPoints = 3; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + // Set the grouper so that all the simple test data falls within one + // group. 
+ LeaveOneIntervalOut grouper(10); + auto direct = gp_from_covariance(make_simple_covariance_function(), "direct"); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + + auto direct_fit = direct.fit(dataset); + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto direct_pred = + direct_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + const double pic_error = (pic_pred.mean - direct_pred.mean).norm(); + EXPECT_LT(pic_error, 1e-7) + << "|u|: " << kNumInducingPoints << "; |f|: " << dataset.size() + << "; |p|: " << test_features.size() + << "; B width: " << grouper.group_domain_size; + + const double pic_cov_error = + (pic_pred.covariance - direct_pred.covariance).norm(); + EXPECT_LT(pic_cov_error, 1e-7) + << "|u|: " << kNumInducingPoints << "; |f|: " << dataset.size() + << "; |p|: " << test_features.size() + << "; B width: " << grouper.group_domain_size + << "; n_B: " << kNumTrainPoints << "\n" + << pic_pred.covariance << "\n" + << direct_pred.covariance; +} + +TEST(TestPicGP, SingleBlockEquivalence) { + static constexpr std::size_t kNumTrainPoints = 10; + static constexpr std::size_t kNumTestPoints = 10; + static constexpr std::size_t kNumInducingPoints = 4; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + // Set the grouper so that all the simple test data falls within one + // group. + LeaveOneIntervalOut grouper(10); + auto direct = gp_from_covariance(make_simple_covariance_function(), "direct"); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + + auto direct_fit = direct.fit(dataset); + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto direct_pred = + direct_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + const double pic_error = (pic_pred.mean - direct_pred.mean).norm(); + EXPECT_LT(pic_error, 2e-7) + << "|u|: " << kNumInducingPoints << "; |f|: " << dataset.size() + << "; |p|: " << test_features.size() + << "; B width: " << grouper.group_domain_size << "\n" + << pic_pred.mean.transpose() << "\n" + << direct_pred.mean.transpose(); + + const double pic_cov_error = + (pic_pred.covariance - direct_pred.covariance).norm(); + EXPECT_LT(pic_cov_error, 2e-7) + << "|u|: " << kNumInducingPoints << "; |f|: " << dataset.size() + << "; |p|: " << test_features.size() + << "; B width: " << grouper.group_domain_size + << "; n_B: " << kNumTrainPoints << "\n" + << pic_pred.covariance << "\n" + << direct_pred.covariance; +} + +template +JointDistribution test_pic(const RegressionDataset &data, + const std::vector &predict, + CovarianceType &&cov, InducingStrategy &&inducing, + GrouperFunction &&grouper) { + const auto u = inducing(cov, data.features); + + auto bfp_cov = make_brute_force_pic_covariance(inducing(cov, data.features), + cov, grouper); + + BlockDiagonal K_ff; + const auto indexers = group_by(data.features, grouper).indexers(); + std::vector reordered_inds; + for (const auto &pair : indexers) { + reordered_inds.insert(reordered_inds.end(), pair.second.begin(), + pair.second.end()); + 
K_ff.blocks.emplace_back(cov(subset(data.features, pair.second))); + K_ff.blocks.back().diagonal() += + subset(data.targets.covariance.diagonal(), pair.second); + } + const std::vector ordered_features = + subset(data.features, reordered_inds); + + Eigen::MatrixXd K_uu = cov(u, u); + K_uu.diagonal() += Eigen::VectorXd::Constant(K_uu.rows(), 1e-8); + const Eigen::MatrixXd K_fu = cov(ordered_features, u); + + const Eigen::SerializableLDLT K_uu_ldlt = K_uu.ldlt(); + const Eigen::MatrixXd P = K_uu_ldlt.sqrt_solve(K_fu.transpose()); + const Eigen::MatrixXd Q_ff = P.transpose() * P; + BlockDiagonal Q_ff_diag; + Eigen::Index i = 0; + for (const auto &pair : indexers) { + const Eigen::Index cols = cast::to_index(pair.second.size()); + auto P_cols = P.block(0, i, P.rows(), cols); + Q_ff_diag.blocks.emplace_back(P_cols.transpose() * P_cols); + i += cols; + } + // auto Lambda = K_ff - Q_ff_diag; + + Eigen::MatrixXd Q_ff_lambda = Q_ff; + Eigen::Index k = 0; + for (const auto &block : K_ff.blocks) { + Q_ff_lambda.block(k, k, block.rows(), block.rows()) = block; + Q_ff_lambda.block(k, k, block.rows(), block.rows()).diagonal() += + Eigen::VectorXd::Constant(block.rows(), 1e-2); + k += block.rows(); + } + + const auto print_matrix = [](const Eigen::MatrixXd &m, std::string &&label) { + std::cout << label << " (" << m.rows() << "x" << m.cols() << "):\n" + << m.format(Eigen::FullPrecision) << std::endl; + }; + + const auto print_vector = [](const Eigen::VectorXd &v, std::string &&label) { + std::cout << label << " (" << v.size() + << "): " << v.transpose().format(Eigen::FullPrecision) + << std::endl; + }; + + print_matrix(Q_ff_lambda, "Q_ff + Lambda"); + // const BlockDiagonalLDLT Lambda_ldlt = Lambda.ldlt(); + + print_matrix(bfp_cov(ordered_features), "BFP: Q_ff + Lambda"); + + const Eigen::SerializableLDLT S = Q_ff_lambda.ldlt(); + + print_matrix(S.matrixL(), "PITC L"); + print_vector(S.vectorD(), "PITC D"); + + Eigen::MatrixXd K_PIC(ordered_features.size(), predict.size()); + Eigen::MatrixXd K_PITC(ordered_features.size(), predict.size()); + + Eigen::MatrixXd V_PIC = + Eigen::MatrixXd::Zero(ordered_features.size(), predict.size()); + + Eigen::MatrixXd Y = K_uu_ldlt.solve(K_fu.transpose()); + print_matrix(Y, "Y"); + + Eigen::MatrixXd W = K_uu_ldlt.solve(S.solve(K_fu).transpose()); + print_matrix(W, "W"); + + for (Eigen::Index f = 0; f < cast::to_index(ordered_features.size()); ++f) { + for (Eigen::Index p = 0; p < cast::to_index(predict.size()); ++p) { + const std::vector pv{predict[p]}; + K_PITC(f, p) = K_fu.row(f).dot(K_uu_ldlt.solve(cov(u, pv)).col(0)); + if (grouper(predict[p]) == grouper(ordered_features[f])) { + K_PIC(f, p) = cov(ordered_features[f], predict[p]); + V_PIC(f, p) = + K_PIC(f, p) - K_fu.row(f).dot(K_uu_ldlt.solve(cov(u, pv)).col(0)); + } else { + K_PIC(f, p) = K_fu.row(f).dot(K_uu_ldlt.solve(cov(u, pv)).col(0)); + } + } + } + + const Eigen::MatrixXd K_pu = cov(predict, u); + // Eigen::Index j = 0; + // Eigen::Index blk = 0; + // std::vector WW; + + const Eigen::MatrixXd WBW = K_uu_ldlt.solve(S.solve(K_fu).transpose()); + // Eigen::MatrixXd WBW = + // Eigen::MatrixXd::Zero(u.size(), ordered_features.size()); + + // Eigen::MatrixXd K_Su = S.solve(K_fu); + // std::stringstream Ksus; + // Ksus << "K_Su[" << blk << "]"; + // print_matrix(K_Su, Ksus.str()); + + // for (const auto &block : K_ff.blocks) { + // // Eigen::MatrixXd K_Au = K_fu; + // // K_Au.middleRows(j, block.cols()) = + // // Eigen::MatrixXd::Zero(block.cols(), K_Au.cols()); + // // std::stringstream Kaus; + // // std::cout << 
"Block " << blk << " j = " << j << " size = " << block.cols() + // // << std::endl; + // // Kaus << "K_Au[" << blk << "]"; + // // print_matrix(K_Au, Kaus.str()); + + // Eigen::MatrixXd WB = K_uu_ldlt.solve(K_Su.transpose()); + // std::stringstream WBs; + // WBs << "W_B[" << blk << "]"; + // print_matrix(WB, WBs.str()); + // WB.leftCols(j) = Eigen::MatrixXd::Zero(K_fu.cols(), j); + // WB.rightCols(ordered_features.size() - j - block.cols()) = + // Eigen::MatrixXd::Zero(K_fu.cols(), + // ordered_features.size() - j - block.cols()); + // print_matrix(WB, WBs.str()); + + // print_matrix(WB * V_PIC, "WB * V"); + + // WW.push_back(WB); + // WBW += WB; + + // blk++; + // j += block.cols(); + // } + + print_matrix(K_PIC, "K_PIC"); + print_matrix(bfp_cov(ordered_features, predict), "BFP: K_PIC"); + print_matrix(K_PITC, "K_PITC"); + + print_matrix(V_PIC, "V_PIC"); + + print_matrix(WBW, "W"); + const Eigen::MatrixXd U = K_pu * WBW * V_PIC; + print_matrix(U, "U"); + + auto SV = S.sqrt_solve(V_PIC); + const Eigen::MatrixXd VSV = V_PIC.transpose() * S.solve(V_PIC); // SV.transpose() * SV; + print_matrix(VSV, "VSV"); + + const Eigen::MatrixXd predict_prior = cov(predict); + + print_matrix(predict_prior, "prior"); + + print_matrix(bfp_cov(predict), "BFP: prior"); + + // auto KK = K_PITC_ldlt.sqrt_solve(K_PIC); + // const Eigen::MatrixXd explained_cov = KK.transpose() * KK; + const Eigen::MatrixXd explained_cov = + K_PIC.transpose() * S.solve(K_PIC); + print_matrix(explained_cov, "explained"); + const Eigen::MatrixXd predict_cov = predict_prior - explained_cov; + + print_matrix(predict_cov, "K_**"); + + const Eigen::MatrixXd PITC_cov = + predict_prior - K_PITC.transpose() * S.solve(K_PITC); + + const Eigen::MatrixXd PIC_cov = PITC_cov - U - U.transpose() - VSV; + print_matrix(PITC_cov, "PTIC cov"); + print_matrix(PIC_cov, "PIC cov"); + + print_matrix(predict_cov - PITC_cov, "PIC - PITC"); + print_matrix(predict_cov - PIC_cov, "PIC - factored"); + + return JointDistribution{}; +} + +TEST(TestPicGP, EmitCSV) { + static constexpr std::size_t kNumTrainPoints = 12; + static constexpr std::size_t kNumTestPoints = 40; + static constexpr std::size_t kNumInducingPoints = 8; + + static constexpr double kLargestTrainPoint = + static_cast(kNumTrainPoints) - 1.; + static constexpr double kSpatialBuffer = 0; + static constexpr std::size_t kNumBlocks = 2; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + LeaveOneIntervalOut grouper(kLargestTrainPoint / + static_cast(kNumBlocks) + 1e-8); + + auto direct = gp_from_covariance(make_simple_covariance_function(), "direct"); + + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + auto pitc = + sparse_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "sparse", DenseQRImplementation{}); + + auto dataset = + make_toy_linear_data(-kLargestTrainPoint / 2., 1, 0.1, kNumTrainPoints); + + auto bfp_cov = make_brute_force_pic_covariance( + strategy(make_simple_covariance_function(), dataset.features), + make_simple_covariance_function(), grouper); + auto bfp = gp_from_covariance(bfp_cov); + + auto test_features = linspace( + kSpatialBuffer, kLargestTrainPoint - kSpatialBuffer, kNumTestPoints); + auto direct_fit = direct.fit(dataset); + auto pic_fit = pic.fit(dataset); + auto pitc_fit = pitc.fit(dataset); + std::cout << "BFP: "; + auto bfp_fit = bfp.fit(dataset); + + auto direct_pred = + direct_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = 
pic_fit.predict_with_measurement_noise(test_features).joint(); + auto pitc_pred = + pitc_fit.predict_with_measurement_noise(test_features).joint(); + std::cout << "BFP: "; + auto bfp_pred = bfp_fit.predict_with_measurement_noise(test_features).joint(); + + std::ofstream csv_out( + "/home/peddie/orion-engine/third_party/albatross/example.csv"); + + csv_out << "type,idx,x,mean,marginal,group\n"; + for (std::size_t pred = 0; pred < direct_pred.size(); ++pred) { + csv_out << "direct," << std::setprecision(16) << pred << ',' + << test_features[pred] << ',' << direct_pred.mean[pred] << ',' + << direct_pred.covariance(pred, pred) << ',' + << grouper(test_features[pred]) << '\n'; + } + + for (std::size_t pred = 0; pred < pitc_pred.size(); ++pred) { + csv_out << "pitc," << std::setprecision(16) << pred << ',' + << test_features[pred] << ',' << pitc_pred.mean[pred] << ',' + << pitc_pred.covariance(pred, pred) << ',' + << grouper(test_features[pred]) << '\n'; + } + + for (std::size_t pred = 0; pred < pic_pred.size(); ++pred) { + csv_out << "pic," << std::setprecision(16) << pred << ',' + << test_features[pred] << ',' << pic_pred.mean[pred] << ',' + << pic_pred.covariance(pred, pred) << ',' + << grouper(test_features[pred]) << '\n'; + } + + for (std::size_t pred = 0; pred < bfp_pred.size(); ++pred) { + csv_out << "bfp," << std::setprecision(16) << pred << ',' + << test_features[pred] << ',' << bfp_pred.mean[pred] << ',' + << bfp_pred.covariance(pred, pred) << ',' + << grouper(test_features[pred]) << '\n'; + } + + csv_out << std::endl; + csv_out.close(); + + std::ofstream points_out("/home/peddie/orion-engine/third_party/" + "albatross/example_points.csv"); + + points_out << "type,x,y\n"; + for (std::size_t i = 0; i < dataset.size(); ++i) { + points_out << "train," << dataset.features[i] << ',' + << dataset.targets.mean[i] << '\n'; + } + + for (const auto &f : pic_fit.get_fit().inducing_features) { + points_out << "inducing," << f << ",0\n"; + } + points_out << std::endl; + points_out.close(); + + // std::cout << "BFP L: " + // << Eigen::MatrixXd(bfp_fit.get_fit().train_covariance.matrixL()) + // .format(Eigen::FullPrecision) + // << std::endl; + // std::cout << "BFP D: " + // << + // bfp_fit.get_fit().train_covariance.vectorD().transpose().format( + // Eigen::FullPrecision) + // << std::endl; + + // const auto test_result = + // test_pic(dataset, test_features, make_simple_covariance_function(), + // strategy, grouper); + + // std::cout << "bfp_pred.covariance (" << bfp_pred.covariance.rows() << "x" + // << bfp_pred.covariance.cols() << "):\n" + // << bfp_pred.covariance << std::endl; + + const double pic_error = (pic_pred.mean - direct_pred.mean).norm(); + EXPECT_LT(pic_error, 1e-7); + // << "|u|: " << kNumInducingPoints << "; |f|: " << dataset.size() + // << "; |p|: " << test_features.size() + // << "; B width: " << grouper.group_domain_size << "\n" + // << pic_pred.mean.transpose() << "\n" + // << direct_pred.mean.transpose(); + + const double pic_cov_error = + (pic_pred.covariance - direct_pred.covariance).norm(); + EXPECT_LT(pic_cov_error, 1e-7); + // << "|u|: " << kNumInducingPoints << "; |f|: " << dataset.size() + // << "; |p|: " << test_features.size() + // << "; B width: " << grouper.group_domain_size + // << "; n_B: " << kNumTrainPoints << "\n" + // << pic_pred.covariance << "\n" + // << direct_pred.covariance; +} + +} // namespace albatross \ No newline at end of file From 63c0d0c5403563550fba425af381e4ca38604b30 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 11 Apr 2024 14:21:46 
+1000 Subject: [PATCH 05/13] Measurement remover --- .../src/covariance_functions/measurement.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/albatross/src/covariance_functions/measurement.hpp b/include/albatross/src/covariance_functions/measurement.hpp index 732093d4..51dbd1d5 100644 --- a/include/albatross/src/covariance_functions/measurement.hpp +++ b/include/albatross/src/covariance_functions/measurement.hpp @@ -110,6 +110,18 @@ MeasurementOnly measurement_only(const SubCovariance &cov) { return MeasurementOnly(cov); } + // template R without_measurement(T &&t); + +template T without_measurement(Measurement &&m) { + return m.value; +} +template const T &without_measurement(const Measurement &m) { + return m.value; +} + +template T without_measurement(T &&t) { return t; } +template const T &without_measurement(const T &t) { return t; } + } // namespace albatross #endif /* INCLUDE_ALBATROSS_SRC_COVARIANCE_FUNCTIONS_MEASUREMENT_HPP_ */ From d14361492aece61b1baed3f75d627a8f47dd983a Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Fri, 12 Apr 2024 14:46:59 +1000 Subject: [PATCH 06/13] Some further PIC cleanups --- include/albatross/src/models/pic_gp.hpp | 139 +++++++++++++----------- tests/test_pic_gp.cc | 120 +++++++++++++++++--- 2 files changed, 179 insertions(+), 80 deletions(-) diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp index 62078b97..9e5408b5 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -79,18 +79,13 @@ struct Fit> { const Eigen::MatrixXd &Z_, const BlockDiagonalLDLT &A_ldlt_, const GroupMap &measurement_groups_, const Eigen::VectorXd &information_, Eigen::Index numerical_rank_, - const std::vector &covariance_Ynot_, - const Eigen::SerializableLDLT &K_PITC_ldlt_, - const std::vector> cols_Bs_, - const std::vector> cols_Cs_) + const std::vector> cols_Bs_) : train_features(features_), inducing_features(inducing_features_), train_covariance(train_covariance_), sigma_R(sigma_R_), P(P_), mean_w(mean_w_), W(W_), covariance_Y(covariance_Y_), Z(Z_), A_ldlt(A_ldlt_), measurement_groups(measurement_groups_), information(information_), numerical_rank(numerical_rank_), - // debug stuff - covariance_Ynot(covariance_Ynot_), K_PITC_ldlt(K_PITC_ldlt_), - cols_Bs(cols_Bs_), cols_Cs(cols_Cs_) {} + cols_Bs(cols_Bs_) {} void shift_mean(const Eigen::VectorXd &mean_shift) { ALBATROSS_ASSERT(mean_shift.size() == information.size()); @@ -111,9 +106,7 @@ struct Fit> { information == other.information && numerical_rank == other.numerical_rank && // Debug stuff - covariance_Ynot == other.covariance_Ynot && - K_PITC_ldlt == other.K_PITC_ldlt && cols_Bs == other.cols_Bs && - cols_Cs == other.cols_Cs); + cols_Bs == other.cols_Bs); } }; @@ -405,9 +398,8 @@ class PICGaussianProcessRegression Eigen::MatrixXd K_fu; Eigen::VectorXd y; GroupMap measurement_groups; - Eigen::SerializableLDLT K_PITC_ldlt; compute_internal_components(u, features, targets, &A_ldlt, &K_uu_ldlt, - &K_fu, &y, &measurement_groups, &K_PITC_ldlt); + &K_fu, &y, &measurement_groups); auto B_qr = compute_sigma_qr(K_uu_ldlt, A_ldlt, K_fu); // To make a prediction, we will need to compute cross-terms with @@ -440,23 +432,21 @@ class PICGaussianProcessRegression // std::cout << "W (" << W.rows() << "x" << W.cols() << "):\n" // << W.format(Eigen::FullPrecision) << std::endl; - const Eigen::MatrixXd W2 = - K_uu_ldlt.solve(K_PITC_ldlt.solve(K_fu).transpose()); + // const Eigen::MatrixXd W2 = + // 
K_uu_ldlt.solve(K_PITC_ldlt.solve(K_fu).transpose()); // std::cout << "W2 (" << W2.rows() << "x" << W2.cols() << "):\n" // << W2.format(Eigen::FullPrecision) << std::endl; // TODO(@peddie): a lot of this can be batched - std::vector covariance_Ynot(measurement_groups.size()); std::vector covariance_Y(measurement_groups.size()); std::vector mean_w(measurement_groups.size()); std::vector> cols_Bs(measurement_groups.size()); - std::vector> cols_Cs(measurement_groups.size()); const auto block_start_indices = A_ldlt.block_to_row_map(); - const auto precompute_block = - [this, &A_ldlt, &features, &y, &K_fu, &v, &Z, &K_uu_ldlt, &mean_w, - &covariance_Y, &covariance_Ynot, &cols_Bs, - &cols_Cs](std::size_t block_number, Eigen::Index start_row) -> void { + const auto precompute_block = [this, &A_ldlt, &features, &y, &K_fu, &v, &Z, + &K_uu_ldlt, &mean_w, &covariance_Y, + &cols_Bs](std::size_t block_number, + Eigen::Index start_row) -> void { // const std::size_t block_number = block_start.first; // const Eigen::Index start_row = block_start.second; const Eigen::Index block_size = A_ldlt.blocks[block_number].rows(); @@ -501,7 +491,6 @@ class PICGaussianProcessRegression } cols_Bs[block_number] = cols_B; - cols_Cs[block_number] = cols_C; // std::cout << "cols_C (" << cols_C.rows() << "x" << cols_C.cols() << "):\n" // << Eigen::MatrixXd(cols_C) << std::endl; @@ -520,30 +509,30 @@ class PICGaussianProcessRegression const Eigen::MatrixXd mean_w_full = A_ldlt.blocks[block_number].solve(ydiff_b); - if (A_ldlt.blocks.size() > 1) { - // std::cout << "K_fu (" << K_fu.rows() << "x" << K_fu.cols() << - // "):\n" - // << K_fu << std::endl; - const Eigen::MatrixXd KufC = K_fu.transpose() * cols_C; - // std::cout << "KufC (" << KufC.rows() << "x" << KufC.cols() << "):\n" - // << KufC << std::endl; - // const Eigen::MatrixXd ZC = Z * cols_C; - // // std::cout << "ZC.transpose() (" << ZC.transpose().rows() << "x" - // // << ZC.transpose().cols() << "):\n" - // // << ZC.transpose() << std::endl; - - // const Eigen::MatrixXd KuZT = KufC * ZC.transpose(); - // const Eigen::MatrixXd KuZTZ = KuZT * Z; - // const Eigen::MatrixXd KuZTZB = -KuZTZ * cols_B; - // covariance_W[block_number] = K_uu_ldlt.solve(KuZTZB); - // std::cout << "covariance_W[" << block_number << "]:\n" - // << covariance_W[block_number].format(Eigen::FullPrecision) - // << std::endl; - covariance_Ynot[block_number] = K_uu_ldlt.solve(KufC); - // covariance_W.emplace_back(K_uu_ldlt.solve( - // (-(K_fu.transpose() * cols_C) * Z.transpose() * Z) * - // cols_B)); - } + // if (A_ldlt.blocks.size() > 1) { + // // std::cout << "K_fu (" << K_fu.rows() << "x" << K_fu.cols() << + // // "):\n" + // // << K_fu << std::endl; + // const Eigen::MatrixXd KufC = K_fu.transpose() * cols_C; + // // std::cout << "KufC (" << KufC.rows() << "x" << KufC.cols() << "):\n" + // // << KufC << std::endl; + // // const Eigen::MatrixXd ZC = Z * cols_C; + // // // std::cout << "ZC.transpose() (" << ZC.transpose().rows() << "x" + // // // << ZC.transpose().cols() << "):\n" + // // // << ZC.transpose() << std::endl; + + // // const Eigen::MatrixXd KuZT = KufC * ZC.transpose(); + // // const Eigen::MatrixXd KuZTZ = KuZT * Z; + // // const Eigen::MatrixXd KuZTZB = -KuZTZ * cols_B; + // // covariance_W[block_number] = K_uu_ldlt.solve(KuZTZB); + // // std::cout << "covariance_W[" << block_number << "]:\n" + // // << covariance_W[block_number].format(Eigen::FullPrecision) + // // << std::endl; + // covariance_Ynot[block_number] = K_uu_ldlt.solve(KufC); + // // 
covariance_W.emplace_back(K_uu_ldlt.solve( + // // (-(K_fu.transpose() * cols_C) * Z.transpose() * Z) * + // // cols_B)); + // } // std::cout << "mean_w_full (" << mean_w_full.rows() << "x" // << mean_w_full.cols() << "):\n" @@ -676,8 +665,7 @@ class PICGaussianProcessRegression Fit>; return FitType(features, u, K_uu_ldlt, get_R(*B_qr), get_P(*B_qr), mean_w, W, covariance_Y, Z, A_ldlt, measurement_groups, v, - B_qr->rank(), covariance_Ynot, K_PITC_ldlt, cols_Bs, - cols_Cs); + B_qr->rank(), cols_Bs); } template @@ -870,7 +858,10 @@ class PICGaussianProcessRegression Eigen::MatrixXd WV = Eigen::MatrixXd::Zero( sparse_gp_fit.inducing_features.size(), features.size()); - Eigen::VectorXd mean_correction(features.size()); + Eigen::VectorXd mean_correction = Eigen::VectorXd::Zero(features.size()); + // std::cout << "mean_correction before (" << mean_correction.size() << "): " + // << mean_correction.transpose().format(Eigen::FullPrecision) + // << std::endl; std::vector feature_to_block; for (Eigen::Index j = 0; j < features.size(); ++j) { const auto group = find_group(features[j]); @@ -881,16 +872,18 @@ class PICGaussianProcessRegression // cover " "the entire feature domain in any fit."); feature_to_block.push_back( std::distance(sparse_gp_fit.measurement_groups.begin(), group)); - if (sparse_gp_fit.A_ldlt.blocks.size() > 1) { + // if (sparse_gp_fit.A_ldlt.blocks.size() > 1) { const std::vector fvec = {features[j]}; const Eigen::VectorXd features_cov = this->covariance_function_(group->second.dataset.features, fvec); - // std::cout << "Feature " << j << "(" << without_measurement(features[j]) + // std::cout << "Feature " << j << "(" << + // without_measurement(features[j]) // << ") in group '" << group->first << "' (" // << group->second.block_size << ": " // << group->second.initial_row << " -> " - // << group->second.initial_row + group->second.block_size << ")" + // << group->second.initial_row + group->second.block_size << + // ")" // << std::endl; // std::cout << "inducing_points: " // << sparse_gp_fit.inducing_features.size() << std::endl; @@ -920,15 +913,30 @@ class PICGaussianProcessRegression sparse_gp_fit.W * sparse_gp_fit.cols_Bs[feature_to_block[j]] * Vbp; WV.col(j) = wvj; + // std::cout << "Vbp[" << j << "] (" << Vbp.size() + // << "): " << Vbp.transpose().format(Eigen::FullPrecision) + // << std::endl; + // std::cout << "sparse_gp_fit.mean_w[" << group->second.block_index + // << "] (" + // << sparse_gp_fit.mean_w[group->second.block_index].size() + // << "): " + // << sparse_gp_fit.mean_w[group->second.block_index] + // .transpose() + // .format(Eigen::FullPrecision) + // << std::endl; + // WV.col(j) = // sparse_gp_fit.covariance_W[group->second.block_index] * // (this->covariance_function_(group->second.dataset.features, fvec) // - // cross_cov.transpose() * // sparse_gp_fit.covariance_Y[group->second.block_index]); - } + // } } + // std::cout << "mean_correction after (" << mean_correction.size() << "): " + // << mean_correction.transpose().format(Eigen::FullPrecision) + // << std::endl; // std::cout << "WV (" << WV.rows() << "x" << WV.cols() << "):\n" // << WV << std::endl; @@ -1169,8 +1177,7 @@ class PICGaussianProcessRegression const MarginalDistribution &out_of_order_targets, BlockDiagonalLDLT *A_ldlt, Eigen::SerializableLDLT *K_uu_ldlt, Eigen::MatrixXd *K_fu, Eigen::VectorXd *y, - GroupMap *measurement_groups, - Eigen::SerializableLDLT *K_PITC_ldlt) const { + GroupMap *measurement_groups) const { ALBATROSS_ASSERT(A_ldlt != nullptr); ALBATROSS_ASSERT(K_uu_ldlt != nullptr); 
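  // As far as I can tell, K_PITC_ldlt was only ever needed for the debug
  // checks: it holds a dense LDLT of K_fu K_uu^-1 K_uf + A (the PITC
  // "Q_ff + Lambda" matrix), which costs O(|f|^2) storage and O(|f|^3) to
  // factor, so it is dropped from the fit and from this signature, and the
  // corresponding computation below is commented out.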
@@ -1259,20 +1266,20 @@ class PICGaussianProcessRegression *A_ldlt = A.ldlt(); - const Eigen::MatrixXd K_uuuf = K_uu_ldlt->sqrt_solve(K_fu->transpose()); - Eigen::MatrixXd Kfuf = K_uuuf.transpose() * K_uuuf; - Eigen::Index row_offset = 0; - Eigen::Index col_offset = 0; - for (auto &b : A.blocks) { - Kfuf.block(row_offset, col_offset, b.rows(), b.cols()) += b; - row_offset += b.rows(); - col_offset += b.cols(); - } + // const Eigen::MatrixXd K_uuuf = K_uu_ldlt->sqrt_solve(K_fu->transpose()); + // Eigen::MatrixXd Kfuf = K_uuuf.transpose() * K_uuuf; + // Eigen::Index row_offset = 0; + // Eigen::Index col_offset = 0; + // for (auto &b : A.blocks) { + // Kfuf.block(row_offset, col_offset, b.rows(), b.cols()) += b; + // row_offset += b.rows(); + // col_offset += b.cols(); + // } - // std::cout << "Kfuf (" << Kfuf.rows() << "x" << Kfuf.cols() << "):\n" - // << Kfuf.format(Eigen::FullPrecision) << std::endl; + // // std::cout << "Kfuf (" << Kfuf.rows() << "x" << Kfuf.cols() << "):\n" + // // << Kfuf.format(Eigen::FullPrecision) << std::endl; - *K_PITC_ldlt = Kfuf.ldlt(); + // *K_PITC_ldlt = Kfuf.ldlt(); } Parameter measurement_nugget_; diff --git a/tests/test_pic_gp.cc b/tests/test_pic_gp.cc index 9879b24f..9bc6b7d1 100644 --- a/tests/test_pic_gp.cc +++ b/tests/test_pic_gp.cc @@ -398,6 +398,9 @@ JointDistribution test_pic(const RegressionDataset &data, print_matrix(bfp_cov(ordered_features), "BFP: Q_ff + Lambda"); + print_matrix(Q_ff_lambda - bfp_cov(ordered_features), + "Q_ff_lambda difference"); + const Eigen::SerializableLDLT S = Q_ff_lambda.ldlt(); print_matrix(S.matrixL(), "PITC L"); @@ -448,7 +451,8 @@ JointDistribution test_pic(const RegressionDataset &data, // // K_Au.middleRows(j, block.cols()) = // // Eigen::MatrixXd::Zero(block.cols(), K_Au.cols()); // // std::stringstream Kaus; - // // std::cout << "Block " << blk << " j = " << j << " size = " << block.cols() + // // std::cout << "Block " << blk << " j = " << j << " size = " << + // block.cols() // // << std::endl; // // Kaus << "K_Au[" << blk << "]"; // // print_matrix(K_Au, Kaus.str()); @@ -474,6 +478,8 @@ JointDistribution test_pic(const RegressionDataset &data, print_matrix(K_PIC, "K_PIC"); print_matrix(bfp_cov(ordered_features, predict), "BFP: K_PIC"); + + print_matrix(K_PIC - bfp_cov(ordered_features, predict), "K_PIC error"); print_matrix(K_PITC, "K_PITC"); print_matrix(V_PIC, "V_PIC"); @@ -483,7 +489,8 @@ JointDistribution test_pic(const RegressionDataset &data, print_matrix(U, "U"); auto SV = S.sqrt_solve(V_PIC); - const Eigen::MatrixXd VSV = V_PIC.transpose() * S.solve(V_PIC); // SV.transpose() * SV; + const Eigen::MatrixXd VSV = + V_PIC.transpose() * S.solve(V_PIC); // SV.transpose() * SV; print_matrix(VSV, "VSV"); const Eigen::MatrixXd predict_prior = cov(predict); @@ -492,11 +499,16 @@ JointDistribution test_pic(const RegressionDataset &data, print_matrix(bfp_cov(predict), "BFP: prior"); + print_matrix(predict_prior - bfp_cov(predict), "prior error"); + // auto KK = K_PITC_ldlt.sqrt_solve(K_PIC); // const Eigen::MatrixXd explained_cov = KK.transpose() * KK; - const Eigen::MatrixXd explained_cov = - K_PIC.transpose() * S.solve(K_PIC); + const Eigen::MatrixXd explained_cov = K_PIC.transpose() * S.solve(K_PIC); print_matrix(explained_cov, "explained"); + + const Eigen::VectorXd PIC_mean = + K_PIC.transpose() * S.solve(data.targets.mean); + print_vector(PIC_mean, "PIC mean"); const Eigen::MatrixXd predict_cov = predict_prior - explained_cov; print_matrix(predict_cov, "K_**"); @@ -511,22 +523,99 @@ JointDistribution 
test_pic(const RegressionDataset &data, print_matrix(predict_cov - PITC_cov, "PIC - PITC"); print_matrix(predict_cov - PIC_cov, "PIC - factored"); - return JointDistribution{}; + return JointDistribution{PIC_mean, PIC_cov}; +} + +TEST(TestPicGP, BruteForceEquivalenceOneBlock) { + static constexpr std::size_t kNumTrainPoints = 10; + static constexpr std::size_t kNumTestPoints = 10; + static constexpr std::size_t kNumInducingPoints = 5; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + LeaveOneIntervalOut grouper(10); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + auto bfp_cov = make_brute_force_pic_covariance( + strategy(make_simple_covariance_function(), dataset.features), + make_simple_covariance_function(), grouper); + auto bfp = gp_from_covariance(bfp_cov); + + auto bfp_fit = bfp.fit(dataset); + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto bfp_pred = bfp_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + std::cout << "BFP mean (" << bfp_pred.mean.size() + << "): " << bfp_pred.mean.transpose().format(Eigen::FullPrecision) + << std::endl; + const double pic_error = (pic_pred.mean - bfp_pred.mean).norm(); + // const auto test_result = + // test_pic(dataset, test_features, make_simple_covariance_function(), + // strategy, grouper); + + EXPECT_LT(pic_error, 1e-8); + // EXPECT_LT((pic_pred.mean - test_result.mean).norm(), 1e-8); + + const double pic_cov_error = + (pic_pred.covariance - bfp_pred.covariance).norm(); + EXPECT_LT(pic_cov_error, 1e-7); +} + +TEST(TestPicGP, BruteForceEquivalenceMultipleBlocks) { + static constexpr std::size_t kNumTrainPoints = 10; + static constexpr std::size_t kNumTestPoints = 10; + static constexpr std::size_t kNumInducingPoints = 5; + + UniformlySpacedInducingPoints strategy(kNumInducingPoints); + LeaveOneIntervalOut grouper(2); + auto pic = pic_gp_from_covariance(make_simple_covariance_function(), grouper, + strategy, "pic", DenseQRImplementation{}); + + auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); + auto bfp_cov = make_brute_force_pic_covariance( + strategy(make_simple_covariance_function(), dataset.features), + make_simple_covariance_function(), grouper); + auto bfp = gp_from_covariance(bfp_cov); + + auto bfp_fit = bfp.fit(dataset); + auto pic_fit = pic.fit(dataset); + + auto test_features = linspace(0.1, 9.9, kNumTestPoints); + auto bfp_pred = bfp_fit.predict_with_measurement_noise(test_features).joint(); + auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); + // std::cout << "BFP mean (" << bfp_pred.mean.size() + // << "): " << bfp_pred.mean.transpose().format(Eigen::FullPrecision) + // << std::endl; + const double pic_error = (pic_pred.mean - bfp_pred.mean).norm(); + // const auto test_result = + // test_pic(dataset, test_features, make_simple_covariance_function(), + // strategy, grouper); + + EXPECT_LT(pic_error, 1e-7); + // EXPECT_LT((pic_pred.mean - test_result.mean).norm(), 1e-8); + + const double pic_cov_error = + (pic_pred.covariance - bfp_pred.covariance).norm(); + EXPECT_LT(pic_cov_error, 1e-7); } TEST(TestPicGP, EmitCSV) { - static constexpr std::size_t kNumTrainPoints = 12; - static constexpr std::size_t kNumTestPoints = 40; - static constexpr std::size_t kNumInducingPoints = 8; + static 
constexpr std::size_t kNumTrainPoints = 80; + static constexpr std::size_t kNumTestPoints = 300; + static constexpr std::size_t kNumInducingPoints = 20; static constexpr double kLargestTrainPoint = static_cast(kNumTrainPoints) - 1.; static constexpr double kSpatialBuffer = 0; - static constexpr std::size_t kNumBlocks = 2; + static constexpr std::size_t kNumBlocks = 5; UniformlySpacedInducingPoints strategy(kNumInducingPoints); LeaveOneIntervalOut grouper(kLargestTrainPoint / static_cast(kNumBlocks) + 1e-8); + // LeaveOneIntervalOut grouper(10); auto direct = gp_from_covariance(make_simple_covariance_function(), "direct"); @@ -538,6 +627,7 @@ TEST(TestPicGP, EmitCSV) { auto dataset = make_toy_linear_data(-kLargestTrainPoint / 2., 1, 0.1, kNumTrainPoints); + // auto dataset = make_toy_linear_data(5, 1, 0.1, kNumTrainPoints); auto bfp_cov = make_brute_force_pic_covariance( strategy(make_simple_covariance_function(), dataset.features), @@ -546,10 +636,11 @@ TEST(TestPicGP, EmitCSV) { auto test_features = linspace( kSpatialBuffer, kLargestTrainPoint - kSpatialBuffer, kNumTestPoints); + // auto test_features = linspace(0.1, 9.9, kNumTestPoints); auto direct_fit = direct.fit(dataset); auto pic_fit = pic.fit(dataset); auto pitc_fit = pitc.fit(dataset); - std::cout << "BFP: "; + // std::cout << "BFP: "; auto bfp_fit = bfp.fit(dataset); auto direct_pred = @@ -557,7 +648,7 @@ TEST(TestPicGP, EmitCSV) { auto pic_pred = pic_fit.predict_with_measurement_noise(test_features).joint(); auto pitc_pred = pitc_fit.predict_with_measurement_noise(test_features).joint(); - std::cout << "BFP: "; + // std::cout << "BFP: "; auto bfp_pred = bfp_fit.predict_with_measurement_noise(test_features).joint(); std::ofstream csv_out( @@ -565,7 +656,7 @@ TEST(TestPicGP, EmitCSV) { csv_out << "type,idx,x,mean,marginal,group\n"; for (std::size_t pred = 0; pred < direct_pred.size(); ++pred) { - csv_out << "direct," << std::setprecision(16) << pred << ',' + csv_out << "dense," << std::setprecision(16) << pred << ',' << test_features[pred] << ',' << direct_pred.mean[pred] << ',' << direct_pred.covariance(pred, pred) << ',' << grouper(test_features[pred]) << '\n'; @@ -629,7 +720,8 @@ TEST(TestPicGP, EmitCSV) { // << bfp_pred.covariance << std::endl; const double pic_error = (pic_pred.mean - direct_pred.mean).norm(); - EXPECT_LT(pic_error, 1e-7); + EXPECT_LT(pic_error, 5e-7); + // EXPECT_LT((pic_pred.mean - test_result.mean).norm(), 1e-7); // << "|u|: " << kNumInducingPoints << "; |f|: " << dataset.size() // << "; |p|: " << test_features.size() // << "; B width: " << grouper.group_domain_size << "\n" @@ -638,7 +730,7 @@ TEST(TestPicGP, EmitCSV) { const double pic_cov_error = (pic_pred.covariance - direct_pred.covariance).norm(); - EXPECT_LT(pic_cov_error, 1e-7); + EXPECT_LT(pic_cov_error, 5e-7); // << "|u|: " << kNumInducingPoints << "; |f|: " << dataset.size() // << "; |p|: " << test_features.size() // << "; B width: " << grouper.group_domain_size From ff34578cfc5e4b3c90d768632e890810a28d2466 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Thu, 18 Apr 2024 16:31:31 +1000 Subject: [PATCH 07/13] More PIC changes to support ionosphere tests --- BUILD.bazel | 1 + include/albatross/serialize/GP | 1 + .../albatross/src/cereal/block_diagonal.hpp | 27 +++++++++++++++++++ include/albatross/src/cereal/gp.hpp | 24 +++++++++++++++++ include/albatross/src/models/pic_gp.hpp | 12 ++++----- 5 files changed, 59 insertions(+), 6 deletions(-) create mode 100644 include/albatross/src/cereal/block_diagonal.hpp diff --git a/BUILD.bazel b/BUILD.bazel 
index 74e2d111..f0adc5d4 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -48,6 +48,7 @@ cc_library( "include/albatross/serialize/Ransac", "include/albatross/src/cereal/ThreadPool.hpp", "include/albatross/src/cereal/block_utils.hpp", + "include/albatross/src/cereal/block_diagonal.hpp", "include/albatross/src/cereal/dataset.hpp", "include/albatross/src/cereal/distribution.hpp", "include/albatross/src/cereal/eigen.hpp", diff --git a/include/albatross/serialize/GP b/include/albatross/serialize/GP index f54633c5..2ecee09c 100644 --- a/include/albatross/serialize/GP +++ b/include/albatross/serialize/GP @@ -18,6 +18,7 @@ #include "../src/cereal/block_utils.hpp" #include "../src/cereal/serializable_ldlt.hpp" +#include "../src/cereal/block_diagonal.hpp" #include "../src/cereal/gp.hpp" #include "../src/cereal/representations.hpp" #include "../src/cereal/serializable_spqr.hpp" diff --git a/include/albatross/src/cereal/block_diagonal.hpp b/include/albatross/src/cereal/block_diagonal.hpp new file mode 100644 index 00000000..964602bd --- /dev/null +++ b/include/albatross/src/cereal/block_diagonal.hpp @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_SRC_CEREAL_BLOCK_DIAGONAL_HPP_ +#define ALBATROSS_SRC_CEREAL_BLOCK_DIAGONAL_HPP_ + +namespace cereal { + +template +inline void serialize(Archive &archive, + const albatross::BlockDiagonalLDLT &ldlt, + const std::uint32_t version) { + archive(cereal::make_nvp("blocks", ldlt.blocks)); +} + +} // namespace cereal + +#endif /* ALBATROSS_SRC_CEREAL_BLOCK_DIAGONAL_HPP_ */ diff --git a/include/albatross/src/cereal/gp.hpp b/include/albatross/src/cereal/gp.hpp index b3926397..9b421b83 100644 --- a/include/albatross/src/cereal/gp.hpp +++ b/include/albatross/src/cereal/gp.hpp @@ -17,7 +17,9 @@ using albatross::Fit; using albatross::GaussianProcessBase; using albatross::GPFit; using albatross::LinearCombination; + using albatross::SparseGPFit; +using albatross::PICGPFit; #ifndef GP_SERIALIZATION_VERSION #define GP_SERIALIZATION_VERSION 2 @@ -51,6 +53,28 @@ inline void serialize(Archive &archive, Fit> &fit, } } +template +inline void +serialize(Archive &archive, + Fit> &fit, + const std::uint32_t version) { + archive(cereal::make_nvp("train_features", fit.train_features)); + archive(cereal::make_nvp("inducing_features", fit.inducing_features)); + archive(cereal::make_nvp("train_covariance", fit.train_covariance)); + archive(cereal::make_nvp("sigma_R", fit.sigma_R)); + archive(cereal::make_nvp("P", fit.P)); + archive(cereal::make_nvp("mean_w", fit.mean_w)); + archive(cereal::make_nvp("W", fit.W)); + archive(cereal::make_nvp("covariance_Y", fit.covariance_Y)); + archive(cereal::make_nvp("Z", fit.Z)); + archive(cereal::make_nvp("A_ldlt", fit.A_ldlt)); + archive(cereal::make_nvp("measurement_groups", fit.measurement_groups)); + archive(cereal::make_nvp("information", fit.information)); + archive(cereal::make_nvp("numerical_rank", fit.numerical_rank)); + archive(cereal::make_nvp("cols_Bs", fit.cols_Bs)); +} + template inline void save(Archive &archive, diff --git a/include/albatross/src/models/pic_gp.hpp 
b/include/albatross/src/models/pic_gp.hpp index 9e5408b5..22878fa3 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -63,10 +63,10 @@ struct Fit> { Eigen::Index numerical_rank; // debug stuff - std::vector covariance_Ynot; - Eigen::SerializableLDLT K_PITC_ldlt; + // std::vector covariance_Ynot; + // Eigen::SerializableLDLT K_PITC_ldlt; std::vector> cols_Bs; - std::vector> cols_Cs; + // std::vector> cols_Cs; Fit(){}; @@ -721,7 +721,7 @@ class PICGaussianProcessRegression const Eigen::MatrixXd sigma_inv_sqrt = C_ldlt.sqrt_solve(K_zz); const auto B_qr = QRImplementation::compute(sigma_inv_sqrt, nullptr); - new_fit.permutation_indices = get_P(*B_qr); + new_fit.P = get_P(*B_qr); new_fit.sigma_R = get_R(*B_qr); new_fit.numerical_rank = B_qr->rank(); @@ -770,8 +770,8 @@ class PICGaussianProcessRegression Q_sqrt.cwiseProduct(Q_sqrt).array().colwise().sum(); marginal_variance -= Q_diag; - const Eigen::MatrixXd S_sqrt = sqrt_solve( - sparse_gp_fit.sigma_R, sparse_gp_fit.permutation_indices, cross_cov); + const Eigen::MatrixXd S_sqrt = + sqrt_solve(sparse_gp_fit.sigma_R, sparse_gp_fit.P.indices(), cross_cov); const Eigen::VectorXd S_diag = S_sqrt.cwiseProduct(S_sqrt).array().colwise().sum(); marginal_variance += S_diag; From c781405de9191d771e20f82f7db5f20d3a260d88 Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Fri, 26 Apr 2024 15:17:02 +1000 Subject: [PATCH 08/13] Fixups to PIC - Implement basic mean / marginal prediction - save grouping function in the fit - allow updating the grouping function directly - various cleanups --- include/albatross/src/models/pic_gp.hpp | 213 ++++++++++++++++-------- 1 file changed, 147 insertions(+), 66 deletions(-) diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp index 22878fa3..96714b16 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -61,11 +61,12 @@ struct Fit> { GroupMap measurement_groups; Eigen::VectorXd information; Eigen::Index numerical_rank; + std::vector> cols_Bs; + GrouperFunction grouper; // debug stuff // std::vector covariance_Ynot; // Eigen::SerializableLDLT K_PITC_ldlt; - std::vector> cols_Bs; // std::vector> cols_Cs; Fit(){}; @@ -79,13 +80,14 @@ struct Fit> { const Eigen::MatrixXd &Z_, const BlockDiagonalLDLT &A_ldlt_, const GroupMap &measurement_groups_, const Eigen::VectorXd &information_, Eigen::Index numerical_rank_, - const std::vector> cols_Bs_) + const std::vector> cols_Bs_, + GrouperFunction grouper_) : train_features(features_), inducing_features(inducing_features_), train_covariance(train_covariance_), sigma_R(sigma_R_), P(P_), mean_w(mean_w_), W(W_), covariance_Y(covariance_Y_), Z(Z_), A_ldlt(A_ldlt_), measurement_groups(measurement_groups_), information(information_), numerical_rank(numerical_rank_), - cols_Bs(cols_Bs_) {} + cols_Bs(cols_Bs_), grouper(grouper_) {} void shift_mean(const Eigen::VectorXd &mean_shift) { ALBATROSS_ASSERT(mean_shift.size() == information.size()); @@ -105,7 +107,6 @@ struct Fit> { measurement_groups == other.measurement_groups && information == other.information && numerical_rank == other.numerical_rank && - // Debug stuff cols_Bs == other.cols_Bs); } }; @@ -302,12 +303,21 @@ class PICGaussianProcessRegression ALBATROSS_ASSERT(success); } + template ::type, + GrouperFunction>::value>::type> + void update_grouper_function(NewGrouper &&f) { + independent_group_function_ = std::forward(f); + } + template auto _update_impl(const Fit> &old_fit, const std::vector 
&features, const MarginalDistribution &targets) const { + assert(false && "Cannot call update() on a PIC GP yet!"); BlockDiagonalLDLT A_ldlt; Eigen::SerializableLDLT K_uu_ldlt; @@ -439,14 +449,14 @@ class PICGaussianProcessRegression // TODO(@peddie): a lot of this can be batched std::vector covariance_Y(measurement_groups.size()); + std::vector covariance_Ynot(measurement_groups.size()); std::vector mean_w(measurement_groups.size()); std::vector> cols_Bs(measurement_groups.size()); + std::vector> cols_Cs(measurement_groups.size()); const auto block_start_indices = A_ldlt.block_to_row_map(); - const auto precompute_block = [this, &A_ldlt, &features, &y, &K_fu, &v, &Z, - &K_uu_ldlt, &mean_w, &covariance_Y, - &cols_Bs](std::size_t block_number, - Eigen::Index start_row) -> void { + const auto precompute_block = [&, this](std::size_t block_number, + Eigen::Index start_row) -> void { // const std::size_t block_number = block_start.first; // const Eigen::Index start_row = block_start.second; const Eigen::Index block_size = A_ldlt.blocks[block_number].rows(); @@ -665,7 +675,7 @@ class PICGaussianProcessRegression Fit>; return FitType(features, u, K_uu_ldlt, get_R(*B_qr), get_P(*B_qr), mean_w, W, covariance_Y, Z, A_ldlt, measurement_groups, v, - B_qr->rank(), cols_Bs); + B_qr->rank(), cols_Bs, independent_group_function_); } template @@ -728,36 +738,138 @@ class PICGaussianProcessRegression return output; } - // This is included to allow the SparseGP to be compatible with fits - // generated using a standard GP. + // This is included to allow the SparseGP + // to be compatible with fits generated + // using a standard GP. using Base::_predict_impl; - template + template Eigen::VectorXd _predict_impl( const std::vector &features, - const Fit> + const Fit> &sparse_gp_fit, PredictTypeIdentity &&) const { - const auto cross_cov = - this->covariance_function_(sparse_gp_fit.train_features, features); + const auto find_group = [this, &sparse_gp_fit](const auto &feature) { + const auto group = sparse_gp_fit.measurement_groups.find( + independent_group_function_(without_measurement(feature))); + if (group == sparse_gp_fit.measurement_groups.end()) { + std::cerr << "Group mapping failure for feature '" + << without_measurement(feature) << "' (group index '" + << independent_group_function_(without_measurement(feature)) + << "')!" 
<< std::endl; + assert(group != sparse_gp_fit.measurement_groups.end() && + "TODO(@peddie): the group function in a PIC GP model must " + "cover the entire feature domain in any fit."); + } + return group; + }; + const Eigen::MatrixXd K_up = + this->covariance_function_(sparse_gp_fit.inducing_features, features); + Eigen::VectorXd mean_correction = Eigen::VectorXd::Zero(features.size()); + for (Eigen::Index j = 0; j < features.size(); ++j) { + const auto group = find_group(features[j]); + const std::vector fvec = {features[j]}; + + const Eigen::VectorXd features_cov = + this->covariance_function_(group->second.dataset.features, fvec); + const Eigen::VectorXd kpuy = + K_up.transpose().row(j) * + sparse_gp_fit.covariance_Y[group->second.block_index]; + const Eigen::VectorXd Vbp = features_cov - kpuy; + mean_correction[j] = + Vbp.dot(sparse_gp_fit.mean_w[group->second.block_index]); + } + Eigen::VectorXd mean = - gp_mean_prediction(cross_cov, sparse_gp_fit.information); + K_up.transpose() * sparse_gp_fit.information + mean_correction; + this->mean_function_.add_to(features, &mean); return mean; } - template + template MarginalDistribution _predict_impl( const std::vector &features, - const Fit> + const Fit> &sparse_gp_fit, PredictTypeIdentity &&) const { - const auto cross_cov = - this->covariance_function_(sparse_gp_fit.train_features, features); - Eigen::VectorXd mean = - gp_mean_prediction(cross_cov, sparse_gp_fit.information); + // std::cout << "_predict_impl() for MarginalDistribution" << std::endl; + const auto K_up = + this->covariance_function_(sparse_gp_fit.inducing_features, features); + Eigen::VectorXd mean = gp_mean_prediction(K_up, sparse_gp_fit.information); this->mean_function_.add_to(features, &mean); + Eigen::MatrixXd WV = Eigen::MatrixXd::Zero( + sparse_gp_fit.inducing_features.size(), features.size()); + + const auto find_group = [this, &sparse_gp_fit](const auto &feature) { + const auto group = sparse_gp_fit.measurement_groups.find( + independent_group_function_(without_measurement(feature))); + if (group == sparse_gp_fit.measurement_groups.end()) { + std::cerr << "Group mapping failure for feature '" + << without_measurement(feature) << "' (group index '" + << independent_group_function_(without_measurement(feature)) + << "')!" 
<< std::endl; + assert(group != sparse_gp_fit.measurement_groups.end() && + "TODO(@peddie): the group function in a PIC GP model must " + "cover the entire feature domain in any fit."); + } + return group; + }; + + Eigen::VectorXd mean_correction = Eigen::VectorXd::Zero(features.size()); + std::vector feature_to_block; + for (Eigen::Index j = 0; j < features.size(); ++j) { + const auto group = find_group(features[j]); + feature_to_block.push_back( + std::distance(sparse_gp_fit.measurement_groups.begin(), group)); + const std::vector fvec = {features[j]}; + + const Eigen::VectorXd features_cov = + this->covariance_function_(group->second.dataset.features, fvec); + const Eigen::VectorXd kpuy = + K_up.transpose().row(j) * + sparse_gp_fit.covariance_Y[group->second.block_index]; + const Eigen::VectorXd Vbp = features_cov - kpuy; + mean_correction[j] = + Vbp.dot(sparse_gp_fit.mean_w[group->second.block_index]); + const Eigen::VectorXd wvj = + sparse_gp_fit.W * sparse_gp_fit.cols_Bs[feature_to_block[j]] * Vbp; + WV.col(j) = wvj; + } + + Eigen::VectorXd VSV_diag(features.size()); + + for (Eigen::Index row = 0; row < VSV_diag.size(); ++row) { + assert(row < feature_to_block.size()); + const auto row_group = find_group(features[row]); + // TODO(@peddie): these are K, not V! + const Eigen::VectorXd Q_row_p = + K_up.transpose().row(row) * + sparse_gp_fit.covariance_Y[row_group->second.block_index]; + const std::vector row_fvec = {features[row]}; + const Eigen::VectorXd V_row_p = + this->covariance_function_(row_group->second.dataset.features, + row_fvec) - + Q_row_p; + + Eigen::VectorXd xi_a = sparse_gp_fit.A_ldlt.blocks[feature_to_block[row]] + .sqrt_solve(V_row_p) + .col(0); + + Eigen::VectorXd xi_z = + sparse_gp_fit.Z.block(0, row_group->second.initial_row, + sparse_gp_fit.Z.rows(), + row_group->second.block_size) * + V_row_p; + + VSV_diag(row) = xi_a.dot(xi_a) - xi_z.dot(xi_z); + } + + const Eigen::VectorXd U_diag = (K_up.transpose() * WV).diagonal(); + Eigen::VectorXd marginal_variance(cast::to_index(features.size())); for (Eigen::Index i = 0; i < marginal_variance.size(); ++i) { marginal_variance[i] = this->covariance_function_( @@ -765,66 +877,35 @@ class PICGaussianProcessRegression } const Eigen::MatrixXd Q_sqrt = - sparse_gp_fit.train_covariance.sqrt_solve(cross_cov); + sparse_gp_fit.train_covariance.sqrt_solve(K_up); const Eigen::VectorXd Q_diag = Q_sqrt.cwiseProduct(Q_sqrt).array().colwise().sum(); marginal_variance -= Q_diag; const Eigen::MatrixXd S_sqrt = - sqrt_solve(sparse_gp_fit.sigma_R, sparse_gp_fit.P.indices(), cross_cov); + sqrt_solve(sparse_gp_fit.sigma_R, sparse_gp_fit.P, K_up); const Eigen::VectorXd S_diag = S_sqrt.cwiseProduct(S_sqrt).array().colwise().sum(); marginal_variance += S_diag; - return MarginalDistribution(mean, marginal_variance); - } + mean += mean_correction; - // template - // auto find_group(const FitType &fit, const FeatureType &feature) const { - // const auto group = fit.measurement_groups.find( - // independent_group_function_(without_measurement(feature))); - // if (group == fit.measurement_groups.end()) { - // std::cerr << "Group mapping failure for feature '" - // << without_measurement(feature) << "' (group index '" - // << independent_group_function_(without_measurement(feature)) - // << "')!" 
<< std::endl; - // assert(group != fit.measurement_groups.end() && - // "TODO(@peddie): the group function in a PIC GP model must cover - // " "the entire feature domain in any fit."); - // } - // return group; - // } - // template - // using InducingFeatureType = - // typename std::decay &)>::type::value_type>::type; - - // typename std::decay( - // std::declval(), std::declval()))>::type; - - // template - // using InducingFeatureType = - // typename std::decay(), - // std::declval - // &>()))::value_type>:: type; + return MarginalDistribution(mean, + marginal_variance - (2 * U_diag + VSV_diag)); + } template JointDistribution _predict_impl( const std::vector &features, - const Fit, - // typename std::result_of &)>::type, - FitFeatureType>> &sparse_gp_fit, - // const Fit, - // FitFeatureType>> &sparse_gp_fit, + const Fit> + &sparse_gp_fit, PredictTypeIdentity &&) const { + // std::cout << "features (" << features.size() << "): "; + // for (const auto &f : features) { + // std::cout << f << " "; + // } + // std::cout << std::endl; const auto find_group = [this, &sparse_gp_fit](const auto &feature) { const auto group = sparse_gp_fit.measurement_groups.find( From 7c63e24b47dea7edbdb69ed652b20682222e749f Mon Sep 17 00:00:00 2001 From: Matt Peddie Date: Fri, 26 Apr 2024 15:40:37 +1000 Subject: [PATCH 09/13] Clean up a bunch of PIC debug stuff --- include/albatross/src/models/pic_gp.hpp | 362 ++---------------------- 1 file changed, 27 insertions(+), 335 deletions(-) diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp index 96714b16..14993699 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -54,7 +54,6 @@ struct Fit> { Eigen::PermutationMatrixX P; std::vector mean_w; Eigen::MatrixXd W; - // std::vector covariance_W; std::vector covariance_Y; Eigen::MatrixXd Z; BlockDiagonalLDLT A_ldlt; @@ -64,11 +63,6 @@ struct Fit> { std::vector> cols_Bs; GrouperFunction grouper; - // debug stuff - // std::vector covariance_Ynot; - // Eigen::SerializableLDLT K_PITC_ldlt; - // std::vector> cols_Cs; - Fit(){}; Fit(const std::vector &features_, @@ -106,8 +100,7 @@ struct Fit> { A_ldlt == other.A_ldlt && measurement_groups == other.measurement_groups && information == other.information && - numerical_rank == other.numerical_rank && - cols_Bs == other.cols_Bs); + numerical_rank == other.numerical_rank && cols_Bs == other.cols_Bs); } }; @@ -225,8 +218,6 @@ class PICGaussianProcessRegression PICGaussianProcessRegression< CovFunc, MeanFunc, GrouperFunction, InducingPointStrategy, QRImplementation>> { - InducingPointStrategy inducing_point_strategy_; - public: using Base = GaussianProcessBase< CovFunc, MeanFunc, @@ -400,8 +391,6 @@ class PICGaussianProcessRegression const auto u = inducing_point_strategy_(this->covariance_function_, features); ALBATROSS_ASSERT(u.size() > 0 && "Empty inducing points!"); - // std::cerr << "features: " << features.size() << std::endl; - // std::cerr << "u: " << u.size() << std::endl; BlockDiagonalLDLT A_ldlt; Eigen::SerializableLDLT K_uu_ldlt; @@ -426,10 +415,6 @@ class PICGaussianProcessRegression // appropriately, and if the term is diagonal, also solve using // the blocks of A^-1. 
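    // If I'm reading sqrt_solve right, Z = Sigma^{-1/2} K_uf A^{-1}, with
    // Sigma = K_uu + K_uf A^{-1} K_fu being the matrix factored by B_qr, so
    // by Woodbury's lemma A^{-1} - Z^T Z = (K_fu K_uu^{-1} K_uf + A)^{-1} --
    // exactly the (Q_ff + Lambda)^{-1} term from the PITC posterior.  That
    // identity is what the expression for W below relies on.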
const Eigen::MatrixXd Z = sqrt_solve(*B_qr, A_ldlt.solve(K_fu).transpose()); - // get_R(*B_qr).template triangularView().transpose().solve( - // get_P(*B_qr).transpose() * A_ldlt.solve(K_fu).transpose()); - // std::cout << "Z (" << Z.rows() << "x" << Z.cols() << "):\n" - // << Z << std::endl; Eigen::VectorXd y_augmented = Eigen::VectorXd::Zero(B_qr->rows()); y_augmented.topRows(y.size()) = A_ldlt.sqrt_solve(y, Base::threads_.get()); @@ -437,15 +422,8 @@ class PICGaussianProcessRegression using InducingPointFeatureType = typename std::decay::type; - const Eigen::MatrixXd W = - K_uu_ldlt.solve((A_ldlt.solve(K_fu) - Z.transpose() * Z * K_fu).transpose()); - // std::cout << "W (" << W.rows() << "x" << W.cols() << "):\n" - // << W.format(Eigen::FullPrecision) << std::endl; - - // const Eigen::MatrixXd W2 = - // K_uu_ldlt.solve(K_PITC_ldlt.solve(K_fu).transpose()); - // std::cout << "W2 (" << W2.rows() << "x" << W2.cols() << "):\n" - // << W2.format(Eigen::FullPrecision) << std::endl; + const Eigen::MatrixXd W = K_uu_ldlt.solve( + (A_ldlt.solve(K_fu) - Z.transpose() * Z * K_fu).transpose()); // TODO(@peddie): a lot of this can be batched std::vector covariance_Y(measurement_groups.size()); @@ -457,8 +435,6 @@ class PICGaussianProcessRegression const auto precompute_block = [&, this](std::size_t block_number, Eigen::Index start_row) -> void { - // const std::size_t block_number = block_start.first; - // const Eigen::Index start_row = block_start.second; const Eigen::Index block_size = A_ldlt.blocks[block_number].rows(); // K_fu is already computed, so we can form K_uB and K_uA by // appropriate use of sparse indexing matrices and avoid an O(N @@ -477,11 +453,6 @@ class PICGaussianProcessRegression } cols_B.makeCompressed(); - // std::cout << "block " << block_number << " -- start_row: " << start_row - // << "; block_size: " << block_size << std::endl; - // std::cout << "cols_B (" << cols_B.rows() << "x" << cols_B.cols() << "):\n" - // << Eigen::MatrixXd(cols_B) << std::endl; - for (Eigen::Index i = 0, j = 0; i < features.size();) { if (i == start_row) { i += block_size; @@ -501,174 +472,15 @@ class PICGaussianProcessRegression } cols_Bs[block_number] = cols_B; - - // std::cout << "cols_C (" << cols_C.rows() << "x" << cols_C.cols() << "):\n" - // << Eigen::MatrixXd(cols_C) << std::endl; // v_b \in R^b = A_BB^-1 (y_b - K_Bu v) Eigen::MatrixXd ydiff_b = cols_B.transpose() * (y - K_fu * v); - // std::cerr << "ydiff_b: " << ydiff_b.rows() << " x " << ydiff_b.cols() - // << std::endl; - // std::cerr << "cols_B: " << cols_B.rows() << " x " << cols_B.cols() - // << std::endl; - // std::cerr << "A_ldlt: " << A_ldlt.rows() << " x " << A_ldlt.cols() - // << " in " << A_ldlt.blocks.size() << " blocks: { "; - // for (const auto &block : A_ldlt.blocks) { - // std::cerr << block.rows() << ", "; - // } - // std::cerr << " }" << std::endl; const Eigen::MatrixXd mean_w_full = A_ldlt.blocks[block_number].solve(ydiff_b); - - // if (A_ldlt.blocks.size() > 1) { - // // std::cout << "K_fu (" << K_fu.rows() << "x" << K_fu.cols() << - // // "):\n" - // // << K_fu << std::endl; - // const Eigen::MatrixXd KufC = K_fu.transpose() * cols_C; - // // std::cout << "KufC (" << KufC.rows() << "x" << KufC.cols() << "):\n" - // // << KufC << std::endl; - // // const Eigen::MatrixXd ZC = Z * cols_C; - // // // std::cout << "ZC.transpose() (" << ZC.transpose().rows() << "x" - // // // << ZC.transpose().cols() << "):\n" - // // // << ZC.transpose() << std::endl; - - // // const Eigen::MatrixXd KuZT = KufC * ZC.transpose(); - // // 
const Eigen::MatrixXd KuZTZ = KuZT * Z; - // // const Eigen::MatrixXd KuZTZB = -KuZTZ * cols_B; - // // covariance_W[block_number] = K_uu_ldlt.solve(KuZTZB); - // // std::cout << "covariance_W[" << block_number << "]:\n" - // // << covariance_W[block_number].format(Eigen::FullPrecision) - // // << std::endl; - // covariance_Ynot[block_number] = K_uu_ldlt.solve(KufC); - // // covariance_W.emplace_back(K_uu_ldlt.solve( - // // (-(K_fu.transpose() * cols_C) * Z.transpose() * Z) * - // // cols_B)); - // } - - // std::cout << "mean_w_full (" << mean_w_full.rows() << "x" - // << mean_w_full.cols() << "):\n" - // << mean_w_full << std::endl; - // const Eigen::MatrixXd mean_w_block = cols_B.transpose() * - // mean_w_full; std::cout << "mean_w_block (" << mean_w_block.rows() - // << "x" - // << mean_w_block.cols() << "):\n" - // << mean_w_block << std::endl; - // mean_w.emplace_back(cols_B.transpose() * - // A_ldlt.blocks[block_number].solve(ydiff_b)); mean_w[block_number] = mean_w_full; // Y \in R^(u x b) = K_uu^-1 K_uB covariance_Y[block_number] = K_uu_ldlt.solve(K_fu.transpose() * cols_B); - // std::cout << "covariance_Y[" << block_number << "]:\n" - // << covariance_Y[block_number].format(Eigen::FullPrecision) - // << std::endl; - // W \in R^(u x b) = K_uu^-1 K_uC S_CB^-1 - // = K_uu^-1 K_uC (A^-1 - Z^T Z) - // [A^-1 is block diagonal; C and B are disjoint] - // = K_uu^-1 K_uC (- Z^T Z) }; - // for (const auto &block_start : block_start_indices) { - // const std::size_t block_number = block_start.first; - // const Eigen::Index start_row = block_start.second; - // const Eigen::Index block_size = A_ldlt.blocks[block_number].rows(); - // // K_fu is already computed, so we can form K_uB and K_uA by - // // appropriate use of sparse indexing matrices and avoid an O(N - // // M) operation. - // // - // // This nonsense would be a lot more straightforward with Eigen - // // 3.4's slicing and indexing API. 
- // Eigen::SparseMatrix cols_B(features.size(), block_size); - // cols_B.reserve(block_size); - // Eigen::SparseMatrix cols_C(features.size(), - // features.size() - block_size); - // cols_C.reserve(features.size() - block_size); - - // for (Eigen::Index i = 0; i < block_size; ++i) { - // cols_B.insert(start_row + i, i) = 1.; - // } - // cols_B.makeCompressed(); - - // // std::cout << "cols_B (" << cols_B.rows() << "x" << cols_B.cols() << - // "):\n" - // // << Eigen::MatrixXd(cols_B) << std::endl; - - // // std::cout << "start_row: " << start_row << "; block_size: " << - // block_size - // // << std::endl; - // for (Eigen::Index i = 0, j = 0; i < features.size();) { - // if (i == start_row) { - // i += block_size; - // continue; - // } - // cols_C.insert(i, j) = 1.; - // i++; - // j++; - // } - // cols_C.makeCompressed(); - - // Eigen::Index col = 0; - // for (Eigen::Index k = 0; k < cols_C.outerSize(); ++k) { - // for (decltype(cols_C)::InnerIterator it(cols_C, k); it; ++it) { - // assert(it.col() == col++); - // } - // } - // // std::cout << "cols_C (" << cols_C.rows() << "x" << cols_C.cols() << - // "):\n" - // // << Eigen::MatrixXd(cols_C) << std::endl; - // // v_b \in R^b = A_BB^-1 (y_b - K_Bu v) - // Eigen::MatrixXd ydiff_b = cols_B.transpose() * (y - K_fu * v); - // // std::cerr << "ydiff_b: " << ydiff_b.rows() << " x " << - // ydiff_b.cols() - // // << std::endl; - // // std::cerr << "cols_B: " << cols_B.rows() << " x " << cols_B.cols() - // // << std::endl; - // // std::cerr << "A_ldlt: " << A_ldlt.rows() << " x " << A_ldlt.cols() - // // << " in " << A_ldlt.blocks.size() << " blocks: { "; - // // for (const auto &block : A_ldlt.blocks) { - // // std::cerr << block.rows() << ", "; - // // } - // // std::cerr << " }" << std::endl; - // const Eigen::MatrixXd mean_w_full = - // A_ldlt.blocks[block_number].solve(ydiff_b); - // // std::cout << "mean_w_full (" << mean_w_full.rows() << "x" - // // << mean_w_full.cols() << "):\n" - // // << mean_w_full << std::endl; - // // const Eigen::MatrixXd mean_w_block = cols_B.transpose() * - // mean_w_full; - // // std::cout << "mean_w_block (" << mean_w_block.rows() << "x" - // // << mean_w_block.cols() << "):\n" - // // << mean_w_block << std::endl; - // // mean_w.emplace_back(cols_B.transpose() * - // // A_ldlt.blocks[block_number].solve(ydiff_b)); - // mean_w.push_back(mean_w_full); - // // Y \in R^(u x b) = K_uu^-1 K_uB - // covariance_Y.emplace_back(K_uu_ldlt.solve(K_fu.transpose() * cols_B)); - // // W \in R^(u x b) = K_uu^-1 K_uC S_CB^-1 - // // = K_uu^-1 K_uC (A^-1 - Z^T Z) - // // [A^-1 is block diagonal; C and B are disjoint] - // // = K_uu^-1 K_uC (- Z^T Z) - - // if (A_ldlt.blocks.size() > 1) { - // // std::cout << "K_fu (" << K_fu.rows() << "x" << K_fu.cols() << - // "):\n" - // // << K_fu << std::endl; - // const Eigen::MatrixXd KufC = K_fu.transpose() * cols_C; - // // std::cout << "KufC (" << KufC.rows() << "x" << KufC.cols() << - // "):\n" - // // << KufC << std::endl; - // const Eigen::MatrixXd ZC = Z * cols_C; - // // std::cout << "ZC.transpose() (" << ZC.transpose().rows() << "x" - // // << ZC.transpose().cols() << "):\n" - // // << ZC.transpose() << std::endl; - - // const Eigen::MatrixXd KuZT = KufC * ZC.transpose(); - // const Eigen::MatrixXd KuZTZ = KuZT * Z; - // const Eigen::MatrixXd KuZTZB = -KuZTZ * cols_B; - // covariance_W.emplace_back(K_uu_ldlt.solve(KuZTZB)); - // // covariance_W.emplace_back(K_uu_ldlt.solve( - // // (-(K_fu.transpose() * cols_C) * Z.transpose() * Z) * cols_B)); - // } - // } - 
apply_map(block_start_indices, precompute_block, Base::threads_.get()); using FitType = @@ -901,12 +713,6 @@ class PICGaussianProcessRegression const Fit> &sparse_gp_fit, PredictTypeIdentity &&) const { - // std::cout << "features (" << features.size() << "): "; - // for (const auto &f : features) { - // std::cout << f << " "; - // } - // std::cout << std::endl; - const auto find_group = [this, &sparse_gp_fit](const auto &feature) { const auto group = sparse_gp_fit.measurement_groups.find( independent_group_function_(without_measurement(feature))); @@ -921,18 +727,9 @@ class PICGaussianProcessRegression } return group; }; - // using CalculatedInducingFeatureType = - // typename std::decay &)>::type::value_type>::type; - // static_assert( - // std::is_same::value && - // "A fitted PIC model must be able to compute the covariance between - // its " "inducing point feature type and the feature type to be - // predicted."); + // K_up - const Eigen::MatrixXd cross_cov = + const Eigen::MatrixXd K_up = this->covariance_function_(sparse_gp_fit.inducing_features, features); const Eigen::MatrixXd prior_cov = this->covariance_function_(features); @@ -940,89 +737,25 @@ class PICGaussianProcessRegression sparse_gp_fit.inducing_features.size(), features.size()); Eigen::VectorXd mean_correction = Eigen::VectorXd::Zero(features.size()); - // std::cout << "mean_correction before (" << mean_correction.size() << "): " - // << mean_correction.transpose().format(Eigen::FullPrecision) - // << std::endl; std::vector feature_to_block; for (Eigen::Index j = 0; j < features.size(); ++j) { const auto group = find_group(features[j]); - // const auto group = sparse_gp_fit.measurement_groups.find( - // independent_group_function_(without_measurement(features[j]))); - // assert(group != sparse_gp_fit.measurement_groups.end() && - // "TODO(@peddie): the group function in a PIC GP model must - // cover " "the entire feature domain in any fit."); feature_to_block.push_back( std::distance(sparse_gp_fit.measurement_groups.begin(), group)); - // if (sparse_gp_fit.A_ldlt.blocks.size() > 1) { - const std::vector fvec = {features[j]}; - - const Eigen::VectorXd features_cov = - this->covariance_function_(group->second.dataset.features, fvec); - // std::cout << "Feature " << j << "(" << - // without_measurement(features[j]) - // << ") in group '" << group->first << "' (" - // << group->second.block_size << ": " - // << group->second.initial_row << " -> " - // << group->second.initial_row + group->second.block_size << - // ")" - // << std::endl; - // std::cout << "inducing_points: " - // << sparse_gp_fit.inducing_features.size() << std::endl; - // std::cout << "cross_cov (" << cross_cov.rows() << "x" - // << cross_cov.cols() << "):\n" - // << cross_cov << std::endl; - // std::cout << "features_cov (" << features_cov.rows() << "x" - // << features_cov.cols() << "):\n" - // << features_cov << std::endl; - // std::cout - // << "sparse_gp_fit.covariance_Y[group->second.block_index] (" - // << sparse_gp_fit.covariance_Y[group->second.block_index].rows() - // << "x" - // << sparse_gp_fit.covariance_Y[group->second.block_index].cols() - // << "):\n" - // << sparse_gp_fit.covariance_Y[group->second.block_index] - // << std::endl; - const Eigen::VectorXd kpuy = - cross_cov.transpose().row(j) * - sparse_gp_fit.covariance_Y[group->second.block_index]; - // std::cout << "kpuy (" << kpuy.rows() << "x" << kpuy.cols() << "):\n" - // << kpuy << std::endl; - const Eigen::VectorXd Vbp = features_cov - kpuy; - mean_correction[j] = - 
Vbp.dot(sparse_gp_fit.mean_w[group->second.block_index]); - const Eigen::VectorXd wvj = - sparse_gp_fit.W * sparse_gp_fit.cols_Bs[feature_to_block[j]] * Vbp; - WV.col(j) = wvj; - - // std::cout << "Vbp[" << j << "] (" << Vbp.size() - // << "): " << Vbp.transpose().format(Eigen::FullPrecision) - // << std::endl; - // std::cout << "sparse_gp_fit.mean_w[" << group->second.block_index - // << "] (" - // << sparse_gp_fit.mean_w[group->second.block_index].size() - // << "): " - // << sparse_gp_fit.mean_w[group->second.block_index] - // .transpose() - // .format(Eigen::FullPrecision) - // << std::endl; - - // WV.col(j) = - // sparse_gp_fit.covariance_W[group->second.block_index] * - // (this->covariance_function_(group->second.dataset.features, fvec) - // - - // cross_cov.transpose() * - // sparse_gp_fit.covariance_Y[group->second.block_index]); - // } - } - - // std::cout << "mean_correction after (" << mean_correction.size() << "): " - // << mean_correction.transpose().format(Eigen::FullPrecision) - // << std::endl; - // std::cout << "WV (" << WV.rows() << "x" << WV.cols() << "):\n" - // << WV << std::endl; + const std::vector fvec = {features[j]}; - // std::cout << "mean_correction (" << mean_correction.size() - // << "): " << mean_correction.transpose() << std::endl; + const Eigen::VectorXd features_cov = + this->covariance_function_(group->second.dataset.features, fvec); + const Eigen::VectorXd kpuy = + K_up.transpose().row(j) * + sparse_gp_fit.covariance_Y[group->second.block_index]; + const Eigen::VectorXd Vbp = features_cov - kpuy; + mean_correction[j] = + Vbp.dot(sparse_gp_fit.mean_w[group->second.block_index]); + const Eigen::VectorXd wvj = + sparse_gp_fit.W * sparse_gp_fit.cols_Bs[feature_to_block[j]] * Vbp; + WV.col(j) = wvj; + } Eigen::MatrixXd VSV(features.size(), features.size()); @@ -1030,21 +763,9 @@ class PICGaussianProcessRegression for (Eigen::Index col = 0; col <= row; ++col) { const auto row_group = find_group(features[row]); const auto column_group = find_group(features[col]); - // const auto row_group = sparse_gp_fit.measurement_groups.find( - // independent_group_function_(without_measurement(features[row]))); - // assert(row_group != sparse_gp_fit.measurement_groups.end() && - // "TODO(@peddie): the group function in a PIC GP model must - // cover " "the entire feature domain in any fit."); - - // const auto column_group = sparse_gp_fit.measurement_groups.find( - // independent_group_function_(without_measurement(features[col]))); - // assert(column_group != sparse_gp_fit.measurement_groups.end() && - // "TODO(@peddie): the group function in a PIC GP model must - // cover " "the entire feature domain in any fit."); - // TODO(@peddie): these are K, not V! 
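        // Q_row_p = K_pu K_uu^{-1} K_uB, i.e. the Nystrom approximation of
        // the cross-covariance between this prediction feature and its
        // block B, and V_row_p = K_Bp - Q_Bp is the within-block covariance
        // that the inducing points leave unexplained.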
const Eigen::VectorXd Q_row_p = - cross_cov.transpose().row(row) * + K_up.transpose().row(row) * sparse_gp_fit.covariance_Y[row_group->second.block_index]; const std::vector row_fvec = {features[row]}; const Eigen::VectorXd V_row_p = @@ -1052,7 +773,7 @@ class PICGaussianProcessRegression row_fvec) - Q_row_p; const Eigen::VectorXd Q_column_p = - cross_cov.transpose().row(col) * + K_up.transpose().row(col) * sparse_gp_fit.covariance_Y[column_group->second.block_index]; const std::vector column_fvec = {features[col]}; const Eigen::VectorXd V_column_p = @@ -1072,9 +793,6 @@ class PICGaussianProcessRegression .dot(sparse_gp_fit.A_ldlt.blocks[feature_to_block[col]] .sqrt_solve(V_column_p) .col(0)); - // std::cout << "VSV(" << row << ", " << col << ") same block (" - // << feature_to_block[row] << "):\n" - // << VSV(row, col) << std::endl; } const Eigen::MatrixXd rowblock = sparse_gp_fit.Z.block( @@ -1084,36 +802,19 @@ class PICGaussianProcessRegression 0, column_group->second.initial_row, sparse_gp_fit.Z.rows(), column_group->second.block_size); - // std::cout << "VSV(" << row << ", " << col << "):\n" - // << "rowblock (" << rowblock.rows() << "x" << rowblock.cols() - // << "):\n" - // << rowblock << "\ncolblock (" << columnblock.rows() << "x" - // << columnblock.cols() << "):\n" - // << columnblock << "\nV_row_p (" << V_row_p.size() - // << "): " << V_row_p.transpose() << "\nV_column_p (" - // << V_column_p.size() << "): " << V_column_p.transpose() - // << "\nvalue: " - // << (rowblock * V_row_p).dot(columnblock * V_column_p) - // << std::endl; - VSV(row, col) -= (rowblock * V_row_p).dot(columnblock * V_column_p); } } VSV.triangularView() = VSV.transpose(); - // std::cout << "VSV (" << VSV.rows() << "x" << VSV.cols() << "):\n" - // << VSV << std::endl; - const Eigen::MatrixXd U = cross_cov.transpose() * WV; - - // std::cout << "U (" << U.rows() << "x" << U.cols() << "):\n" - // << U << std::endl; + const Eigen::MatrixXd U = K_up.transpose() * WV; const Eigen::MatrixXd S_sqrt = - sqrt_solve(sparse_gp_fit.sigma_R, sparse_gp_fit.P, cross_cov); + sqrt_solve(sparse_gp_fit.sigma_R, sparse_gp_fit.P, K_up); const Eigen::MatrixXd Q_sqrt = - sparse_gp_fit.train_covariance.sqrt_solve(cross_cov); + sparse_gp_fit.train_covariance.sqrt_solve(K_up); const Eigen::MatrixXd max_explained = Q_sqrt.transpose() * Q_sqrt; const Eigen::MatrixXd unexplained = S_sqrt.transpose() * S_sqrt; @@ -1121,17 +822,9 @@ class PICGaussianProcessRegression const Eigen::MatrixXd pitc_covariance = prior_cov - max_explained + unexplained; - // std::cout << "pitc_covariance (" << pitc_covariance.rows() << "x" - // << pitc_covariance.cols() << "):\n" - // << pitc_covariance << std::endl; - const Eigen::MatrixXd pic_correction = U + U.transpose() + VSV; - // std::cout << "pic_correction (" << pic_correction.rows() << "x" - // << pic_correction.cols() << "):\n" - // << pic_correction << std::endl; - - JointDistribution pred(cross_cov.transpose() * sparse_gp_fit.information + + JointDistribution pred(K_up.transpose() * sparse_gp_fit.information + mean_correction, pitc_covariance - pic_correction); @@ -1143,7 +836,7 @@ class PICGaussianProcessRegression // features.size()); for (std::size_t i = 0; i < features.size(); ++i) { // const auto group = find_group(features[i]); // K_pic.col(i) = sparse_gp_fit.cols_Cs[feature_to_block[i]] * - // cross_cov.transpose() * + // K_up.transpose() * // sparse_gp_fit.covariance_Ynot[feature_to_block[i]]; // std::cout << "Ynot: K_pic.col(" << i << "): " << // K_pic.col(i).transpose() @@ -1161,8 +854,7 @@ 
class PICGaussianProcessRegression // const Eigen::MatrixXd K_pic_pitc = // sparse_gp_fit.K_PITC_ldlt.sqrt_solve(K_pic); - // JointDistribution alt_pred(cross_cov.transpose() * - // sparse_gp_fit.information, + // JointDistribution alt_pred(K_up.transpose() * sparse_gp_fit.information, // prior_cov - K_pic_pitc.transpose() * // K_pic_pitc); @@ -1365,7 +1057,7 @@ class PICGaussianProcessRegression Parameter measurement_nugget_; Parameter inducing_nugget_; - // InducingPointStrategy inducing_point_strategy_; + InducingPointStrategy inducing_point_strategy_; GrouperFunction independent_group_function_; }; From 7b93f79f40f8267f3d6c924b830b1dc9df3e278a Mon Sep 17 00:00:00 2001 From: Wolfgang Langhans Date: Fri, 3 May 2024 10:15:07 -0700 Subject: [PATCH 10/13] Minor changes so it compiles with cmake tools --- .../albatross/src/cereal/block_diagonal.hpp | 2 +- include/albatross/src/cereal/gp.hpp | 2 +- include/albatross/src/models/pic_gp.hpp | 50 ++++++++++--------- tests/CMakeLists.txt | 1 + tests/test_pic_gp.cc | 22 ++++---- 5 files changed, 40 insertions(+), 37 deletions(-) diff --git a/include/albatross/src/cereal/block_diagonal.hpp b/include/albatross/src/cereal/block_diagonal.hpp index 964602bd..6b308dda 100644 --- a/include/albatross/src/cereal/block_diagonal.hpp +++ b/include/albatross/src/cereal/block_diagonal.hpp @@ -18,7 +18,7 @@ namespace cereal { template inline void serialize(Archive &archive, const albatross::BlockDiagonalLDLT &ldlt, - const std::uint32_t version) { + const syd::uint32_t ) { archive(cereal::make_nvp("blocks", ldlt.blocks)); } diff --git a/include/albatross/src/cereal/gp.hpp b/include/albatross/src/cereal/gp.hpp index 9b421b83..a50e5e55 100644 --- a/include/albatross/src/cereal/gp.hpp +++ b/include/albatross/src/cereal/gp.hpp @@ -58,7 +58,7 @@ template > &fit, - const std::uint32_t version) { + const std::uint32_t ) { archive(cereal::make_nvp("train_features", fit.train_features)); archive(cereal::make_nvp("inducing_features", fit.inducing_features)); archive(cereal::make_nvp("train_covariance", fit.train_covariance)); diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp index 22878fa3..c10a8c6a 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -443,7 +443,7 @@ class PICGaussianProcessRegression std::vector> cols_Bs(measurement_groups.size()); const auto block_start_indices = A_ldlt.block_to_row_map(); - const auto precompute_block = [this, &A_ldlt, &features, &y, &K_fu, &v, &Z, + const auto precompute_block = [&A_ldlt, &features, &y, &K_fu, &v, &K_uu_ldlt, &mean_w, &covariance_Y, &cols_Bs](std::size_t block_number, Eigen::Index start_row) -> void { @@ -456,11 +456,11 @@ class PICGaussianProcessRegression // // This nonsense would be a lot more straightforward with Eigen // 3.4's slicing and indexing API. 
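      // cols_B is an N x |B| sparse selector with a single 1 per column (at
      // this block's rows), so products like K_fu.transpose() * cols_B give
      // K_uB without materialising a copy; cols_C does the same for the rows
      // outside the block.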
- Eigen::SparseMatrix cols_B(features.size(), block_size); + Eigen::SparseMatrix cols_B(cast::to_index(features.size()), block_size); cols_B.reserve(block_size); - Eigen::SparseMatrix cols_C(features.size(), - features.size() - block_size); - cols_C.reserve(features.size() - block_size); + Eigen::SparseMatrix cols_C(cast::to_index(features.size()), + cast::to_index(features.size()) - block_size); + cols_C.reserve(cast::to_index(features.size()) - block_size); for (Eigen::Index i = 0; i < block_size; ++i) { cols_B.insert(start_row + i, i) = 1.; @@ -472,7 +472,7 @@ class PICGaussianProcessRegression // std::cout << "cols_B (" << cols_B.rows() << "x" << cols_B.cols() << "):\n" // << Eigen::MatrixXd(cols_B) << std::endl; - for (Eigen::Index i = 0, j = 0; i < features.size();) { + for (Eigen::Index i = 0, j = 0; i < cast::to_index(features.size());) { if (i == start_row) { i += block_size; continue; @@ -856,24 +856,24 @@ class PICGaussianProcessRegression const Eigen::MatrixXd prior_cov = this->covariance_function_(features); Eigen::MatrixXd WV = Eigen::MatrixXd::Zero( - sparse_gp_fit.inducing_features.size(), features.size()); + cast::to_index(sparse_gp_fit.inducing_features.size()), cast::to_index(features.size())); - Eigen::VectorXd mean_correction = Eigen::VectorXd::Zero(features.size()); + Eigen::VectorXd mean_correction = Eigen::VectorXd::Zero(cast::to_index(features.size())); // std::cout << "mean_correction before (" << mean_correction.size() << "): " // << mean_correction.transpose().format(Eigen::FullPrecision) // << std::endl; std::vector feature_to_block; - for (Eigen::Index j = 0; j < features.size(); ++j) { - const auto group = find_group(features[j]); + for (Eigen::Index j = 0; j < cast::to_index(features.size()); ++j) { + const auto group = find_group(features[cast::to_size(j)]); // const auto group = sparse_gp_fit.measurement_groups.find( // independent_group_function_(without_measurement(features[j]))); // assert(group != sparse_gp_fit.measurement_groups.end() && // "TODO(@peddie): the group function in a PIC GP model must // cover " "the entire feature domain in any fit."); feature_to_block.push_back( - std::distance(sparse_gp_fit.measurement_groups.begin(), group)); + cast::to_size(std::distance(sparse_gp_fit.measurement_groups.begin(), group))); // if (sparse_gp_fit.A_ldlt.blocks.size() > 1) { - const std::vector fvec = {features[j]}; + const std::vector fvec = {features[cast::to_size(j)]}; const Eigen::VectorXd features_cov = this->covariance_function_(group->second.dataset.features, fvec); @@ -903,14 +903,14 @@ class PICGaussianProcessRegression // << std::endl; const Eigen::VectorXd kpuy = cross_cov.transpose().row(j) * - sparse_gp_fit.covariance_Y[group->second.block_index]; + sparse_gp_fit.covariance_Y[cast::to_size(group->second.block_index)]; // std::cout << "kpuy (" << kpuy.rows() << "x" << kpuy.cols() << "):\n" // << kpuy << std::endl; const Eigen::VectorXd Vbp = features_cov - kpuy; mean_correction[j] = - Vbp.dot(sparse_gp_fit.mean_w[group->second.block_index]); + Vbp.dot(sparse_gp_fit.mean_w[cast::to_size(group->second.block_index)]); const Eigen::VectorXd wvj = - sparse_gp_fit.W * sparse_gp_fit.cols_Bs[feature_to_block[j]] * Vbp; + sparse_gp_fit.W * sparse_gp_fit.cols_Bs[feature_to_block[cast::to_size(j)]] * Vbp; WV.col(j) = wvj; // std::cout << "Vbp[" << j << "] (" << Vbp.size() @@ -945,8 +945,10 @@ class PICGaussianProcessRegression Eigen::MatrixXd VSV(features.size(), features.size()); - for (Eigen::Index row = 0; row < VSV.rows(); ++row) { - for 
(Eigen::Index col = 0; col <= row; ++col) { + for (std::size_t row = 0; row < features.size(); ++row) { + for (std::size_t col = 0; col <= row; ++col) { + Eigen::Index row_index = cast::to_index(row); + Eigen::Index col_index = cast::to_index(col); const auto row_group = find_group(features[row]); const auto column_group = find_group(features[col]); // const auto row_group = sparse_gp_fit.measurement_groups.find( @@ -963,28 +965,28 @@ class PICGaussianProcessRegression // TODO(@peddie): these are K, not V! const Eigen::VectorXd Q_row_p = - cross_cov.transpose().row(row) * - sparse_gp_fit.covariance_Y[row_group->second.block_index]; + cross_cov.transpose().row(row_index) * + sparse_gp_fit.covariance_Y[cast::to_size(row_group->second.block_index)]; const std::vector row_fvec = {features[row]}; const Eigen::VectorXd V_row_p = this->covariance_function_(row_group->second.dataset.features, row_fvec) - Q_row_p; const Eigen::VectorXd Q_column_p = - cross_cov.transpose().row(col) * - sparse_gp_fit.covariance_Y[column_group->second.block_index]; + cross_cov.transpose().row(col_index) * + sparse_gp_fit.covariance_Y[cast::to_size(column_group->second.block_index)]; const std::vector column_fvec = {features[col]}; const Eigen::VectorXd V_column_p = this->covariance_function_(column_group->second.dataset.features, column_fvec) - Q_column_p; - VSV(row, col) = 0.; + VSV(row_index, col_index) = 0.; assert(row < feature_to_block.size()); assert(col < feature_to_block.size()); if (feature_to_block[row] == feature_to_block[col]) { - VSV(row, col) = + VSV(row_index, col_index) = sparse_gp_fit.A_ldlt.blocks[feature_to_block[row]] .sqrt_solve(V_row_p) .col(0) @@ -1015,7 +1017,7 @@ class PICGaussianProcessRegression // << (rowblock * V_row_p).dot(columnblock * V_column_p) // << std::endl; - VSV(row, col) -= (rowblock * V_row_p).dot(columnblock * V_column_p); + VSV(row_index, col_index) -= (rowblock * V_row_p).dot(columnblock * V_column_p); } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 32a13262..77fd9db0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -48,6 +48,7 @@ swift_add_tool(albatross_unit_tests test_serializable_ldlt.cc test_serialize.cc test_sparse_gp.cc + test_pic_gp.cc test_stats.cc test_thread_pool.cc test_traits_cereal.cc diff --git a/tests/test_pic_gp.cc b/tests/test_pic_gp.cc index 9bc6b7d1..6781446a 100644 --- a/tests/test_pic_gp.cc +++ b/tests/test_pic_gp.cc @@ -234,8 +234,8 @@ class BruteForcePIC Eigen::VectorXd K_uy(inducing_points_.size()); for (Eigen::Index i = 0; i < static_cast(inducing_points_.size()); ++i) { - K_xu[i] = cov_(x, inducing_points_[i]); - K_uy[i] = cov_(inducing_points_[i], y); + K_xu[i] = cov_(x, inducing_points_[cast::to_size(i)]); + K_uy[i] = cov_(inducing_points_[cast::to_size(i)], y); } // const Eigen::VectorXd K_uy = cov_(inducing_points_, y); return K_xu.dot(K_uu_ldlt_.solve(K_uy)); @@ -657,29 +657,29 @@ TEST(TestPicGP, EmitCSV) { csv_out << "type,idx,x,mean,marginal,group\n"; for (std::size_t pred = 0; pred < direct_pred.size(); ++pred) { csv_out << "dense," << std::setprecision(16) << pred << ',' - << test_features[pred] << ',' << direct_pred.mean[pred] << ',' - << direct_pred.covariance(pred, pred) << ',' + << test_features[pred] << ',' << direct_pred.mean[cast::to_index(pred)] << ',' + << direct_pred.covariance(cast::to_index(pred), cast::to_index(pred)) << ',' << grouper(test_features[pred]) << '\n'; } for (std::size_t pred = 0; pred < pitc_pred.size(); ++pred) { csv_out << "pitc," << std::setprecision(16) << pred << ',' - << 
test_features[pred] << ',' << pitc_pred.mean[pred] << ',' - << pitc_pred.covariance(pred, pred) << ',' + << test_features[pred] << ',' << pitc_pred.mean[cast::to_index(pred)] << ',' + << pitc_pred.covariance(cast::to_index(pred), cast::to_index(pred)) << ',' << grouper(test_features[pred]) << '\n'; } for (std::size_t pred = 0; pred < pic_pred.size(); ++pred) { csv_out << "pic," << std::setprecision(16) << pred << ',' - << test_features[pred] << ',' << pic_pred.mean[pred] << ',' - << pic_pred.covariance(pred, pred) << ',' + << test_features[pred] << ',' << pic_pred.mean[cast::to_index(pred)] << ',' + << pic_pred.covariance(cast::to_index(pred), cast::to_index(pred)) << ',' << grouper(test_features[pred]) << '\n'; } for (std::size_t pred = 0; pred < bfp_pred.size(); ++pred) { csv_out << "bfp," << std::setprecision(16) << pred << ',' - << test_features[pred] << ',' << bfp_pred.mean[pred] << ',' - << bfp_pred.covariance(pred, pred) << ',' + << test_features[pred] << ',' << bfp_pred.mean[cast::to_index(pred)] << ',' + << bfp_pred.covariance(cast::to_index(pred), cast::to_index(pred)) << ',' << grouper(test_features[pred]) << '\n'; } @@ -692,7 +692,7 @@ TEST(TestPicGP, EmitCSV) { points_out << "type,x,y\n"; for (std::size_t i = 0; i < dataset.size(); ++i) { points_out << "train," << dataset.features[i] << ',' - << dataset.targets.mean[i] << '\n'; + << dataset.targets.mean[cast::to_index(i)] << '\n'; } for (const auto &f : pic_fit.get_fit().inducing_features) { From 201fe96c062a4330d8bf1062342bdfecdb33a36a Mon Sep 17 00:00:00 2001 From: Wolfgang Langhans Date: Mon, 6 May 2024 10:55:02 -0700 Subject: [PATCH 11/13] fixed two things --- include/albatross/src/cereal/block_diagonal.hpp | 2 +- include/albatross/src/models/pic_gp.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/albatross/src/cereal/block_diagonal.hpp b/include/albatross/src/cereal/block_diagonal.hpp index 6b308dda..12279858 100644 --- a/include/albatross/src/cereal/block_diagonal.hpp +++ b/include/albatross/src/cereal/block_diagonal.hpp @@ -18,7 +18,7 @@ namespace cereal { template inline void serialize(Archive &archive, const albatross::BlockDiagonalLDLT &ldlt, - const syd::uint32_t ) { + const std::uint32_t ) { archive(cereal::make_nvp("blocks", ldlt.blocks)); } diff --git a/include/albatross/src/models/pic_gp.hpp b/include/albatross/src/models/pic_gp.hpp index 16c69ff8..61534b6a 100644 --- a/include/albatross/src/models/pic_gp.hpp +++ b/include/albatross/src/models/pic_gp.hpp @@ -740,8 +740,8 @@ class PICGaussianProcessRegression std::vector feature_to_block; for (Eigen::Index j = 0; j < cast::to_index(features.size()); ++j) { const auto group = find_group(features[cast::to_size(j)]); - feature_to_block.push_back( - std::distance(sparse_gp_fit.measurement_groups.begin(), group)); + feature_to_block.push_back(cast::to_size( + std::distance(sparse_gp_fit.measurement_groups.begin(), group))); const std::vector fvec = {features[cast::to_size(j)]}; const Eigen::VectorXd features_cov = From 950b0f653cfa29687084c4336ef455040c0c6560 Mon Sep 17 00:00:00 2001 From: notoriaga Date: Mon, 6 May 2024 12:32:05 -0700 Subject: [PATCH 12/13] update swift_rules update rules_swiftnav --- WORKSPACE.bazel | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel index ab62240f..cc08b75b 100644 --- a/WORKSPACE.bazel +++ b/WORKSPACE.bazel @@ -1,12 +1,12 @@ workspace(name = "albatross") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - + http_archive( 
name = "rules_swiftnav", - sha256 = "8e00b694b6dce9c91d811e2e9e13e148af5f1dd329b85b7c5d2d9d2238aa92dc", - strip_prefix = "rules_swiftnav-1c51c97743c9632169dd7e5a228c6ce915893236", - url = "https://github.com/swift-nav/rules_swiftnav/archive/1c51c97743c9632169dd7e5a228c6ce915893236.tar.gz", + sha256 = "2f91790be7845e2eeb746dd33c59d1ba5d6ee85360a3218d2a4a77ead79f81b9", + strip_prefix = "rules_swiftnav-030c1c5050e7541ce66cd82f523939a525b1551c", + url = "https://github.com/swift-nav/rules_swiftnav/archive/030c1c5050e7541ce66cd82f523939a525b1551c.tar.gz", ) load("@rules_swiftnav//cc:repositories.bzl", "register_swift_cc_toolchains", "swift_cc_toolchain") From 33a2aa99cb5f030d102ca3fb84251a5945164f11 Mon Sep 17 00:00:00 2001 From: Wolfgang Langhans Date: Mon, 6 May 2024 13:15:41 -0700 Subject: [PATCH 13/13] modified threshold --- tests/test_pic_gp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pic_gp.cc b/tests/test_pic_gp.cc index 6781446a..0849d487 100644 --- a/tests/test_pic_gp.cc +++ b/tests/test_pic_gp.cc @@ -720,7 +720,7 @@ TEST(TestPicGP, EmitCSV) { // << bfp_pred.covariance << std::endl; const double pic_error = (pic_pred.mean - direct_pred.mean).norm(); - EXPECT_LT(pic_error, 5e-7); + EXPECT_LT(pic_error, 6.5e-7); // EXPECT_LT((pic_pred.mean - test_result.mean).norm(), 1e-7); // << "|u|: " << kNumInducingPoints << "; |f|: " << dataset.size() // << "; |p|: " << test_features.size()